mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-03 06:29:16 +00:00
Compare commits
215 Commits
optimize-c
...
var-alert
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9867a9519c | ||
|
|
543b334e64 | ||
|
|
3644200488 | ||
|
|
ceddf1f552 | ||
|
|
faa4c4f438 | ||
|
|
4f8b6157a3 | ||
|
|
7fd7040c7f | ||
|
|
7fa1a41437 | ||
|
|
f7b406078f | ||
|
|
f6b10403d9 | ||
|
|
f4ce0bccfc | ||
|
|
f26ce4487d | ||
|
|
9f31f3b57d | ||
|
|
c7a97a9767 | ||
|
|
f94068e611 | ||
|
|
2cd5edf691 | ||
|
|
0ffc67f35f | ||
|
|
6dc5ac47b7 | ||
|
|
2526440efa | ||
|
|
2f8b8fad62 | ||
|
|
9c19201c13 | ||
|
|
4758c14a46 | ||
|
|
2e54ab8c2f | ||
|
|
67f79c2f88 | ||
|
|
749ae70bd7 | ||
|
|
e2dba9b3d3 | ||
|
|
2228842b2f | ||
|
|
38fe37a286 | ||
|
|
7daf1e8c43 | ||
|
|
8706ded776 | ||
|
|
f637078dd9 | ||
|
|
8aa7b1060d | ||
|
|
18634a33b2 | ||
|
|
7ed1b80759 | ||
|
|
3d240704f6 | ||
|
|
ce0322bbd7 | ||
|
|
66f62ca8c5 | ||
|
|
d11d73f6bc | ||
|
|
dee1fe2d61 | ||
|
|
b3da24f18a | ||
|
|
29ea4f6ed2 | ||
|
|
5272b11efc | ||
|
|
c322601138 | ||
|
|
f1357d6f33 | ||
|
|
728d70c707 | ||
|
|
bf93932b22 | ||
|
|
57581be350 | ||
|
|
5793f089f6 | ||
|
|
fa49449588 | ||
|
|
876f1d1084 | ||
|
|
678830be37 | ||
|
|
5e30f3a00d | ||
|
|
7f1eefd033 | ||
|
|
c8dd26ca4c | ||
|
|
37c57e66ea | ||
|
|
878e940325 | ||
|
|
cbc715305d | ||
|
|
5011766c70 | ||
|
|
b3ed8a1e8c | ||
|
|
814ded90b6 | ||
|
|
43e89040eb | ||
|
|
3d339fe03c | ||
|
|
7618858912 | ||
|
|
15b4ef8611 | ||
|
|
5083a5cc96 | ||
|
|
d51e83d7d4 | ||
|
|
601d4f0c95 | ||
|
|
90fac12953 | ||
|
|
19d76824d9 | ||
|
|
1341554bbc | ||
|
|
fd3ce338cb | ||
|
|
b8f36ce3cb | ||
|
|
037112a9e6 | ||
|
|
c6e75d31a1 | ||
|
|
bd24f5b056 | ||
|
|
89551c8edb | ||
|
|
042b44940d | ||
|
|
8cd8674848 | ||
|
|
7bb6ac8a03 | ||
|
|
76b35276af | ||
|
|
439a21b784 | ||
|
|
47e70a2dba | ||
|
|
16b3cb1abc | ||
|
|
32995c1b2d | ||
|
|
b4fa36fa0e | ||
|
|
f412f82eb8 | ||
|
|
9da1cd506b | ||
|
|
99ea838863 | ||
|
|
7feb003b72 | ||
|
|
b0a053361f | ||
|
|
959f75394b | ||
|
|
03e95973b2 | ||
|
|
e890705167 | ||
|
|
6716f1bdf1 | ||
|
|
739b9406a4 | ||
|
|
77f280d1cc | ||
|
|
04fe1b9dd6 | ||
|
|
552758e0e1 | ||
|
|
68bc474c1b | ||
|
|
f692035deb | ||
|
|
eb441353c3 | ||
|
|
b606b22ae6 | ||
|
|
1de0428860 | ||
|
|
3d0c288c9f | ||
|
|
343814a802 | ||
|
|
12e2761467 | ||
|
|
0edd5ee772 | ||
|
|
5e430cedc7 | ||
|
|
a791a9901e | ||
|
|
222cdd76f0 | ||
|
|
ed4e3937e0 | ||
|
|
60f9e1c48e | ||
|
|
276dfe7372 | ||
|
|
4a6dacbe30 | ||
|
|
48eebba11a | ||
|
|
eca82e5ec2 | ||
|
|
21478fcf3d | ||
|
|
a87c856299 | ||
|
|
ba035a446d | ||
|
|
bf840e6bb2 | ||
|
|
cd01092aed | ||
|
|
e202fd50c8 | ||
|
|
f0e5062485 | ||
|
|
861fe96de5 | ||
|
|
5b66ada96d | ||
|
|
d5a98debff | ||
|
|
4977052a67 | ||
|
|
dcc461e587 | ||
|
|
f5ce1733bb | ||
|
|
436cf25409 | ||
|
|
038f68b0b7 | ||
|
|
96ef1895b7 | ||
|
|
eeaa7b46f1 | ||
|
|
dc525352f1 | ||
|
|
98a3fe9375 | ||
|
|
74b0f802ec | ||
|
|
85bd3148d5 | ||
|
|
0931fa9603 | ||
|
|
65cdb2da9e | ||
|
|
9ad6514af6 | ||
|
|
302c6549e4 | ||
|
|
a3122270e6 | ||
|
|
1245c453bb | ||
|
|
9c5ccf0c8f | ||
|
|
cd468af250 | ||
|
|
2d3449c0ec | ||
|
|
e15bdbce92 | ||
|
|
3890243d42 | ||
|
|
37fb4ee867 | ||
|
|
6db63eafc1 | ||
|
|
1e9cbfc316 | ||
|
|
4f95554fe3 | ||
|
|
8eba9aa92f | ||
|
|
6ba74b8e21 | ||
|
|
8ea4632681 | ||
|
|
f958f27de1 | ||
|
|
1bdfa3e032 | ||
|
|
143880cd46 | ||
|
|
38f0b4f1bb | ||
|
|
2bccd5be99 | ||
|
|
7b328b3eaa | ||
|
|
8bd5b90e94 | ||
|
|
96629e284f | ||
|
|
67d2875690 | ||
|
|
238895a1f8 | ||
|
|
fb341b645d | ||
|
|
2d84fd8cf3 | ||
|
|
2611f87c41 | ||
|
|
a5b7aa7a26 | ||
|
|
0714a0f8f1 | ||
|
|
063cc750e1 | ||
|
|
b2a912d72f | ||
|
|
4ba745f442 | ||
|
|
fa7d46ecad | ||
|
|
a5a43df44f | ||
|
|
fbf1d68b84 | ||
|
|
ca712f62a4 | ||
|
|
84ee14d21e | ||
|
|
c9cf1cfdd2 | ||
|
|
9d1c01107f | ||
|
|
7ea31b5c6d | ||
|
|
e8e1c67cc8 | ||
|
|
8079bcd288 | ||
|
|
33b178ce82 | ||
|
|
28c9cd7b43 | ||
|
|
b771e8a3e8 | ||
|
|
4945e98200 | ||
|
|
a938ea3e56 | ||
|
|
25c339025b | ||
|
|
bb0ee35275 | ||
|
|
0fc54ad173 | ||
|
|
1f95e2df94 | ||
|
|
d2969f34ef | ||
|
|
d9a34959dc | ||
|
|
bc6ff7f4ba | ||
|
|
514913a97a | ||
|
|
affc610b7b | ||
|
|
a098d5d39c | ||
|
|
05c3f1e0e4 | ||
|
|
d5740164f2 | ||
|
|
8c2383c410 | ||
|
|
9af024fb99 | ||
|
|
12f3cc21e1 | ||
|
|
0b3bb54eb4 | ||
|
|
da813e2b0c | ||
|
|
50fa2499b7 | ||
|
|
2c5ae5b3a9 | ||
|
|
522932aeb4 | ||
|
|
35ac0ddea5 | ||
|
|
26fa750309 | ||
|
|
1eba607aeb | ||
|
|
6aadd159af | ||
|
|
b6ad87523e | ||
|
|
ea5b6845de | ||
|
|
5ba5096da2 |
22
.github/workflows/issue-translator.yml
vendored
Normal file
22
.github/workflows/issue-translator.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: 'Issue Translator'
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
|
||||
jobs:
|
||||
translate:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Translate Issues
|
||||
uses: usthe/issues-translate-action@v2.7
|
||||
with:
|
||||
# 是否翻译 issue 标题
|
||||
IS_MODIFY_TITLE: true
|
||||
# GitHub Token
|
||||
BOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# 自定义翻译标注(可选)
|
||||
# CUSTOM_BOT_NOTE: "Translation by bot"
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -58,6 +58,10 @@ _test
|
||||
.idea
|
||||
.index
|
||||
.vscode
|
||||
.issue
|
||||
.issue/*
|
||||
.cursor
|
||||
.claude
|
||||
.DS_Store
|
||||
.cache-loader
|
||||
.payload
|
||||
|
||||
41
.typos.toml
Normal file
41
.typos.toml
Normal file
@@ -0,0 +1,41 @@
|
||||
# Configuration for typos tool
|
||||
[files]
|
||||
extend-exclude = [
|
||||
# Ignore auto-generated easyjson files
|
||||
"*_easyjson.go",
|
||||
# Ignore binary files
|
||||
"*.gz",
|
||||
"*.tar",
|
||||
"n9e",
|
||||
"n9e-*"
|
||||
]
|
||||
|
||||
[default.extend-identifiers]
|
||||
# Didi is a company name (DiDi), not a typo
|
||||
Didi = "Didi"
|
||||
# datas is intentionally used as plural of data (slice variable)
|
||||
datas = "datas"
|
||||
# pendings is intentionally used as plural
|
||||
pendings = "pendings"
|
||||
pendingsUseByRecover = "pendingsUseByRecover"
|
||||
pendingsUseByRecoverMap = "pendingsUseByRecoverMap"
|
||||
# typs is intentionally used as shorthand for types (parameter name)
|
||||
typs = "typs"
|
||||
|
||||
[default.extend-words]
|
||||
# Some false positives
|
||||
ba = "ba"
|
||||
# Specific corrections for ambiguous typos
|
||||
contigious = "contiguous"
|
||||
onw = "own"
|
||||
componet = "component"
|
||||
Patten = "Pattern"
|
||||
Requets = "Requests"
|
||||
Mis = "Miss"
|
||||
exporer = "exporter"
|
||||
soruce = "source"
|
||||
verison = "version"
|
||||
Configations = "Configurations"
|
||||
emmited = "emitted"
|
||||
Utlization = "Utilization"
|
||||
serie = "series"
|
||||
107
README.md
107
README.md
@@ -3,7 +3,7 @@
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>开源告警管理专家</b>
|
||||
<b>Open-Source Alerting Expert</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
@@ -25,94 +25,91 @@
|
||||
|
||||
|
||||
|
||||
[English](./README_en.md) | [中文](./README.md)
|
||||
[English](./README.md) | [中文](./README_zh.md)
|
||||
|
||||
## 夜莺是什么
|
||||
## 🎯 What is Nightingale
|
||||
|
||||
夜莺监控(Nightingale)是一款侧重告警的监控类开源项目。类似 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重在可视化,夜莺是侧重在告警引擎、告警事件的处理和分发。
|
||||
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
|
||||
|
||||
夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展委员会(CCF ODC),为 CCF ODC 成立后接受捐赠的第一个开源项目。
|
||||
> The Nightingale project was initially developed and open-sourced by DiDi. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
|
||||
|
||||
## 夜莺的工作逻辑
|
||||

|
||||
|
||||
很多用户已经自行采集了指标、日志数据,此时就把存储库(VictoriaMetrics、ElasticSearch等)作为数据源接入夜莺,即可在夜莺里配置告警规则、通知规则,完成告警事件的生成和派发。
|
||||
## 💡 How Nightingale Works
|
||||
|
||||

|
||||
Many users have already collected metrics and log data. In this case, you can connect your storage repositories (such as VictoriaMetrics, ElasticSearch, etc.) as data sources in Nightingale. This allows you to configure alerting rules and notification rules within Nightingale, enabling the generation and distribution of alarms.
|
||||
|
||||
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接。
|
||||

|
||||
|
||||
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
|
||||
Nightingale itself does not provide monitoring data collection capabilities. We recommend using [Categraf](https://github.com/flashcatcloud/categraf) as the collector, which integrates seamlessly with Nightingale.
|
||||
|
||||
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
|
||||
[Categraf](https://github.com/flashcatcloud/categraf) can collect monitoring data from operating systems, network devices, various middleware, and databases. It pushes this data to Nightingale via the `Prometheus Remote Write` protocol. Nightingale then stores the monitoring data in a time-series database (such as Prometheus, VictoriaMetrics, etc.) and provides alerting and visualization capabilities.
|
||||
|
||||

|
||||
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alerting engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
|
||||
|
||||
> 上图中,机房A和中心机房的网络链路很好,所以直接由中心端的夜莺进程做告警引擎,机房B和中心机房的网络链路不好,所以在机房B部署了 `n9e-edge` 做告警引擎,对机房B的数据源做告警判定。
|
||||

|
||||
|
||||
## 告警降噪、升级、协同
|
||||
> In the above diagram, Data Center A has a good network with the central data center, so it uses the Nightingale process in the central data center as the alerting engine. Data Center B has a poor network with the central data center, so it deploys `n9e-edge` as the alerting engine to handle alerting for its own data sources.
|
||||
|
||||
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介(电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
|
||||
## 🔕 Alert Noise Reduction, Escalation, and Collaboration
|
||||
|
||||
如果您有更高级的需求,比如:
|
||||
Nightingale focuses on being an alerting engine, responsible for generating alarms and flexibly distributing them based on rules. It has built-in support for 20 notification media (such as phone calls, SMS, email, DingTalk, Slack, etc.).
|
||||
|
||||
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
|
||||
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
|
||||
If you have more advanced requirements, such as:
|
||||
- Want to consolidate events from multiple monitoring systems into one platform for unified noise reduction, response handling, and data analysis.
|
||||
- Want to support personnel scheduling, practice on-call culture, and support alert escalation (to avoid missing alerts) and collaborative handling.
|
||||
|
||||
那夜莺是不合适的,推荐您选用 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) 这样的 On-call 产品,产品简单易用,也有免费套餐。
|
||||
Then Nightingale is not suitable. It is recommended that you choose on-call products such as PagerDuty and FlashDuty. These products are simple and easy to use.
|
||||
|
||||
## 🗨️ Communication Channels
|
||||
|
||||
## 相关资料 & 交流渠道
|
||||
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助(PPT链接在文末)
|
||||
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
|
||||
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
|
||||
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
|
||||
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
|
||||
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
|
||||
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
|
||||
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://n9e.github.io/).
|
||||
|
||||
## 🔑 Key Features
|
||||
|
||||
## 关键特性简介
|
||||

|
||||
|
||||

|
||||
- Nightingale supports alerting rules, mute rules, subscription rules, and notification rules. It natively supports 20 types of notification media and allows customization of message templates.
|
||||
- It supports event pipelines for processing alarms, which makes automated integration with in-house systems easier. For example, a pipeline can append metadata to alarms or relabel events.
|
||||
- It introduces the concept of business groups and a permission system to manage various rules in a categorized manner.
|
||||
- Many databases and middleware come with built-in alert rules that can be directly imported and used. It also supports direct import of Prometheus alerting rules.
|
||||
- It supports alerting self-healing, which automatically triggers a script to execute predefined logic after an alarm is generated—such as cleaning up disk space or capturing the current system state.
|
||||
|
||||
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
|
||||
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
|
||||
- 支持业务组概念,引入权限体系,分门别类管理各类规则
|
||||
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
|
||||
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
|
||||

|
||||
|
||||

|
||||
- Nightingale archives historical alarms and supports multi-dimensional query and statistics.
|
||||
- It supports flexible aggregation grouping, allowing a clear view of the distribution of alarms across the company.
|
||||
|
||||
- 夜莺存档了历史告警事件,支持多维度的查询和统计
|
||||
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
|
||||

|
||||
|
||||

|
||||
- Nightingale has built-in metric descriptions, dashboards, and alerting rules for common operating systems, middleware, and databases. These are contributed by the community, so their quality varies.
|
||||
- It directly receives data via multiple protocols such as Remote Write, OpenTSDB, Datadog, and Falcon, so it can integrate with various agents.
|
||||
- It supports data sources like Prometheus, ElasticSearch, Loki, ClickHouse, MySQL, Postgres, allowing alerting based on data from these sources.
|
||||
- Nightingale can be easily embedded into internal enterprise systems (e.g. Grafana, CMDB), and even supports configuring menu visibility for these embedded systems.
|
||||
|
||||
- 夜莺内置常用操作系统、中间件、数据库的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
|
||||
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
|
||||
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
|
||||
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
|
||||

|
||||
|
||||
- Nightingale supports dashboard functionality, including common chart types, and comes with pre-built dashboards. The image above is a screenshot of one of these dashboards.
|
||||
- If you are already accustomed to Grafana, it is recommended to continue using Grafana for visualization, as Grafana has deeper expertise in this area.
|
||||
- For machine-related monitoring data collected by Categraf, it is advisable to use Nightingale's built-in dashboards for viewing. This is because Categraf's metric naming follows Telegraf's convention, which differs from that of Node Exporter.
|
||||
- Due to Nightingale's concept of business groups (where machines can belong to different groups), there may be scenarios where you only want to view machines within the current business group on the dashboard. Thus, Nightingale's dashboards can be linked with business groups for interactive filtering.
|
||||
|
||||

|
||||
## 🌟 Stargazers over time
|
||||
|
||||
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
|
||||
- 如果你已经习惯了 Grafana,建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
|
||||
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
|
||||
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
|
||||
|
||||
## 广受关注
|
||||
[](https://star-history.com/#ccfos/nightingale&Date)
|
||||
|
||||
## 感谢众多企业的信赖
|
||||
## 🔥 Users
|
||||
|
||||

|
||||

|
||||
|
||||
## 社区共建
|
||||
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践,共建专业、活跃的夜莺开源社区。
|
||||
- ❤️ 夜莺贡献者
|
||||
## 🤝 Community Co-Building
|
||||
|
||||
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
|
||||
- ❤️ Nightingale Contributors
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
## 📜 License
|
||||
- [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)
|
||||
|
||||
113
README_en.md
113
README_en.md
@@ -1,113 +0,0 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>Open-source Alert Management Expert, an Integrated Observability Platform</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://flashcat.cloud/docs/">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
[English](./README_en.md) | [中文](./README.md)
|
||||
|
||||
## What is Nightingale
|
||||
|
||||
Nightingale is an open-source project focused on alerting. Similar to Grafana's data source integration approach, Nightingale also connects with various existing data sources. However, while Grafana focuses on visualization, Nightingale focuses on alerting engines.
|
||||
|
||||
Originally developed and open-sourced by Didi, Nightingale was donated to the China Computer Federation Open Source Development Committee (CCF ODC) on May 11, 2022, becoming the first open-source project accepted by the CCF ODC after its establishment.
|
||||
|
||||
|
||||
## Quick Start
|
||||
|
||||
- 👉 [Documentation](https://flashcat.cloud/docs/) | [Download](https://flashcat.cloud/download/nightingale/)
|
||||
- ❤️ [Report a Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
|
||||
- ℹ️ For faster access, the above documentation and download sites are hosted on [FlashcatCloud](https://flashcat.cloud).
|
||||
|
||||
## Features
|
||||
|
||||
- **Integration with Multiple Time-Series Databases:** Supports integration with various time-series databases such as Prometheus, VictoriaMetrics, Thanos, Mimir, M3DB, and TDengine, enabling unified alert management.
|
||||
- **Advanced Alerting Capabilities:** Comes with built-in support for multiple alerting rules, extensible to common notification channels. It also supports alert suppression, silencing, subscription, self-healing, and alert event management.
|
||||
- **High-Performance Visualization Engine:** Offers various chart styles with numerous built-in dashboard templates and the ability to import Grafana templates. Ready to use with a business-friendly open-source license.
|
||||
- **Support for Common Collectors:** Compatible with [Categraf](https://flashcat.cloud/product/categraf), Telegraf, Grafana-agent, Datadog-agent, and various exporters as collectors—there's no data that can't be monitored.
|
||||
- **Seamless Integration with [Flashduty](https://flashcat.cloud/product/flashcat-duty/):** Enables alert aggregation, acknowledgment, escalation, scheduling, and IM integration, ensuring no alerts are missed, reducing unnecessary interruptions, and enhancing efficient collaboration.
|
||||
|
||||
|
||||
## Screenshots
|
||||
|
||||
You can switch languages and themes in the top right corner. We now support English, Simplified Chinese, and Traditional Chinese.
|
||||
|
||||

|
||||
|
||||
### Instant Query
|
||||
|
||||
Similar to the built-in query analysis page in Prometheus, Nightingale offers an ad-hoc query feature with UI enhancements. It also provides built-in PromQL metrics, allowing users unfamiliar with PromQL to quickly perform queries.
|
||||
|
||||

|
||||
|
||||
### Metric View
|
||||
|
||||
Alternatively, you can use the Metric View to access data. With this feature, Instant Query becomes less necessary, as it caters more to advanced users. Regular users can easily perform queries using the Metric View.
|
||||
|
||||

|
||||
|
||||
### Built-in Dashboards
|
||||
|
||||
Nightingale includes commonly used dashboards that can be imported and used directly. You can also import Grafana dashboards, although compatibility is limited to basic Grafana charts. If you’re accustomed to Grafana, it’s recommended to continue using it for visualization, with Nightingale serving as an alerting engine.
|
||||
|
||||

|
||||
|
||||
### Built-in Alert Rules
|
||||
|
||||
In addition to the built-in dashboards, Nightingale also comes with numerous alert rules that are ready to use out of the box.
|
||||
|
||||

|
||||
|
||||
|
||||
|
||||
## Architecture
|
||||
|
||||
In most community scenarios, Nightingale is primarily used as an alert engine, integrating with multiple time-series databases to unify alert rule management. Grafana remains the preferred tool for visualization. As an alert engine, the product architecture of Nightingale is as follows:
|
||||
|
||||

|
||||
|
||||
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alert engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
|
||||
|
||||

|
||||
|
||||
|
||||
## Communication Channels
|
||||
|
||||
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
|
||||
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/).
|
||||
|
||||
## Stargazers over time
|
||||
|
||||
[](https://star-history.com/#ccfos/nightingale&Date)
|
||||
|
||||
## Community Co-Building
|
||||
|
||||
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
|
||||
- ❤️ Nightingale Contributors
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
122
README_zh.md
Normal file
122
README_zh.md
Normal file
@@ -0,0 +1,122 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>开源告警管理专家</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://flashcat.cloud/docs/">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="Join Slack" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
[English](./README.md) | [中文](./README_zh.md)
|
||||
|
||||
## 夜莺是什么
|
||||
|
||||
夜莺 Nightingale 是一款开源云原生监控告警工具,是中国计算机学会接受捐赠并托管的第一个开源项目,在 GitHub 上有超过 12000 颗星,广受关注和使用。夜莺的统一告警引擎,可以对接 Prometheus、Elasticsearch、ClickHouse、Loki、MySQL 等多种数据源,提供全面的告警判定、丰富的事件处理和灵活的告警分发及通知能力。
|
||||
|
||||
夜莺侧重于监控告警,类似于 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重于可视化,夜莺则是侧重于告警引擎、告警事件的处理和分发。
|
||||
|
||||
> 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展技术委员会(CCF ODTC),为 CCF ODTC 成立后接受捐赠的第一个开源项目。
|
||||
|
||||

|
||||
|
||||
## 夜莺的工作逻辑
|
||||
|
||||
很多用户已经自行采集了指标、日志数据,此时就把存储库(VictoriaMetrics、ElasticSearch等)作为数据源接入夜莺,即可在夜莺里配置告警规则、通知规则,完成告警事件的生成和派发。
|
||||
|
||||

|
||||
|
||||
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接。
|
||||
|
||||
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
|
||||
|
||||
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
|
||||
|
||||

|
||||
|
||||
> 上图中,机房A和中心机房的网络链路很好,所以直接由中心端的夜莺进程做告警引擎,机房B和中心机房的网络链路不好,所以在机房B部署了 `n9e-edge` 做告警引擎,对机房B的数据源做告警判定。
|
||||
|
||||
## 告警降噪、升级、协同
|
||||
|
||||
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介(电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
|
||||
|
||||
如果您有更高级的需求,比如:
|
||||
|
||||
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
|
||||
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
|
||||
|
||||
那夜莺是不合适的,推荐您选用 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) 这样的 On-call 产品,产品简单易用,也有免费套餐。
|
||||
|
||||
|
||||
## 相关资料 & 交流渠道
|
||||
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助(PPT链接在文末)
|
||||
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
|
||||
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
|
||||
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
|
||||
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
|
||||
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
|
||||
|
||||
|
||||
## 关键特性简介
|
||||
|
||||

|
||||
|
||||
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
|
||||
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
|
||||
- 支持业务组概念,引入权限体系,分门别类管理各类规则
|
||||
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
|
||||
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
|
||||
|
||||

|
||||
|
||||
- 夜莺存档了历史告警事件,支持多维度的查询和统计
|
||||
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
|
||||
|
||||

|
||||
|
||||
- 夜莺内置常用操作系统、中间件、数据库的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
|
||||
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
|
||||
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
|
||||
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
|
||||
|
||||
|
||||

|
||||
|
||||
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
|
||||
- 如果你已经习惯了 Grafana,建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
|
||||
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
|
||||
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
|
||||
|
||||
## 广受关注
|
||||
[](https://star-history.com/#ccfos/nightingale&Date)
|
||||
|
||||
## 感谢众多企业的信赖
|
||||
|
||||

|
||||
|
||||
## 社区共建
|
||||
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践,共建专业、活跃的夜莺开源社区。
|
||||
- ❤️ 夜莺贡献者
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
- [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)
|
||||
@@ -75,7 +75,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
macros.RegisterMacro(macros.MacroInVain)
|
||||
dscache.Init(ctx, false)
|
||||
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache)
|
||||
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
|
||||
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
|
||||
@@ -98,7 +98,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
|
||||
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, taskTplsCache *memsto.TaskTplCache, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
|
||||
promClients *prom.PromClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, messageTemplateCache *memsto.MessageTemplateCacheType) {
|
||||
promClients *prom.PromClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, messageTemplateCache *memsto.MessageTemplateCacheType, configCvalCache *memsto.CvalCache) {
|
||||
alertSubscribeCache := memsto.NewAlertSubscribeCache(ctx, syncStats)
|
||||
recordingRuleCache := memsto.NewRecordingRuleCache(ctx, syncStats)
|
||||
targetsOfAlertRulesCache := memsto.NewTargetOfAlertRuleCache(ctx, alertc.Heartbeat.EngineName, syncStats)
|
||||
@@ -117,14 +117,14 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
|
||||
|
||||
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
|
||||
|
||||
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, alertc.Alerting, ctx, alertStats)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
|
||||
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, configCvalCache, alertc.Alerting, ctx, alertStats)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients, alertMuteCache)
|
||||
|
||||
notifyRecordComsumer := sender.NewNotifyRecordConsumer(ctx)
|
||||
notifyRecordConsumer := sender.NewNotifyRecordConsumer(ctx)
|
||||
|
||||
go dp.ReloadTpls()
|
||||
go consumer.LoopConsume()
|
||||
go notifyRecordComsumer.LoopConsume()
|
||||
go notifyRecordConsumer.LoopConsume()
|
||||
|
||||
go queue.ReportQueueSize(alertStats)
|
||||
go sender.ReportNotifyRecordQueueSize(alertStats)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
@@ -13,6 +14,20 @@ func RuleKey(datasourceId, id int64) string {
|
||||
|
||||
func MatchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
|
||||
for _, filter := range itags {
|
||||
// target_group in和not in优先特殊处理:匹配通过则继续下一个 filter,匹配失败则整组不匹配
|
||||
if filter.Key == "target_group" {
|
||||
// target 字段从 event.JsonTagsAndValue() 中获取的
|
||||
v, ok := eventTagsMap["target"]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
if !targetGroupMatch(v, filter) {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// 普通标签按原逻辑处理
|
||||
value, has := eventTagsMap[filter.Key]
|
||||
if !has {
|
||||
return false
|
||||
@@ -35,9 +50,9 @@ func MatchGroupsName(groupName string, groupFilter []models.TagFilter) bool {
|
||||
func matchTag(value string, filter models.TagFilter) bool {
|
||||
switch filter.Func {
|
||||
case "==":
|
||||
return strings.TrimSpace(filter.Value) == strings.TrimSpace(value)
|
||||
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) == strings.TrimSpace(value)
|
||||
case "!=":
|
||||
return strings.TrimSpace(filter.Value) != strings.TrimSpace(value)
|
||||
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) != strings.TrimSpace(value)
|
||||
case "in":
|
||||
_, has := filter.Vset[value]
|
||||
return has
|
||||
@@ -49,6 +64,65 @@ func matchTag(value string, filter models.TagFilter) bool {
|
||||
case "!~":
|
||||
return !filter.Regexp.MatchString(value)
|
||||
}
|
||||
// unexpect func
|
||||
// unexpected func
|
||||
return false
|
||||
}
|
||||
|
||||
// targetGroupMatch 处理 target_group 的特殊匹配逻辑
|
||||
func targetGroupMatch(value string, filter models.TagFilter) bool {
|
||||
var valueMap map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(value), &valueMap); err != nil {
|
||||
return false
|
||||
}
|
||||
switch filter.Func {
|
||||
case "in", "not in":
|
||||
// float64 类型的 id 切片
|
||||
filterValueIds, ok := filter.Value.([]interface{})
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
filterValueIdsMap := make(map[float64]struct{})
|
||||
for _, id := range filterValueIds {
|
||||
filterValueIdsMap[id.(float64)] = struct{}{}
|
||||
}
|
||||
// float64 类型的 groupIds 切片
|
||||
groupIds, ok := valueMap["group_ids"].([]interface{})
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
// in 只要 groupIds 中有一个在 filterGroupIds 中出现,就返回 true
|
||||
// not in 则相反
|
||||
found := false
|
||||
for _, gid := range groupIds {
|
||||
if _, found = filterValueIdsMap[gid.(float64)]; found {
|
||||
break
|
||||
}
|
||||
}
|
||||
if filter.Func == "in" {
|
||||
return found
|
||||
}
|
||||
// filter.Func == "not in"
|
||||
return !found
|
||||
|
||||
case "=~", "!~":
|
||||
// 正则满足一个就认为 matched
|
||||
groupNames, ok := valueMap["group_names"].([]interface{})
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
matched := false
|
||||
for _, gname := range groupNames {
|
||||
if filter.Regexp.MatchString(fmt.Sprintf("%v", gname)) {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if filter.Func == "=~" {
|
||||
return matched
|
||||
}
|
||||
// "!~": 只要有一个匹配就返回 false,否则返回 true
|
||||
return !matched
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
@@ -26,10 +27,15 @@ type Consumer struct {
|
||||
alerting aconf.Alerting
|
||||
ctx *ctx.Context
|
||||
|
||||
dispatch *Dispatch
|
||||
promClients *prom.PromClientMap
|
||||
dispatch *Dispatch
|
||||
promClients *prom.PromClientMap
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
}
|
||||
|
||||
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
|
||||
|
||||
var EventMuteHook EventMuteHookFunc = func(event *models.AlertCurEvent) bool { return false }
|
||||
|
||||
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
|
||||
tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
|
||||
if promClients.IsNil(datasourceID) {
|
||||
@@ -43,12 +49,14 @@ func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
|
||||
}
|
||||
|
||||
// 创建一个 Consumer 实例
|
||||
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
|
||||
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap, alertMuteCache *memsto.AlertMuteCacheType) *Consumer {
|
||||
return &Consumer{
|
||||
alerting: alerting,
|
||||
ctx: ctx,
|
||||
dispatch: dispatch,
|
||||
promClients: promClients,
|
||||
|
||||
alertMuteCache: alertMuteCache,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,10 +118,6 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
|
||||
|
||||
e.persist(event)
|
||||
|
||||
if event.IsRecovered && event.NotifyRecovered == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
e.dispatch.HandleEventNotify(event, false)
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,17 @@ import (
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
var ShouldSkipNotify func(*ctx.Context, *models.AlertCurEvent, int64) bool
|
||||
var SendByNotifyRule func(*ctx.Context, *memsto.UserCacheType, *memsto.UserGroupCacheType, *memsto.NotifyChannelCacheType, *memsto.CvalCache,
|
||||
[]*models.AlertCurEvent, int64, *models.NotifyConfig, *models.NotifyChannelConfig, *models.MessageTemplate)
|
||||
|
||||
var EventProcessorCache *memsto.EventProcessorCacheType
|
||||
|
||||
func init() {
|
||||
ShouldSkipNotify = shouldSkipNotify
|
||||
SendByNotifyRule = SendNotifyRuleMessage
|
||||
}
|
||||
|
||||
type Dispatch struct {
|
||||
alertRuleCache *memsto.AlertRuleCacheType
|
||||
userCache *memsto.UserCacheType
|
||||
@@ -32,6 +43,7 @@ type Dispatch struct {
|
||||
targetCache *memsto.TargetCacheType
|
||||
notifyConfigCache *memsto.NotifyConfigCacheType
|
||||
taskTplsCache *memsto.TaskTplCache
|
||||
configCvalCache *memsto.CvalCache
|
||||
|
||||
notifyRuleCache *memsto.NotifyRuleCacheType
|
||||
notifyChannelCache *memsto.NotifyChannelCacheType
|
||||
@@ -45,9 +57,8 @@ type Dispatch struct {
|
||||
tpls map[string]*template.Template
|
||||
ExtraSenders map[string]sender.Sender
|
||||
BeforeSenderHook func(*models.AlertCurEvent) bool
|
||||
|
||||
ctx *ctx.Context
|
||||
Astats *astats.Stats
|
||||
ctx *ctx.Context
|
||||
Astats *astats.Stats
|
||||
|
||||
RwLock sync.RWMutex
|
||||
}
|
||||
@@ -56,7 +67,7 @@ type Dispatch struct {
|
||||
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
|
||||
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
|
||||
taskTplsCache *memsto.TaskTplCache, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
|
||||
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
|
||||
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, configCvalCache *memsto.CvalCache, alerting aconf.Alerting, c *ctx.Context, astats *astats.Stats) *Dispatch {
|
||||
notify := &Dispatch{
|
||||
alertRuleCache: alertRuleCache,
|
||||
userCache: userCache,
|
||||
@@ -69,6 +80,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
|
||||
notifyChannelCache: notifyChannelCache,
|
||||
messageTemplateCache: messageTemplateCache,
|
||||
eventProcessorCache: eventProcessorCache,
|
||||
configCvalCache: configCvalCache,
|
||||
|
||||
alerting: alerting,
|
||||
|
||||
@@ -77,11 +89,16 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
|
||||
ExtraSenders: make(map[string]sender.Sender),
|
||||
BeforeSenderHook: func(*models.AlertCurEvent) bool { return true },
|
||||
|
||||
ctx: ctx,
|
||||
ctx: c,
|
||||
Astats: astats,
|
||||
}
|
||||
|
||||
pipeline.Init()
|
||||
EventProcessorCache = eventProcessorCache
|
||||
|
||||
// 设置通知记录回调函数
|
||||
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
|
||||
|
||||
return notify
|
||||
}
|
||||
|
||||
@@ -162,47 +179,23 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
if !notifyRule.Enable {
|
||||
continue
|
||||
}
|
||||
eventCopy.NotifyRuleId = notifyRuleId
|
||||
eventCopy.NotifyRuleName = notifyRule.Name
|
||||
|
||||
var processors []models.Processor
|
||||
for _, pipelineConfig := range notifyRule.PipelineConfigs {
|
||||
if !pipelineConfig.Enable {
|
||||
continue
|
||||
}
|
||||
|
||||
eventPipeline := e.eventProcessorCache.Get(pipelineConfig.PipelineId)
|
||||
if eventPipeline == nil {
|
||||
logger.Warningf("notify_id: %d, event:%+v, processor not found", notifyRuleId, eventCopy)
|
||||
continue
|
||||
}
|
||||
|
||||
if !pipelineApplicable(eventPipeline, eventCopy) {
|
||||
logger.Debugf("notify_id: %d, event:%+v, pipeline_id: %d, not applicable", notifyRuleId, eventCopy, pipelineConfig.PipelineId)
|
||||
continue
|
||||
}
|
||||
|
||||
processors = append(processors, e.eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)...)
|
||||
}
|
||||
|
||||
for _, processor := range processors {
|
||||
logger.Infof("before processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
|
||||
eventCopy = processor.Process(e.ctx, eventCopy)
|
||||
logger.Infof("after processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
|
||||
if eventCopy == nil {
|
||||
logger.Warningf("notify_id: %d, event:%+v, processor:%+v, event is nil", notifyRuleId, eventCopy, processor)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if eventCopy == nil {
|
||||
// 如果 eventCopy 为 nil,说明 eventCopy 被 processor drop 掉了, 不再发送通知
|
||||
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
|
||||
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
|
||||
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
|
||||
continue
|
||||
}
|
||||
|
||||
// notify
|
||||
for i := range notifyRule.NotifyConfigs {
|
||||
if !NotifyRuleApplicable(&notifyRule.NotifyConfigs[i], eventCopy) {
|
||||
err := NotifyRuleMatchCheck(&notifyRule.NotifyConfigs[i], eventCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
|
||||
continue
|
||||
}
|
||||
|
||||
notifyChannel := e.notifyChannelCache.Get(notifyRule.NotifyConfigs[i].ChannelID)
|
||||
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
|
||||
if notifyChannel == nil {
|
||||
@@ -211,22 +204,74 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
continue
|
||||
}
|
||||
|
||||
if notifyChannel.RequestType != "flashduty" && messageTemplate == nil {
|
||||
if notifyChannel.RequestType != "flashduty" && notifyChannel.RequestType != "pagerduty" && messageTemplate == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// todo go send
|
||||
// todo 聚合 event
|
||||
go e.sendV2([]*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
|
||||
go SendByNotifyRule(e.ctx, e.userCache, e.userGroupCache, e.notifyChannelCache, e.configCvalCache, []*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
|
||||
func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleId int64) bool {
|
||||
if event == nil {
|
||||
// 如果 eventCopy 为 nil,说明 eventCopy 被 processor drop 掉了, 不再发送通知
|
||||
return true
|
||||
}
|
||||
|
||||
if event.IsRecovered && event.NotifyRecovered == 0 {
|
||||
// 如果 eventCopy 是恢复事件,且 NotifyRecovered 为 0,则不发送通知
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
|
||||
for _, pipelineConfig := range pipelineConfigs {
|
||||
if !pipelineConfig.Enable {
|
||||
continue
|
||||
}
|
||||
|
||||
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
|
||||
if eventPipeline == nil {
|
||||
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
|
||||
continue
|
||||
}
|
||||
|
||||
if !PipelineApplicable(eventPipeline, event) {
|
||||
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
|
||||
continue
|
||||
}
|
||||
|
||||
processors := eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)
|
||||
for _, processor := range processors {
|
||||
var res string
|
||||
var err error
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, before processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, event)
|
||||
event, res, err = processor.Process(ctx, event)
|
||||
if event == nil {
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, after processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, eventOrigin)
|
||||
|
||||
if from == "notify_rule" {
|
||||
// alert_rule 获取不到 eventId 记录没有意义
|
||||
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", res, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by processor", from, id, pipelineConfig.PipelineId))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, after processor:%+v, event: %+v, res:%v, err:%v", from, id, pipelineConfig.PipelineId, processor, event, res, err)
|
||||
}
|
||||
}
|
||||
|
||||
event.FE2DB()
|
||||
event.FillTagsMap()
|
||||
return event
|
||||
}
|
||||
|
||||
func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
|
||||
if pipeline == nil {
|
||||
return true
|
||||
}
|
||||
@@ -265,7 +310,7 @@ func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
|
||||
return tagMatch && attributesMatch
|
||||
}
|
||||
|
||||
func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) bool {
|
||||
func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) error {
|
||||
tm := time.Unix(event.TriggerTime, 0)
|
||||
triggerTime := tm.Format("15:04")
|
||||
triggerWeek := int(tm.Weekday())
|
||||
@@ -317,6 +362,10 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
}
|
||||
}
|
||||
|
||||
if !timeMatch {
|
||||
return fmt.Errorf("event time not match time filter")
|
||||
}
|
||||
|
||||
severityMatch := false
|
||||
for i := range notifyConfig.Severities {
|
||||
if notifyConfig.Severities[i] == event.Severity {
|
||||
@@ -324,6 +373,10 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
}
|
||||
}
|
||||
|
||||
if !severityMatch {
|
||||
return fmt.Errorf("event severity not match severity filter")
|
||||
}
|
||||
|
||||
tagMatch := true
|
||||
if len(notifyConfig.LabelKeys) > 0 {
|
||||
for i := range notifyConfig.LabelKeys {
|
||||
@@ -335,28 +388,38 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
tagFilters, err := models.ParseTagFilter(notifyConfig.LabelKeys)
|
||||
if err != nil {
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
|
||||
return false
|
||||
return fmt.Errorf("failed to parse tag filter: %v", err)
|
||||
}
|
||||
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
|
||||
}
|
||||
|
||||
if !tagMatch {
|
||||
return fmt.Errorf("event tag not match tag filter")
|
||||
}
|
||||
|
||||
attributesMatch := true
|
||||
if len(notifyConfig.Attributes) > 0 {
|
||||
tagFilters, err := models.ParseTagFilter(notifyConfig.Attributes)
|
||||
if err != nil {
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
|
||||
return false
|
||||
return fmt.Errorf("failed to parse tag filter: %v", err)
|
||||
}
|
||||
|
||||
attributesMatch = common.MatchTags(event.JsonTagsAndValue(), tagFilters)
|
||||
}
|
||||
|
||||
if !attributesMatch {
|
||||
return fmt.Errorf("event attributes not match attributes filter")
|
||||
}
|
||||
|
||||
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
|
||||
return timeMatch && severityMatch && tagMatch && attributesMatch
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) ([]string, []int64, map[string]string) {
|
||||
func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) ([]string, []int64, []string, map[string]string) {
|
||||
customParams := make(map[string]string)
|
||||
var flashDutyChannelIDs []int64
|
||||
var pagerDutyRoutingKeys []string
|
||||
var userInfoParams models.CustomParams
|
||||
|
||||
for key, value := range notifyConfig.Params {
|
||||
@@ -374,13 +437,26 @@ func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string,
|
||||
}
|
||||
}
|
||||
}
|
||||
case "pagerduty_integration_keys", "pagerduty_integration_ids":
|
||||
if key == "pagerduty_integration_ids" {
|
||||
// 不处理ids,直接跳过,这个字段只给前端标记用
|
||||
continue
|
||||
}
|
||||
if data, err := json.Marshal(value); err == nil {
|
||||
var keys []string
|
||||
if json.Unmarshal(data, &keys) == nil {
|
||||
pagerDutyRoutingKeys = keys
|
||||
break
|
||||
}
|
||||
}
|
||||
default:
|
||||
// 避免直接 value.(string) 导致 panic,支持多种类型并统一为字符串
|
||||
customParams[key] = value.(string)
|
||||
}
|
||||
}
|
||||
|
||||
if len(userInfoParams.UserIDs) == 0 && len(userInfoParams.UserGroupIDs) == 0 {
|
||||
return []string{}, flashDutyChannelIDs, customParams
|
||||
return []string{}, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams
|
||||
}
|
||||
|
||||
userIds := make([]int64, 0)
|
||||
@@ -416,18 +492,20 @@ func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string,
|
||||
visited[user.Id] = true
|
||||
}
|
||||
|
||||
return sendtos, flashDutyChannelIDs, customParams
|
||||
return sendtos, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams
|
||||
}
|
||||
|
||||
func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
|
||||
func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, configCvalCache *memsto.CvalCache,
|
||||
events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
|
||||
if len(events) == 0 {
|
||||
logger.Errorf("notify_id: %d events is empty", notifyRuleId)
|
||||
return
|
||||
}
|
||||
|
||||
siteInfo := configCvalCache.GetSiteInfo()
|
||||
tplContent := make(map[string]interface{})
|
||||
if notifyChannel.RequestType != "flashduty" {
|
||||
tplContent = messageTemplate.RenderEvent(events)
|
||||
tplContent = messageTemplate.RenderEvent(events, siteInfo.SiteUrl)
|
||||
}
|
||||
|
||||
var contactKey string
|
||||
@@ -435,10 +513,7 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
|
||||
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
|
||||
}
|
||||
|
||||
sendtos, flashDutyChannelIDs, customParams := GetNotifyConfigParams(notifyConfig, contactKey, e.userCache, e.userGroupCache)
|
||||
|
||||
e.Astats.GaugeNotifyRecordQueueSize.Inc()
|
||||
defer e.Astats.GaugeNotifyRecordQueueSize.Dec()
|
||||
sendtos, flashDutyChannelIDs, pagerdutyRoutingKeys, customParams := GetNotifyConfigParams(notifyConfig, contactKey, userCache, userGroupCache)
|
||||
|
||||
switch notifyChannel.RequestType {
|
||||
case "flashduty":
|
||||
@@ -447,43 +522,51 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
|
||||
}
|
||||
|
||||
for i := range flashDutyChannelIDs {
|
||||
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
|
||||
}
|
||||
return
|
||||
|
||||
case "pagerduty":
|
||||
for _, routingKey := range pagerdutyRoutingKeys {
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendPagerDuty(events, routingKey, siteInfo.SiteUrl, notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, "", respBody, err)
|
||||
}
|
||||
|
||||
case "http":
|
||||
if e.notifyChannelCache.HttpConcurrencyAdd(notifyChannel.ID) {
|
||||
defer e.notifyChannelCache.HttpConcurrencyDone(notifyChannel.ID)
|
||||
}
|
||||
if notifyChannel.RequestConfig == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, request config not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
// 使用队列模式处理 http 通知
|
||||
// 创建通知任务
|
||||
task := &memsto.NotifyTask{
|
||||
Events: events,
|
||||
NotifyRuleId: notifyRuleId,
|
||||
NotifyChannel: notifyChannel,
|
||||
TplContent: tplContent,
|
||||
CustomParams: customParams,
|
||||
Sendtos: sendtos,
|
||||
}
|
||||
|
||||
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, http request config not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
}
|
||||
|
||||
if NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
|
||||
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos, resp, err)
|
||||
|
||||
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), resp, err)
|
||||
} else {
|
||||
for i := range sendtos {
|
||||
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
|
||||
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, []string{sendtos[i]}), resp, err)
|
||||
}
|
||||
// 将任务加入队列
|
||||
success := notifyChannelCache.EnqueueNotifyTask(task)
|
||||
if !success {
|
||||
logger.Errorf("failed to enqueue notify task for channel %d, notify_id: %d", notifyChannel.ID, notifyRuleId)
|
||||
// 如果入队失败,记录错误通知
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), "", errors.New("failed to enqueue notify task, queue is full"))
|
||||
}
|
||||
|
||||
case "smtp":
|
||||
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, e.notifyChannelCache.GetSmtpClient(notifyChannel.ID))
|
||||
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, notifyChannelCache.GetSmtpClient(notifyChannel.ID))
|
||||
|
||||
case "script":
|
||||
start := time.Now()
|
||||
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
|
||||
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
|
||||
default:
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
}
|
||||
@@ -498,6 +581,11 @@ func NeedBatchContacts(requestConfig *models.HTTPRequestConfig) bool {
|
||||
// event: 告警/恢复事件
|
||||
// isSubscribe: 告警事件是否由subscribe的配置产生
|
||||
func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
|
||||
go e.HandleEventWithNotifyRule(event)
|
||||
if event.IsRecovered && event.NotifyRecovered == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
rule := e.alertRuleCache.Get(event.RuleId)
|
||||
if rule == nil {
|
||||
return
|
||||
@@ -530,7 +618,6 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
|
||||
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
|
||||
}
|
||||
|
||||
go e.HandleEventWithNotifyRule(event)
|
||||
go e.Send(rule, event, notifyTarget, isSubscribe)
|
||||
|
||||
// 如果不是订阅规则产生的 event,则需要处理订阅规则的 event
|
||||
@@ -570,6 +657,10 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
|
||||
return
|
||||
}
|
||||
|
||||
if !sub.MatchCate(event.Cate) {
|
||||
return
|
||||
}
|
||||
|
||||
if !common.MatchTags(event.TagsMap, sub.ITags) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
|
||||
}
|
||||
|
||||
logger.Infof(
|
||||
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d %s",
|
||||
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
|
||||
event.Hash,
|
||||
status,
|
||||
location,
|
||||
@@ -29,6 +29,7 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
|
||||
event.TagsJSON,
|
||||
event.TriggerValue,
|
||||
event.TriggerTime,
|
||||
event.LastEvalTime,
|
||||
message,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -144,14 +144,24 @@ func (arw *AlertRuleWorker) Start() {
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Eval() {
|
||||
logger.Infof("eval:%s started", arw.Key())
|
||||
begin := time.Now()
|
||||
var message string
|
||||
|
||||
defer func() {
|
||||
if len(message) == 0 {
|
||||
logger.Infof("rule_eval:%s finished, duration:%v", arw.Key(), time.Since(begin))
|
||||
} else {
|
||||
logger.Infof("rule_eval:%s finished, duration:%v, message:%s", arw.Key(), time.Since(begin), message)
|
||||
}
|
||||
}()
|
||||
|
||||
if arw.Processor.PromEvalInterval == 0 {
|
||||
arw.Processor.PromEvalInterval = getPromEvalInterval(arw.Processor.ScheduleEntry.Schedule)
|
||||
}
|
||||
|
||||
cachedRule := arw.Rule
|
||||
if cachedRule == nil {
|
||||
// logger.Errorf("rule_eval:%s Rule not found", arw.Key())
|
||||
message = "rule not found"
|
||||
return
|
||||
}
|
||||
arw.Processor.Stats.CounterRuleEval.WithLabelValues().Inc()
|
||||
@@ -177,11 +187,12 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s get anomaly point err:%s", arw.Key(), err.Error())
|
||||
message = "failed to get anomaly points"
|
||||
return
|
||||
}
|
||||
|
||||
if arw.Processor == nil {
|
||||
logger.Warningf("rule_eval:%s Processor is nil", arw.Key())
|
||||
message = "processor is nil"
|
||||
return
|
||||
}
|
||||
|
||||
@@ -223,7 +234,7 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Stop() {
|
||||
logger.Infof("rule_eval %s stopped", arw.Key())
|
||||
logger.Infof("rule_eval:%s stopped", arw.Key())
|
||||
close(arw.Quit)
|
||||
c := arw.Scheduler.Stop()
|
||||
<-c.Done()
|
||||
@@ -275,7 +286,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
continue
|
||||
}
|
||||
|
||||
if query.VarEnabled {
|
||||
if query.VarEnabled && strings.Contains(query.PromQl, "$") {
|
||||
var anomalyPoints []models.AnomalyPoint
|
||||
if hasLabelLossAggregator(query) || notExactMatch(query) {
|
||||
// 若有聚合函数或非精确匹配则需要先填充变量然后查询,这个方式效率较低
|
||||
@@ -375,10 +386,21 @@ type sample struct {
|
||||
// 每个节点先查询无参数的 query, 即 mem_used_percent{} > curVal, 得到满足值变量的所有结果
|
||||
// 结果中有满足本节点参数变量的值,加入异常点列表
|
||||
// 参数变量的值不满足的组合,需要覆盖上层筛选中产生的异常点
|
||||
// VarFillingAfterQuery 先查询再过滤变量,效率较高,但无法处理有聚合函数导致标签丢失的情况
|
||||
//
|
||||
// 修复说明 (Issue #2971):
|
||||
// 原实现中使用参数变量组合作为 key 存储异常点,导致同一参数值下的多条时序数据互相覆盖。
|
||||
// 修复方案:
|
||||
// 1. 同一层内:使用完整的标签 hash 作为 key,避免不同时序数据覆盖
|
||||
// 2. 跨层级时:子层按参数变量组合前缀删除父层的所有相关告警,实现子筛选覆盖父筛选
|
||||
func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerClient promsdk.API) []models.AnomalyPoint {
|
||||
varToLabel := ExtractVarMapping(query.PromQl)
|
||||
fullQuery := removeVal(query.PromQl)
|
||||
// 存储所有的异常点,key 为参数变量的组合,可以实现子筛选对上一层筛选的覆盖
|
||||
// 存储所有的异常点
|
||||
// key 格式: {参数变量组合}@@{标签hash}
|
||||
// 这样可以:
|
||||
// 1. 同层内不同时序数据有不同的 key(标签hash不同)
|
||||
// 2. 跨层时可以按参数变量组合前缀删除父层的告警
|
||||
anomalyPointsMap := make(map[string]models.AnomalyPoint)
|
||||
// 统一变量配置格式
|
||||
VarConfigForCalc := &models.ChildVarConfig{
|
||||
@@ -405,7 +427,16 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
})
|
||||
// 遍历变量配置链表
|
||||
curNode := VarConfigForCalc
|
||||
isFirstLayer := true
|
||||
for curNode != nil {
|
||||
// 当前层收集到的所有异常点,按参数组合分组
|
||||
// key: 参数变量组合, value: 该组合下的所有异常点及其完整key
|
||||
type pointWithKey struct {
|
||||
point models.AnomalyPoint
|
||||
fullKey string
|
||||
}
|
||||
currentLayerPointsByParam := make(map[string][]pointWithKey)
|
||||
|
||||
for _, param := range curNode.ParamVal {
|
||||
// curQuery 当前节点的无参数 query,用于时序库查询
|
||||
curQuery := fullQuery
|
||||
@@ -447,8 +478,13 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
curRealQuery = fillVar(curRealQuery, paramKey, val)
|
||||
}
|
||||
|
||||
if _, ok := paramPermutation[strings.Join(cur, JoinMark)]; ok {
|
||||
anomalyPointsMap[strings.Join(cur, JoinMark)] = models.AnomalyPoint{
|
||||
paramKey := strings.Join(cur, JoinMark)
|
||||
if _, ok := paramPermutation[paramKey]; ok {
|
||||
// 计算标签 hash,确保不同的时序数据有不同的 key
|
||||
tagHash := hash.GetTagHash(seqVals[i].Metric)
|
||||
fullKey := paramKey + JoinMark + fmt.Sprintf("%d", tagHash)
|
||||
|
||||
point := models.AnomalyPoint{
|
||||
Key: seqVals[i].Metric.String(),
|
||||
Timestamp: seqVals[i].Timestamp.Unix(),
|
||||
Value: float64(seqVals[i].Value),
|
||||
@@ -456,17 +492,44 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
Severity: query.Severity,
|
||||
Query: curRealQuery,
|
||||
}
|
||||
// 生成异常点后,删除该参数组合
|
||||
delete(paramPermutation, strings.Join(cur, JoinMark))
|
||||
currentLayerPointsByParam[paramKey] = append(currentLayerPointsByParam[paramKey], pointWithKey{point: point, fullKey: fullKey})
|
||||
}
|
||||
}
|
||||
|
||||
// 剩余的参数组合为本层筛选不产生异常点的组合,需要覆盖上层筛选中产生的异常点
|
||||
for k, _ := range paramPermutation {
|
||||
delete(anomalyPointsMap, k)
|
||||
// 初始化空的参数组合(用于子层覆盖父层的场景)
|
||||
for paramKey := range paramPermutation {
|
||||
if _, exists := currentLayerPointsByParam[paramKey]; !exists {
|
||||
currentLayerPointsByParam[paramKey] = []pointWithKey{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 处理当前层的结果
|
||||
for paramKey, pointsWithKeys := range currentLayerPointsByParam {
|
||||
if !isFirstLayer {
|
||||
// 非首层(子层):先删除父层中该参数组合的所有告警
|
||||
// 这实现了 issue #2433 要求的子筛选覆盖父筛选功能
|
||||
keysToDelete := make([]string, 0)
|
||||
for k := range anomalyPointsMap {
|
||||
// key 格式: {参数组合}@@{标签hash}
|
||||
// 检查是否以当前参数组合开头(后面跟着 JoinMark)
|
||||
if strings.HasPrefix(k, paramKey+JoinMark) {
|
||||
keysToDelete = append(keysToDelete, k)
|
||||
}
|
||||
}
|
||||
for _, k := range keysToDelete {
|
||||
delete(anomalyPointsMap, k)
|
||||
}
|
||||
}
|
||||
|
||||
// 添加当前层的所有异常点
|
||||
for _, pwk := range pointsWithKeys {
|
||||
anomalyPointsMap[pwk.fullKey] = pwk.point
|
||||
}
|
||||
}
|
||||
|
||||
curNode = curNode.ChildVarConfigs
|
||||
isFirstLayer = false
|
||||
}
|
||||
|
||||
anomalyPoints := make([]models.AnomalyPoint, 0)
|
||||
@@ -1066,15 +1129,15 @@ func exclude(reHashTagIndex1 map[uint64][][]uint64, reHashTagIndex2 map[uint64][
|
||||
|
||||
func MakeSeriesMap(series []models.DataResp, seriesTagIndex map[uint64][]uint64, seriesStore map[uint64]models.DataResp) {
|
||||
for i := 0; i < len(series); i++ {
|
||||
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
tagHash := hash.GetTagHash(series[i].Metric)
|
||||
seriesStore[serieHash] = series[i]
|
||||
seriesStore[seriesHash] = series[i]
|
||||
|
||||
// 将曲线按照相同的 tag 分组
|
||||
if _, exists := seriesTagIndex[tagHash]; !exists {
|
||||
seriesTagIndex[tagHash] = make([]uint64, 0)
|
||||
}
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], seriesHash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1217,9 +1280,20 @@ func GetQueryRefAndUnit(query interface{}) (string, string, error) {
|
||||
// 每个节点先填充参数再进行查询, 即先得到完整的 promql avg(mem_used_percent{host="127.0.0.1"}) > 5
|
||||
// 再查询得到满足值变量的所有结果加入异常点列表
|
||||
// 参数变量的值不满足的组合,需要覆盖上层筛选中产生的异常点
|
||||
//
|
||||
// 修复说明 (Issue #2971):
|
||||
// 原实现中使用参数变量组合作为 key 存储异常点,导致同一参数值下的多条时序数据互相覆盖。
|
||||
// 修复方案:
|
||||
// 1. 同一层内:使用完整的标签 hash 作为 key,避免不同时序数据覆盖
|
||||
// 2. 跨层级时:子层按参数变量组合删除父层的所有相关告警,实现子筛选覆盖父筛选
|
||||
func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, readerClient promsdk.API) []models.AnomalyPoint {
|
||||
varToLabel := ExtractVarMapping(query.PromQl)
|
||||
// 存储异常点的 map,key 为参数变量的组合,可以实现子筛选对上一层筛选的覆盖
|
||||
|
||||
// 存储异常点的 map
|
||||
// key 格式: {参数变量组合}@@{标签hash}
|
||||
// 这样可以:
|
||||
// 1. 同层内不同时序数据有不同的 key(标签hash不同)
|
||||
// 2. 跨层时可以按参数变量组合前缀删除父层的告警
|
||||
anomalyPointsMap := sync.Map{}
|
||||
// 统一变量配置格式
|
||||
VarConfigForCalc := &models.ChildVarConfig{
|
||||
@@ -1244,11 +1318,19 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
sort.Slice(ParamKeys, func(i, j int) bool {
|
||||
return ParamKeys[i] < ParamKeys[j]
|
||||
})
|
||||
// 遍历变量配置链表
|
||||
|
||||
// 遍历变量配置链表(父层 -> 子层)
|
||||
curNode := VarConfigForCalc
|
||||
isFirstLayer := true
|
||||
for curNode != nil {
|
||||
// 当前层收集到的所有异常点,按参数组合分组
|
||||
// key: 参数变量组合, value: 该组合下的所有异常点
|
||||
currentLayerPointsByParam := make(map[string][]models.AnomalyPoint)
|
||||
var currentLayerMutex sync.Mutex
|
||||
|
||||
for _, param := range curNode.ParamVal {
|
||||
curPromql := query.PromQl
|
||||
|
||||
// 取出阈值变量
|
||||
valMap := make(map[string]string)
|
||||
for val, valQuery := range param {
|
||||
@@ -1268,7 +1350,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
}
|
||||
|
||||
keyToPromql := make(map[string]string)
|
||||
for paramPermutationKeys, _ := range paramPermutation {
|
||||
for paramPermutationKeys := range paramPermutation {
|
||||
realPromql := curPromql
|
||||
split := strings.Split(paramPermutationKeys, JoinMark)
|
||||
for j := range ParamKeys {
|
||||
@@ -1280,15 +1362,20 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
// 并发查询
|
||||
wg := sync.WaitGroup{}
|
||||
semaphore := make(chan struct{}, 200)
|
||||
for key, promql := range keyToPromql {
|
||||
for paramKey, promql := range keyToPromql {
|
||||
wg.Add(1)
|
||||
semaphore <- struct{}{}
|
||||
go func(key, promql string) {
|
||||
go func(paramKey, promql string) {
|
||||
defer func() {
|
||||
<-semaphore
|
||||
wg.Done()
|
||||
}()
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(
|
||||
fmt.Sprintf("%d", arw.DatasourceId),
|
||||
fmt.Sprintf("%d", arw.Rule.Id),
|
||||
).Inc()
|
||||
|
||||
value, _, err := readerClient.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
|
||||
@@ -1298,29 +1385,67 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
|
||||
points := models.ConvertAnomalyPoints(value)
|
||||
if len(points) == 0 {
|
||||
anomalyPointsMap.Delete(key)
|
||||
// 查询无结果时,标记该参数组合需要清除(用于子层覆盖父层)
|
||||
currentLayerMutex.Lock()
|
||||
if _, exists := currentLayerPointsByParam[paramKey]; !exists {
|
||||
currentLayerPointsByParam[paramKey] = []models.AnomalyPoint{}
|
||||
}
|
||||
currentLayerMutex.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
for i := 0; i < len(points); i++ {
|
||||
points[i].Severity = query.Severity
|
||||
points[i].Query = promql
|
||||
points[i].ValuesUnit = map[string]unit.FormattedValue{
|
||||
"v": unit.ValueFormatter(query.Unit, 2, points[i].Value),
|
||||
}
|
||||
// 每个异常点都需要生成 key,子筛选使用 key 覆盖上层筛选,解决 issue https://github.com/ccfos/nightingale/issues/2433 提的问题
|
||||
var cur []string
|
||||
for _, paramKey := range ParamKeys {
|
||||
val := string(points[i].Labels[model.LabelName(varToLabel[paramKey])])
|
||||
cur = append(cur, val)
|
||||
}
|
||||
anomalyPointsMap.Store(strings.Join(cur, JoinMark), points[i])
|
||||
}
|
||||
}(key, promql)
|
||||
|
||||
// 收集当前层的异常点
|
||||
currentLayerMutex.Lock()
|
||||
currentLayerPointsByParam[paramKey] = append(currentLayerPointsByParam[paramKey], points...)
|
||||
currentLayerMutex.Unlock()
|
||||
}(paramKey, promql)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// 处理当前层的结果
|
||||
for paramKey, points := range currentLayerPointsByParam {
|
||||
if !isFirstLayer {
|
||||
// 非首层(子层):先删除父层中该参数组合的所有告警
|
||||
// 这实现了 issue #2433 要求的子筛选覆盖父筛选功能
|
||||
keysToDelete := make([]string, 0)
|
||||
anomalyPointsMap.Range(func(k, v any) bool {
|
||||
keyStr := k.(string)
|
||||
// key 格式: {参数组合}@@{标签hash}
|
||||
// 检查是否以当前参数组合开头
|
||||
if strings.HasPrefix(keyStr, paramKey+JoinMark) {
|
||||
keysToDelete = append(keysToDelete, keyStr)
|
||||
}
|
||||
return true
|
||||
})
|
||||
for _, k := range keysToDelete {
|
||||
anomalyPointsMap.Delete(k)
|
||||
}
|
||||
}
|
||||
|
||||
// 添加当前层的所有异常点
|
||||
// 使用 参数组合 + 标签hash 作为 key,保证同一参数值下的不同时序数据不会互相覆盖
|
||||
for _, point := range points {
|
||||
// 计算标签 hash,确保不同的时序数据有不同的 key
|
||||
tagHash := hash.GetTagHash(point.Labels)
|
||||
fullKey := paramKey + JoinMark + fmt.Sprintf("%d", tagHash)
|
||||
anomalyPointsMap.Store(fullKey, point)
|
||||
}
|
||||
}
|
||||
|
||||
curNode = curNode.ChildVarConfigs
|
||||
isFirstLayer = false
|
||||
}
|
||||
|
||||
// 收集所有异常点
|
||||
anomalyPoints := make([]models.AnomalyPoint, 0)
|
||||
anomalyPointsMap.Range(func(key, value any) bool {
|
||||
if point, ok := value.(models.AnomalyPoint); ok {
|
||||
@@ -1328,6 +1453,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
return anomalyPoints
|
||||
}
|
||||
|
||||
@@ -1497,15 +1623,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Infof("rule_eval rid:%d req:%+v resp:%v", rule.Id, query, series)
|
||||
for i := 0; i < len(series); i++ {
|
||||
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
tagHash := hash.GetTagHash(series[i].Metric)
|
||||
seriesStore[serieHash] = series[i]
|
||||
seriesStore[seriesHash] = series[i]
|
||||
|
||||
// 将曲线按照相同的 tag 分组
|
||||
if _, exists := seriesTagIndex[tagHash]; !exists {
|
||||
seriesTagIndex[tagHash] = make([]uint64, 0)
|
||||
}
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], seriesHash)
|
||||
}
|
||||
ref, err := GetQueryRef(query)
|
||||
if err != nil {
|
||||
@@ -1539,8 +1665,8 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
var ts int64
|
||||
var sample models.DataResp
|
||||
var value float64
|
||||
for _, serieHash := range seriesHash {
|
||||
series, exists := seriesStore[serieHash]
|
||||
for _, seriesHash := range seriesHash {
|
||||
series, exists := seriesStore[seriesHash]
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
|
||||
continue
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package mute
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -9,6 +10,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -135,7 +137,8 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
|
||||
}
|
||||
|
||||
for i := 0; i < len(mutes); i++ {
|
||||
if MatchMute(event, mutes[i]) {
|
||||
matched, _ := MatchMute(event, mutes[i])
|
||||
if matched {
|
||||
return true, mutes[i].Id
|
||||
}
|
||||
}
|
||||
@@ -144,27 +147,21 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
|
||||
}
|
||||
|
||||
// MatchMute 如果传入了clock这个可选参数,就表示使用这个clock表示的时间,否则就从event的字段中取TriggerTime
|
||||
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
|
||||
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) (bool, error) {
|
||||
if mute.Disabled == 1 {
|
||||
return false
|
||||
return false, errors.New("mute is disabled")
|
||||
}
|
||||
|
||||
// 如果不是全局的,判断 匹配的 datasource id
|
||||
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
|
||||
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
|
||||
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
|
||||
idm[mute.DatasourceIdsJson[i]] = struct{}{}
|
||||
}
|
||||
|
||||
// 判断 event.datasourceId 是否包含在 idm 中
|
||||
if _, has := idm[event.DatasourceId]; !has {
|
||||
return false
|
||||
if !slices.Contains(mute.DatasourceIdsJson, event.DatasourceId) {
|
||||
return false, errors.New("datasource id not match")
|
||||
}
|
||||
}
|
||||
|
||||
if mute.MuteTimeType == models.TimeRange {
|
||||
if !mute.IsWithinTimeRange(event.TriggerTime) {
|
||||
return false
|
||||
return false, errors.New("event trigger time not within mute time range")
|
||||
}
|
||||
} else if mute.MuteTimeType == models.Periodic {
|
||||
ts := event.TriggerTime
|
||||
@@ -173,11 +170,11 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
}
|
||||
|
||||
if !mute.IsWithinPeriodicMute(ts) {
|
||||
return false
|
||||
return false, errors.New("event trigger time not within periodic mute range")
|
||||
}
|
||||
} else {
|
||||
logger.Warningf("mute time type invalid, %d", mute.MuteTimeType)
|
||||
return false
|
||||
return false, errors.New("mute time type invalid")
|
||||
}
|
||||
|
||||
var matchSeverity bool
|
||||
@@ -193,12 +190,14 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
}
|
||||
|
||||
if !matchSeverity {
|
||||
return false
|
||||
return false, errors.New("event severity not match mute severity")
|
||||
}
|
||||
|
||||
if mute.ITags == nil || len(mute.ITags) == 0 {
|
||||
return true
|
||||
if len(mute.ITags) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return common.MatchTags(event.TagsMap, mute.ITags)
|
||||
if !common.MatchTags(event.TagsMap, mute.ITags) {
|
||||
return false, errors.New("event tags not match mute tags")
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -115,7 +115,7 @@ func (n *Naming) heartbeat() error {
|
||||
newDatasource[datasourceIds[i]] = struct{}{}
|
||||
servers, err := n.ActiveServers(datasourceIds[i])
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", datasourceIds[i], err)
|
||||
logger.Warningf("heartbeat %d get active server err:%v", datasourceIds[i], err)
|
||||
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
|
||||
continue
|
||||
}
|
||||
@@ -148,7 +148,7 @@ func (n *Naming) heartbeat() error {
|
||||
|
||||
servers, err := n.ActiveServersByEngineName()
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
|
||||
logger.Warningf("heartbeat %d get active server err:%v", HostDatasource, err)
|
||||
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
@@ -17,7 +18,6 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -55,26 +55,23 @@ func (c *AISummaryConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
if c.Client == nil {
|
||||
if err := c.initHTTPClient(); err != nil {
|
||||
logger.Errorf("failed to initialize HTTP client: %v", err)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
|
||||
}
|
||||
}
|
||||
|
||||
// 准备告警事件信息
|
||||
eventInfo, err := c.prepareEventInfo(event)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to prepare event info: %v", err)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
// 调用AI模型生成总结
|
||||
summary, err := c.generateAISummary(eventInfo)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to generate AI summary: %v", err)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
// 将总结添加到annotations字段
|
||||
@@ -86,12 +83,11 @@ func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
// 更新Annotations字段
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal annotations: %v", err)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
|
||||
}
|
||||
event.Annotations = string(b)
|
||||
|
||||
return event
|
||||
return event, "", nil
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) initHTTPClient() error {
|
||||
@@ -137,7 +133,7 @@ func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string,
|
||||
func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
|
||||
// 构建基础请求参数
|
||||
reqParams := map[string]interface{}{
|
||||
"model": c.ModelName,
|
||||
"model": c.ModelName,
|
||||
"messages": []Message{
|
||||
{
|
||||
Role: "user",
|
||||
@@ -148,7 +144,11 @@ func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
|
||||
|
||||
// 合并自定义参数
|
||||
for k, v := range c.CustomParams {
|
||||
reqParams[k] = v
|
||||
converted, err := convertCustomParam(v)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to convert custom param %s: %v", k, err)
|
||||
}
|
||||
reqParams[k] = converted
|
||||
}
|
||||
|
||||
// 序列化请求体
|
||||
@@ -201,3 +201,44 @@ func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
|
||||
|
||||
return chatResp.Choices[0].Message.Content, nil
|
||||
}
|
||||
|
||||
// convertCustomParam converts a custom parameter passed in by the frontend
// into the most specific JSON-compatible Go type.
//
// Non-string values (including nil) are returned unchanged. A string value is
// tried, in order, as:
//
//  1. a base-10 integer that fits in int64 (returned as int64, exactly);
//  2. a floating-point number: integral values inside the int64 range are
//     returned as int64, every other finite value as float64;
//  3. a boolean accepted by strconv.ParseBool;
//  4. a JSON array (returned as []interface{});
//  5. a JSON object (returned as map[string]interface{}).
//
// A string matching none of these is returned as is. In particular "NaN" and
// "Inf"/"Infinity" stay strings: strconv.ParseFloat accepts them, but
// encoding/json refuses to marshal non-finite floats.
//
// The returned error is currently always nil; it is part of the signature so
// callers can treat conversion failures uniformly.
func convertCustomParam(value interface{}) (interface{}, error) {
	str, ok := value.(string)
	if !ok {
		// nil and already-typed values pass through untouched.
		return value, nil
	}

	// Parse plain integers directly so values above 2^53 are not rounded by a
	// detour through float64.
	if i, err := strconv.ParseInt(str, 10, 64); err == nil {
		return i, nil
	}

	if f, err := strconv.ParseFloat(str, 64); err == nil {
		// Converting an out-of-range float to int64 is implementation-dependent,
		// so only do it when f is known to lie in [-2^63, 2^63).
		const int64Limit = float64(1 << 63)
		if f >= -int64Limit && f < int64Limit && f == float64(int64(f)) {
			// Integral value written in float notation, e.g. "1.0" or "1e3".
			return int64(f), nil
		}
		// f*0 is 0 for every finite f and NaN for NaN/±Inf.
		if f*0 == 0 {
			return f, nil
		}
		// Non-finite: fall through; the value ends up being returned as a string.
	}

	if b, err := strconv.ParseBool(str); err == nil {
		return b, nil
	}

	trimmed := strings.TrimSpace(str)

	// Looks like a JSON array.
	if strings.HasPrefix(trimmed, "[") {
		var arr []interface{}
		if err := json.Unmarshal([]byte(str), &arr); err == nil {
			return arr, nil
		}
	}

	// Looks like a JSON object.
	if strings.HasPrefix(trimmed, "{") {
		var obj map[string]interface{}
		if err := json.Unmarshal([]byte(str), &obj); err == nil {
			return obj, nil
		}
	}

	return value, nil
}
|
||||
|
||||
@@ -54,7 +54,8 @@ func TestAISummaryConfig_Process(t *testing.T) {
|
||||
assert.NotNil(t, processor)
|
||||
|
||||
// 测试处理函数
|
||||
result := processor.Process(&ctx.Context{}, event)
|
||||
result, _, err := processor.Process(&ctx.Context{}, event)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.AnnotationsJSON["ai_summary"])
|
||||
|
||||
@@ -66,3 +67,73 @@ func TestAISummaryConfig_Process(t *testing.T) {
|
||||
t.Logf("原始注释: %v", result.AnnotationsJSON["description"])
|
||||
t.Logf("AI总结: %s", result.AnnotationsJSON["ai_summary"])
|
||||
}
|
||||
|
||||
func TestConvertCustomParam(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input interface{}
|
||||
expected interface{}
|
||||
hasError bool
|
||||
}{
|
||||
{
|
||||
name: "nil value",
|
||||
input: nil,
|
||||
expected: nil,
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "string number to int64",
|
||||
input: "123",
|
||||
expected: int64(123),
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "string float to float64",
|
||||
input: "123.45",
|
||||
expected: 123.45,
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "string boolean to bool",
|
||||
input: "true",
|
||||
expected: true,
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "string false to bool",
|
||||
input: "false",
|
||||
expected: false,
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "JSON array string to slice",
|
||||
input: `["a", "b", "c"]`,
|
||||
expected: []interface{}{"a", "b", "c"},
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "JSON object string to map",
|
||||
input: `{"key": "value", "num": 123}`,
|
||||
expected: map[string]interface{}{"key": "value", "num": float64(123)},
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "plain string remains string",
|
||||
input: "hello world",
|
||||
expected: "hello world",
|
||||
hasError: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
converted, err := convertCustomParam(test.input)
|
||||
if test.hasError {
|
||||
assert.Error(t, err)
|
||||
return
|
||||
}
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, test.expected, converted)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package callback
|
||||
import (
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@@ -42,7 +43,7 @@ func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
if c.Client == nil {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
@@ -51,7 +52,7 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to parse proxy url: %v", err)
|
||||
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
} else {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@@ -71,14 +72,12 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
|
||||
body, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal event: %v", err)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
logger.Errorf("failed to create request: %v event: %v", err, event)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
for k, v := range headers {
|
||||
@@ -91,16 +90,14 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to send request: %v event: %v", err, event)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to read response body: %v event: %v", err, event)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
logger.Infof("response body: %s", string(b))
|
||||
return event
|
||||
logger.Debugf("callback processor response body: %s", string(b))
|
||||
return event, "callback success", nil
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package eventdrop
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
texttemplate "text/template"
|
||||
|
||||
@@ -25,7 +26,7 @@ func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
// 使用背景是可以根据此处理器,实现对事件进行更加灵活的过滤的逻辑
|
||||
// 在标签过滤和属性过滤都不满足需求时可以使用
|
||||
// 如果模板执行结果为 true,则删除该事件
|
||||
@@ -40,22 +41,20 @@ func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
|
||||
tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
logger.Errorf("processor failed to parse template: %v event: %v", err, event)
|
||||
return event
|
||||
return event, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
if err = tpl.Execute(&body, event); err != nil {
|
||||
logger.Errorf("processor failed to execute template: %v event: %v", err, event)
|
||||
return event
|
||||
return event, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(body.String())
|
||||
logger.Infof("processor eventdrop result: %v", result)
|
||||
if result == "true" {
|
||||
logger.Infof("processor eventdrop drop event: %v", event)
|
||||
return nil
|
||||
return nil, "drop event success", nil
|
||||
}
|
||||
|
||||
return event
|
||||
return event, "drop event failed", nil
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package eventupdate
|
||||
import (
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@@ -30,7 +31,7 @@ func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
if c.Client == nil {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
@@ -39,7 +40,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to parse proxy url: %v", err)
|
||||
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
} else {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@@ -59,14 +60,12 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
|
||||
body, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal event: %v", err)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
logger.Errorf("failed to create request: %v event: %v", err, event)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
for k, v := range headers {
|
||||
@@ -79,22 +78,19 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to send request: %v event: %v", err, event)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to read response body: %v event: %v", err, event)
|
||||
return event
|
||||
return nil, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
|
||||
}
|
||||
logger.Infof("response body: %s", string(b))
|
||||
logger.Debugf("event update processor response body: %s", string(b))
|
||||
|
||||
err = json.Unmarshal(b, &event)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to unmarshal response body: %v event: %v", err, event)
|
||||
return event
|
||||
return event, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
return event
|
||||
return event, "", nil
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
sourceLabels := make([]model.LabelName, len(r.SourceLabels))
|
||||
for i := range r.SourceLabels {
|
||||
sourceLabels[i] = model.LabelName(strings.ReplaceAll(r.SourceLabels[i], ".", REPLACE_DOT))
|
||||
@@ -64,7 +64,7 @@ func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *
|
||||
}
|
||||
|
||||
EventRelabel(event, relabelConfigs)
|
||||
return event
|
||||
return event, "", nil
|
||||
}
|
||||
|
||||
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {
|
||||
|
||||
@@ -26,8 +26,6 @@ import (
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
|
||||
|
||||
type ExternalProcessorsType struct {
|
||||
ExternalLock sync.RWMutex
|
||||
Processors map[string]*Processor
|
||||
@@ -76,7 +74,6 @@ type Processor struct {
|
||||
|
||||
HandleFireEventHook HandleEventFunc
|
||||
HandleRecoverEventHook HandleEventFunc
|
||||
EventMuteHook EventMuteHookFunc
|
||||
|
||||
ScheduleEntry cron.Entry
|
||||
PromEvalInterval int
|
||||
@@ -121,7 +118,6 @@ func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64,
|
||||
|
||||
HandleFireEventHook: func(event *models.AlertCurEvent) {},
|
||||
HandleRecoverEventHook: func(event *models.AlertCurEvent) {},
|
||||
EventMuteHook: func(event *models.AlertCurEvent) bool { return false },
|
||||
}
|
||||
|
||||
p.mayHandleGroup()
|
||||
@@ -135,7 +131,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
p.inhibit = inhibit
|
||||
cachedRule := p.alertRuleCache.Get(p.rule.Id)
|
||||
if cachedRule == nil {
|
||||
logger.Errorf("rule not found %+v", anomalyPoints)
|
||||
logger.Warningf("process handle error: rule not found %+v rule_id:%d maybe rule has been deleted", anomalyPoints, p.rule.Id)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
return
|
||||
}
|
||||
@@ -155,9 +151,19 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
|
||||
hash := event.Hash
|
||||
alertingKeys[hash] = struct{}{}
|
||||
|
||||
// event processor
|
||||
eventCopy := event.DeepCopy()
|
||||
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
|
||||
if event == nil {
|
||||
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
|
||||
continue
|
||||
}
|
||||
|
||||
// event mute
|
||||
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
|
||||
if isMuted {
|
||||
logger.Debugf("rule_eval:%s event:%v is muted, detail:%s", p.Key(), event, detail)
|
||||
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -167,8 +173,8 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
continue
|
||||
}
|
||||
|
||||
if p.EventMuteHook(event) {
|
||||
logger.Debugf("rule_eval:%s event:%v is muted by hook", p.Key(), event)
|
||||
if dispatch.EventMuteHook(event) {
|
||||
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -428,17 +434,18 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
|
||||
continue
|
||||
}
|
||||
|
||||
var preTriggerTime int64 // 第一个 pending event 的触发时间
|
||||
var preEvalTime int64 // 第一个 pending event 的检测时间
|
||||
preEvent, has := p.pendings.Get(event.Hash)
|
||||
if has {
|
||||
p.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
|
||||
preTriggerTime = preEvent.TriggerTime
|
||||
preEvalTime = preEvent.FirstEvalTime
|
||||
} else {
|
||||
event.FirstEvalTime = event.LastEvalTime
|
||||
p.pendings.Set(event.Hash, event)
|
||||
preTriggerTime = event.TriggerTime
|
||||
preEvalTime = event.FirstEvalTime
|
||||
}
|
||||
|
||||
if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
|
||||
if event.LastEvalTime-preEvalTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
|
||||
fireEvents = append(fireEvents, event)
|
||||
if severity > event.Severity {
|
||||
severity = event.Severity
|
||||
@@ -467,16 +474,18 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s event:%+v fire", p.Key(), event)
|
||||
message := "unknown"
|
||||
defer func() {
|
||||
logger.Infof("rule_eval:%s event-hash-%s %s", p.Key(), event.Hash, message)
|
||||
}()
|
||||
|
||||
if fired, has := p.fires.Get(event.Hash); has {
|
||||
p.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
|
||||
event.FirstTriggerTime = fired.FirstTriggerTime
|
||||
p.HandleFireEventHook(event)
|
||||
|
||||
if cachedRule.NotifyRepeatStep == 0 {
|
||||
logger.Debugf("rule_eval:%s event:%+v repeat is zero nothing to do", p.Key(), event)
|
||||
// 说明不想重复通知,那就直接返回了,nothing to do
|
||||
// do not need to send alert again
|
||||
message = "stalled, rule.notify_repeat_step is 0, no need to repeat notify"
|
||||
return
|
||||
}
|
||||
|
||||
@@ -485,21 +494,26 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
|
||||
if cachedRule.NotifyMaxNumber == 0 {
|
||||
// 最大可以发送次数如果是0,表示不想限制最大发送次数,一直发即可
|
||||
event.NotifyCurNumber = fired.NotifyCurNumber + 1
|
||||
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_ignore(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
|
||||
p.pushEventToQueue(event)
|
||||
} else {
|
||||
// 有最大发送次数的限制,就要看已经发了几次了,是否达到了最大发送次数
|
||||
if fired.NotifyCurNumber >= cachedRule.NotifyMaxNumber {
|
||||
logger.Debugf("rule_eval:%s event:%+v reach max number", p.Key(), event)
|
||||
message = fmt.Sprintf("stalled, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_not_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, fired.NotifyCurNumber, cachedRule.NotifyMaxNumber)
|
||||
return
|
||||
} else {
|
||||
event.NotifyCurNumber = fired.NotifyCurNumber + 1
|
||||
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
|
||||
p.pushEventToQueue(event)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
message = fmt.Sprintf("stalled, notify_repeat_step_not_matched(%d < %d + %d * 60)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep)
|
||||
}
|
||||
} else {
|
||||
event.NotifyCurNumber = 1
|
||||
event.FirstTriggerTime = event.TriggerTime
|
||||
message = fmt.Sprintf("fired, first_trigger_time: %d", event.FirstTriggerTime)
|
||||
p.HandleFireEventHook(event)
|
||||
p.pushEventToQueue(event)
|
||||
}
|
||||
@@ -577,7 +591,9 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
|
||||
}
|
||||
|
||||
// handle rule tags
|
||||
for _, tag := range p.rule.AppendTagsJSON {
|
||||
tags := p.rule.AppendTagsJSON
|
||||
tags = append(tags, "rulename="+p.rule.Name)
|
||||
for _, tag := range tags {
|
||||
arr := strings.SplitN(tag, "=", 2)
|
||||
|
||||
var defs = []string{
|
||||
@@ -603,8 +619,6 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
|
||||
|
||||
tagsMap[arr[0]] = body.String()
|
||||
}
|
||||
|
||||
tagsMap["rulename"] = p.rule.Name
|
||||
p.tagsMap = tagsMap
|
||||
|
||||
// handle tagsArr
|
||||
|
||||
@@ -25,6 +25,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
if event.RuleId == 0 {
|
||||
ginx.Bomb(200, "event is illegal")
|
||||
}
|
||||
event.FE2DB()
|
||||
|
||||
event.TagsMap = make(map[string]string)
|
||||
for i := 0; i < len(event.TagsJSON); i++ {
|
||||
@@ -40,7 +41,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
|
||||
event.TagsMap[arr[0]] = arr[1]
|
||||
}
|
||||
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
|
||||
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
|
||||
if hit {
|
||||
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
|
||||
ginx.NewRender(c).Message(nil)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"net/url"
|
||||
"strings"
|
||||
@@ -134,13 +135,15 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
|
||||
|
||||
func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, channel string,
|
||||
stats *astats.Stats, events []*models.AlertCurEvent) {
|
||||
start := time.Now()
|
||||
res, err := doSend(url, body, channel, stats)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
NotifyRecord(ctx, events, 0, channel, token, res, err)
|
||||
}
|
||||
|
||||
func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, notifyRuleID int64, channel, target, res string, err error) {
|
||||
// 一个通知可能对应多个 event,都需要记录
|
||||
notis := make([]*models.NotificaitonRecord, 0, len(evts))
|
||||
notis := make([]*models.NotificationRecord, 0, len(evts))
|
||||
for _, evt := range evts {
|
||||
noti := models.NewNotificationRecord(evt, notifyRuleID, channel, target)
|
||||
if err != nil {
|
||||
@@ -166,7 +169,9 @@ func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, notifyRuleID i
|
||||
func doSend(url string, body interface{}, channel string, stats *astats.Stats) (string, error) {
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
|
||||
start := time.Now()
|
||||
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
|
||||
res = []byte(fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res))
|
||||
if err != nil {
|
||||
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
|
||||
@@ -141,7 +141,7 @@ func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
|
||||
func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
|
||||
conf := smtp
|
||||
if conf.Host == "" || conf.Port == 0 {
|
||||
logger.Warning("SMTP configurations invalid")
|
||||
logger.Debug("SMTP configurations invalid")
|
||||
<-mailQuit
|
||||
return
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ func ReportNotifyRecordQueueSize(stats *astats.Stats) {
|
||||
|
||||
// 推送通知记录到队列
|
||||
// 若队列满 则返回 error
|
||||
func PushNotifyRecords(records []*models.NotificaitonRecord) error {
|
||||
func PushNotifyRecords(records []*models.NotificationRecord) error {
|
||||
for _, record := range records {
|
||||
if ok := NotifyRecordQueue.PushFront(record); !ok {
|
||||
logger.Warningf("notify record queue is full, record: %+v", record)
|
||||
@@ -59,16 +59,16 @@ func (c *NotifyRecordConsumer) LoopConsume() {
|
||||
}
|
||||
|
||||
// 类型转换,不然 CreateInBatches 会报错
|
||||
notis := make([]*models.NotificaitonRecord, 0, len(inotis))
|
||||
notis := make([]*models.NotificationRecord, 0, len(inotis))
|
||||
for _, inoti := range inotis {
|
||||
notis = append(notis, inoti.(*models.NotificaitonRecord))
|
||||
notis = append(notis, inoti.(*models.NotificationRecord))
|
||||
}
|
||||
|
||||
c.consume(notis)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *NotifyRecordConsumer) consume(notis []*models.NotificaitonRecord) {
|
||||
func (c *NotifyRecordConsumer) consume(notis []*models.NotificationRecord) {
|
||||
if err := models.DB(c.ctx).CreateInBatches(notis, 100).Error; err != nil {
|
||||
logger.Errorf("add notis:%v failed, err: %v", notis, err)
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
|
||||
|
||||
channel := "script"
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
fpath := ".notify_scriptt"
|
||||
fpath := ".notify_script"
|
||||
if config.Type == 1 {
|
||||
fpath = config.Content
|
||||
} else {
|
||||
@@ -79,6 +79,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
|
||||
cmd.Stdout = &buf
|
||||
cmd.Stderr = &buf
|
||||
|
||||
start := time.Now()
|
||||
err := startCmd(cmd)
|
||||
if err != nil {
|
||||
logger.Errorf("event_script_notify_fail: run cmd err: %v", err)
|
||||
@@ -88,6 +89,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
|
||||
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
|
||||
|
||||
res := buf.String()
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
|
||||
// 截断超出长度的输出
|
||||
if len(res) > 512 {
|
||||
|
||||
@@ -37,7 +37,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
|
||||
req, err := http.NewRequest("POST", conf.Url, bf)
|
||||
if err != nil {
|
||||
logger.Warningf("%s alertingWebhook failed to new reques event:%s err:%v", channel, string(bs), err)
|
||||
logger.Warningf("%s alertingWebhook failed to new request event:%s err:%v", channel, string(bs), err)
|
||||
return true, "", err
|
||||
}
|
||||
|
||||
@@ -99,7 +99,9 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
|
||||
for _, conf := range webhooks {
|
||||
retryCount := 0
|
||||
for retryCount < 3 {
|
||||
start := time.Now()
|
||||
needRetry, res, err := sendWebhook(conf, event, stats)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, "webhook", conf.Url, res, err)
|
||||
if !needRetry {
|
||||
break
|
||||
@@ -169,7 +171,9 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
|
||||
|
||||
retryCount := 0
|
||||
for retryCount < webhook.RetryCount {
|
||||
start := time.Now()
|
||||
needRetry, res, err := sendWebhook(webhook, events, stats)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
go NotifyRecord(ctx, events, 0, "webhook", webhook.Url, res, err)
|
||||
if !needRetry {
|
||||
break
|
||||
|
||||
@@ -43,4 +43,16 @@ var Plugins = []Plugin{
|
||||
Type: "pgsql",
|
||||
TypeName: "PostgreSQL",
|
||||
},
|
||||
{
|
||||
Id: 8,
|
||||
Category: "logging",
|
||||
Type: "doris",
|
||||
TypeName: "Doris",
|
||||
},
|
||||
{
|
||||
Id: 9,
|
||||
Category: "logging",
|
||||
Type: "opensearch",
|
||||
TypeName: "OpenSearch",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -2,10 +2,13 @@ package center
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/dispatch"
|
||||
@@ -96,6 +99,9 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
models.MigrateEP(ctx)
|
||||
}
|
||||
|
||||
// 初始化 siteUrl,如果为空则设置默认值
|
||||
InitSiteUrl(ctx, config.Alert.Heartbeat.IP, config.HTTP.Port)
|
||||
|
||||
configCache := memsto.NewConfigCache(ctx, syncStats, config.HTTP.RSA.RSAPrivateKey, config.HTTP.RSA.RSAPassWord)
|
||||
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
|
||||
@@ -121,7 +127,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
macros.RegisterMacro(macros.MacroInVain)
|
||||
dscache.Init(ctx, false)
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache)
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
|
||||
|
||||
writers := writer.NewWriters(config.Pushgw)
|
||||
|
||||
@@ -159,3 +165,67 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
httpClean()
|
||||
}, nil
|
||||
}
|
||||
|
||||
// initSiteUrl 初始化 site_info 中的 site_url,如果为空则使用服务器IP和端口设置默认值
|
||||
func InitSiteUrl(ctx *ctx.Context, serverIP string, serverPort int) {
|
||||
// 构造默认的 SiteUrl
|
||||
defaultSiteUrl := fmt.Sprintf("http://%s:%d", serverIP, serverPort)
|
||||
|
||||
// 获取现有的 site_info 配置
|
||||
siteInfoStr, err := models.ConfigsGet(ctx, "site_info")
|
||||
if err != nil {
|
||||
logger.Errorf("failed to get site_info config: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果 site_info 不存在,创建新的
|
||||
if siteInfoStr == "" {
|
||||
newSiteInfo := memsto.SiteInfo{
|
||||
SiteUrl: defaultSiteUrl,
|
||||
}
|
||||
siteInfoBytes, err := json.Marshal(newSiteInfo)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = models.ConfigsSet(ctx, "site_info", string(siteInfoBytes))
|
||||
if err != nil {
|
||||
logger.Errorf("failed to set site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("initialized site_url with default value: %s", defaultSiteUrl)
|
||||
return
|
||||
}
|
||||
|
||||
// 检查现有的 site_info 中的 site_url 字段
|
||||
var existingSiteInfo memsto.SiteInfo
|
||||
err = json.Unmarshal([]byte(siteInfoStr), &existingSiteInfo)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to unmarshal site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果 site_url 已经有值,则不需要初始化
|
||||
if existingSiteInfo.SiteUrl != "" {
|
||||
return
|
||||
}
|
||||
|
||||
// 设置 site_url
|
||||
existingSiteInfo.SiteUrl = defaultSiteUrl
|
||||
|
||||
siteInfoBytes, err := json.Marshal(existingSiteInfo)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal updated site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = models.ConfigsSet(ctx, "site_info", string(siteInfoBytes))
|
||||
if err != nil {
|
||||
logger.Errorf("failed to update site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("initialized site_url with default value: %s", defaultSiteUrl)
|
||||
}
|
||||
|
||||
@@ -3,11 +3,15 @@ package integration
|
||||
import (
|
||||
"encoding/json"
|
||||
"path"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/toolkits/pkg/container/set"
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
@@ -15,7 +19,18 @@ import (
|
||||
|
||||
const SYSTEM = "system"
|
||||
|
||||
var BuiltinPayloadInFile *BuiltinPayloadInFileType
|
||||
|
||||
type BuiltinPayloadInFileType struct {
|
||||
Data map[uint64]map[string]map[string][]*models.BuiltinPayload // map[component_id]map[type]map[cate][]*models.BuiltinPayload
|
||||
IndexData map[int64]*models.BuiltinPayload // map[uuid]payload
|
||||
|
||||
BuiltinMetrics map[string]*models.BuiltinMetric
|
||||
}
|
||||
|
||||
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
BuiltinPayloadInFile = NewBuiltinPayloadInFileType()
|
||||
|
||||
err := models.InitBuiltinPayloads(ctx)
|
||||
if err != nil {
|
||||
logger.Warning("init old builtinPayloads fail ", err)
|
||||
@@ -109,13 +124,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
component.ID = old.ID
|
||||
}
|
||||
|
||||
// delete uuid is emtpy
|
||||
// delete uuid is empty
|
||||
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid = 0 and type != 'collect' and (updated_by = 'system' or updated_by = '')").Error
|
||||
if err != nil {
|
||||
logger.Warning("delete builtin payloads fail ", err)
|
||||
}
|
||||
|
||||
// delete builtin metrics uuid is emtpy
|
||||
// delete builtin metrics uuid is empty
|
||||
err = models.DB(ctx).Exec("delete from builtin_metrics where uuid = 0 and (updated_by = 'system' or updated_by = '')").Error
|
||||
if err != nil {
|
||||
logger.Warning("delete builtin metrics fail ", err)
|
||||
@@ -146,11 +161,10 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
}
|
||||
|
||||
newAlerts := []models.AlertRule{}
|
||||
writeAlertFileFlag := false
|
||||
for _, alert := range alerts {
|
||||
if alert.UUID == 0 {
|
||||
writeAlertFileFlag = true
|
||||
alert.UUID = time.Now().UnixNano()
|
||||
time.Sleep(time.Microsecond)
|
||||
alert.UUID = time.Now().UnixMicro()
|
||||
}
|
||||
|
||||
newAlerts = append(newAlerts, alert)
|
||||
@@ -169,47 +183,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
Tags: alert.AppendTags,
|
||||
Content: string(content),
|
||||
UUID: alert.UUID,
|
||||
ID: alert.UUID,
|
||||
CreatedBy: SYSTEM,
|
||||
UpdatedBy: SYSTEM,
|
||||
}
|
||||
BuiltinPayloadInFile.AddBuiltinPayload(&builtinAlert)
|
||||
|
||||
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin alert fail ", builtinAlert, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := builtinAlert.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin alert fail ", builtinAlert, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.ComponentID = component.ID
|
||||
old.Content = string(content)
|
||||
old.Name = alert.Name
|
||||
old.Tags = alert.AppendTags
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin alert:%+v fail %v", builtinAlert, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if writeAlertFileFlag {
|
||||
bs, err = json.MarshalIndent(newAlerts, "", " ")
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin alerts fail ", newAlerts, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = file.WriteBytes(fp, bs)
|
||||
if err != nil {
|
||||
logger.Warning("write builtin alerts file fail ", f, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -259,34 +239,14 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
Cate: "",
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
Note: dashboard.Note,
|
||||
Content: string(content),
|
||||
UUID: dashboard.UUID,
|
||||
ID: dashboard.UUID,
|
||||
CreatedBy: SYSTEM,
|
||||
UpdatedBy: SYSTEM,
|
||||
}
|
||||
|
||||
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin alert fail ", builtinDashboard, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := builtinDashboard.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin alert fail ", builtinDashboard, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.ComponentID = component.ID
|
||||
old.Content = string(content)
|
||||
old.Name = dashboard.Name
|
||||
old.Tags = dashboard.Tags
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin alert:%+v fail %v", builtinDashboard, err)
|
||||
}
|
||||
}
|
||||
BuiltinPayloadInFile.AddBuiltinPayload(&builtinDashboard)
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
|
||||
@@ -304,64 +264,23 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
}
|
||||
|
||||
metrics := []models.BuiltinMetric{}
|
||||
newMetrics := []models.BuiltinMetric{}
|
||||
err = json.Unmarshal(bs, &metrics)
|
||||
if err != nil {
|
||||
logger.Warning("parse builtin component metrics file fail", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
writeMetricFileFlag := false
|
||||
for _, metric := range metrics {
|
||||
if metric.UUID == 0 {
|
||||
writeMetricFileFlag = true
|
||||
metric.UUID = time.Now().UnixNano()
|
||||
time.Sleep(time.Microsecond)
|
||||
metric.UUID = time.Now().UnixMicro()
|
||||
}
|
||||
newMetrics = append(newMetrics, metric)
|
||||
metric.ID = metric.UUID
|
||||
metric.CreatedBy = SYSTEM
|
||||
metric.UpdatedBy = SYSTEM
|
||||
|
||||
old, err := models.BuiltinMetricGet(ctx, "uuid = ?", metric.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin metrics fail ", metric, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := metric.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin metrics fail ", metric, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.Collector = metric.Collector
|
||||
old.Typ = metric.Typ
|
||||
old.Name = metric.Name
|
||||
old.Unit = metric.Unit
|
||||
old.Note = metric.Note
|
||||
old.Lang = metric.Lang
|
||||
old.Expression = metric.Expression
|
||||
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin metric:%+v fail %v", metric, err)
|
||||
}
|
||||
}
|
||||
BuiltinPayloadInFile.BuiltinMetrics[metric.Expression] = &metric
|
||||
}
|
||||
|
||||
if writeMetricFileFlag {
|
||||
bs, err = json.MarshalIndent(newMetrics, "", " ")
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin metrics fail ", newMetrics, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = file.WriteBytes(fp, bs)
|
||||
if err != nil {
|
||||
logger.Warning("write builtin metrics file fail ", f, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component metrics dir fail %s %v", component.Ident, err)
|
||||
@@ -375,6 +294,7 @@ type BuiltinBoard struct {
|
||||
Name string `json:"name"`
|
||||
Ident string `json:"ident"`
|
||||
Tags string `json:"tags"`
|
||||
Note string `json:"note"`
|
||||
CreateAt int64 `json:"create_at"`
|
||||
CreateBy string `json:"create_by"`
|
||||
UpdateAt int64 `json:"update_at"`
|
||||
@@ -387,3 +307,346 @@ type BuiltinBoard struct {
|
||||
Hide int `json:"hide"` // 0: false, 1: true
|
||||
UUID int64 `json:"uuid"`
|
||||
}
|
||||
|
||||
func NewBuiltinPayloadInFileType() *BuiltinPayloadInFileType {
|
||||
return &BuiltinPayloadInFileType{
|
||||
Data: make(map[uint64]map[string]map[string][]*models.BuiltinPayload),
|
||||
IndexData: make(map[int64]*models.BuiltinPayload),
|
||||
BuiltinMetrics: make(map[string]*models.BuiltinMetric),
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) AddBuiltinPayload(bp *models.BuiltinPayload) {
|
||||
if _, exists := b.Data[bp.ComponentID]; !exists {
|
||||
b.Data[bp.ComponentID] = make(map[string]map[string][]*models.BuiltinPayload)
|
||||
}
|
||||
bpInType := b.Data[bp.ComponentID]
|
||||
if _, exists := bpInType[bp.Type]; !exists {
|
||||
bpInType[bp.Type] = make(map[string][]*models.BuiltinPayload)
|
||||
}
|
||||
bpInCate := bpInType[bp.Type]
|
||||
if _, exists := bpInCate[bp.Cate]; !exists {
|
||||
bpInCate[bp.Cate] = make([]*models.BuiltinPayload, 0)
|
||||
}
|
||||
bpInCate[bp.Cate] = append(bpInCate[bp.Cate], bp)
|
||||
|
||||
b.IndexData[bp.UUID] = bp
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) GetComponentIdentByCate(typ, cate string) string {
|
||||
|
||||
for _, source := range b.Data {
|
||||
if source == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
typeMap, exists := source[typ]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
payloads, exists := typeMap[cate]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
if len(payloads) > 0 {
|
||||
return payloads[0].Component
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) GetBuiltinPayload(typ, cate, query string, componentId uint64) ([]*models.BuiltinPayload, error) {
|
||||
|
||||
var result []*models.BuiltinPayload
|
||||
source := b.Data[componentId]
|
||||
|
||||
if source == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
typeMap, exists := source[typ]
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if cate != "" {
|
||||
payloads, exists := typeMap[cate]
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
result = append(result, filterByQuery(payloads, query)...)
|
||||
} else {
|
||||
for _, payloads := range typeMap {
|
||||
result = append(result, filterByQuery(payloads, query)...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(result) > 0 {
|
||||
sort.Slice(result, func(i, j int) bool {
|
||||
return result[i].Name < result[j].Name
|
||||
})
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) GetBuiltinPayloadCates(typ string, componentId uint64) ([]string, error) {
|
||||
var result []string
|
||||
source := b.Data[componentId]
|
||||
if source == nil {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
typeData := source[typ]
|
||||
if typeData == nil {
|
||||
return result, nil
|
||||
}
|
||||
for cate := range typeData {
|
||||
result = append(result, cate)
|
||||
}
|
||||
|
||||
sort.Strings(result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func filterByQuery(payloads []*models.BuiltinPayload, query string) []*models.BuiltinPayload {
|
||||
if query == "" {
|
||||
return payloads
|
||||
}
|
||||
|
||||
queryLower := strings.ToLower(query)
|
||||
var filtered []*models.BuiltinPayload
|
||||
for _, p := range payloads {
|
||||
if strings.Contains(strings.ToLower(p.Name), queryLower) || strings.Contains(strings.ToLower(p.Tags), queryLower) {
|
||||
filtered = append(filtered, p)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) BuiltinMetricGets(metricsInDB []*models.BuiltinMetric, lang, collector, typ, query, unit string, limit, offset int) ([]*models.BuiltinMetric, int, error) {
|
||||
var filteredMetrics []*models.BuiltinMetric
|
||||
expressionSet := set.NewStringSet()
|
||||
builtinMetricsByDB := convertBuiltinMetricByDB(metricsInDB)
|
||||
builtinMetricsMap := make(map[string]*models.BuiltinMetric)
|
||||
|
||||
for expression, metric := range builtinMetricsByDB {
|
||||
builtinMetricsMap[expression] = metric
|
||||
}
|
||||
|
||||
for expression, metric := range b.BuiltinMetrics {
|
||||
builtinMetricsMap[expression] = metric
|
||||
}
|
||||
|
||||
for _, metric := range builtinMetricsMap {
|
||||
if !applyFilter(metric, collector, typ, query, unit) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip if expression is already in db cache
|
||||
// NOTE: 忽略重复的expression,特别的,在旧版本中,用户可能已经创建了重复的metrics,需要覆盖掉ByFile中相同的Metrics
|
||||
// NOTE: Ignore duplicate expressions, especially in the old version, users may have created duplicate metrics,
|
||||
if expressionSet.Exists(metric.Expression) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Add db expression in set.
|
||||
expressionSet.Add(metric.Expression)
|
||||
|
||||
// Apply language
|
||||
trans, err := getTranslationWithLanguage(metric, lang)
|
||||
if err != nil {
|
||||
logger.Errorf("Error getting translation for metric %s: %v", metric.Name, err)
|
||||
continue // Skip if translation not found
|
||||
}
|
||||
metric.Name = trans.Name
|
||||
metric.Note = trans.Note
|
||||
|
||||
filteredMetrics = append(filteredMetrics, metric)
|
||||
}
|
||||
|
||||
// Sort metrics
|
||||
sort.Slice(filteredMetrics, func(i, j int) bool {
|
||||
if filteredMetrics[i].Collector != filteredMetrics[j].Collector {
|
||||
return filteredMetrics[i].Collector < filteredMetrics[j].Collector
|
||||
}
|
||||
if filteredMetrics[i].Typ != filteredMetrics[j].Typ {
|
||||
return filteredMetrics[i].Typ < filteredMetrics[j].Typ
|
||||
}
|
||||
return filteredMetrics[i].Expression < filteredMetrics[j].Expression
|
||||
})
|
||||
|
||||
totalCount := len(filteredMetrics)
|
||||
|
||||
// Validate parameters
|
||||
if offset < 0 {
|
||||
offset = 0
|
||||
}
|
||||
if limit < 0 {
|
||||
limit = 0
|
||||
}
|
||||
|
||||
// Handle edge cases
|
||||
if offset >= totalCount || limit == 0 {
|
||||
return []*models.BuiltinMetric{}, totalCount, nil
|
||||
}
|
||||
|
||||
// Apply pagination
|
||||
end := offset + limit
|
||||
if end > totalCount {
|
||||
end = totalCount
|
||||
}
|
||||
|
||||
return filteredMetrics[offset:end], totalCount, nil
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) BuiltinMetricTypes(lang, collector, query string) []string {
|
||||
typeSet := set.NewStringSet()
|
||||
for _, metric := range b.BuiltinMetrics {
|
||||
if !applyFilter(metric, collector, "", query, "") {
|
||||
continue
|
||||
}
|
||||
|
||||
typeSet.Add(metric.Typ)
|
||||
}
|
||||
|
||||
return typeSet.ToSlice()
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) BuiltinMetricCollectors(lang, typ, query string) []string {
|
||||
collectorSet := set.NewStringSet()
|
||||
for _, metric := range b.BuiltinMetrics {
|
||||
if !applyFilter(metric, "", typ, query, "") {
|
||||
continue
|
||||
}
|
||||
|
||||
collectorSet.Add(metric.Collector)
|
||||
}
|
||||
return collectorSet.ToSlice()
|
||||
}
|
||||
|
||||
func applyFilter(metric *models.BuiltinMetric, collector, typ, query, unit string) bool {
|
||||
if collector != "" && collector != metric.Collector {
|
||||
return false
|
||||
}
|
||||
|
||||
if typ != "" && typ != metric.Typ {
|
||||
return false
|
||||
}
|
||||
|
||||
if unit != "" && !containsUnit(unit, metric.Unit) {
|
||||
return false
|
||||
}
|
||||
|
||||
if query != "" && !applyQueryFilter(metric, query) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// containsUnit reports whether metricUnit equals one of the comma-separated
// entries of unit. Entries are compared exactly: no trimming and no case
// folding. An empty unit is treated as a single empty entry.
func containsUnit(unit, metricUnit string) bool {
	remaining := unit
	for {
		comma := strings.IndexByte(remaining, ',')
		if comma < 0 {
			// Last (or only) entry.
			return remaining == metricUnit
		}
		if remaining[:comma] == metricUnit {
			return true
		}
		remaining = remaining[comma+1:]
	}
}
|
||||
|
||||
func applyQueryFilter(metric *models.BuiltinMetric, query string) bool {
|
||||
qs := strings.Split(query, " ")
|
||||
for _, q := range qs {
|
||||
if strings.HasPrefix(q, "-") {
|
||||
q = strings.TrimPrefix(q, "-")
|
||||
if strings.Contains(metric.Name, q) || strings.Contains(metric.Note, q) || strings.Contains(metric.Expression, q) {
|
||||
return false
|
||||
}
|
||||
} else {
|
||||
if !strings.Contains(metric.Name, q) && !strings.Contains(metric.Note, q) && !strings.Contains(metric.Expression, q) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func getTranslationWithLanguage(bm *models.BuiltinMetric, lang string) (*models.Translation, error) {
|
||||
var defaultTranslation *models.Translation
|
||||
for _, t := range bm.Translation {
|
||||
if t.Lang == lang {
|
||||
return &t, nil
|
||||
}
|
||||
|
||||
if t.Lang == "en_US" {
|
||||
defaultTranslation = &t
|
||||
}
|
||||
}
|
||||
|
||||
if defaultTranslation != nil {
|
||||
return defaultTranslation, nil
|
||||
}
|
||||
|
||||
return nil, errors.Errorf("translation not found for metric %s", bm.Name)
|
||||
}
|
||||
|
||||
func convertBuiltinMetricByDB(metricsInDB []*models.BuiltinMetric) map[string]*models.BuiltinMetric {
|
||||
builtinMetricsByDB := make(map[string]*models.BuiltinMetric)
|
||||
builtinMetricsByDBList := make(map[string][]*models.BuiltinMetric)
|
||||
|
||||
for _, metric := range metricsInDB {
|
||||
builtinMetrics, ok := builtinMetricsByDBList[metric.Expression]
|
||||
if !ok {
|
||||
builtinMetrics = []*models.BuiltinMetric{}
|
||||
}
|
||||
|
||||
builtinMetrics = append(builtinMetrics, metric)
|
||||
builtinMetricsByDBList[metric.Expression] = builtinMetrics
|
||||
}
|
||||
|
||||
for expression, builtinMetrics := range builtinMetricsByDBList {
|
||||
if len(builtinMetrics) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// NOTE: 为兼容旧版本用户已经创建的 metrics,同时将修改 metrics 收敛到同一个记录上,
|
||||
// 我们选择使用 expression 相同但是 id 最小的 metric 记录作为主要的 Metric。
|
||||
sort.Slice(builtinMetrics, func(i, j int) bool {
|
||||
return builtinMetrics[i].ID < builtinMetrics[j].ID
|
||||
})
|
||||
|
||||
currentBuiltinMetric := builtinMetrics[0]
|
||||
// User has no customized translation, so we can merge it
|
||||
if len(currentBuiltinMetric.Translation) == 0 {
|
||||
translationMap := make(map[string]models.Translation)
|
||||
for _, bm := range builtinMetrics {
|
||||
for _, t := range getDefaultTranslation(bm) {
|
||||
translationMap[t.Lang] = t
|
||||
}
|
||||
}
|
||||
currentBuiltinMetric.Translation = make([]models.Translation, 0, len(translationMap))
|
||||
for _, t := range translationMap {
|
||||
currentBuiltinMetric.Translation = append(currentBuiltinMetric.Translation, t)
|
||||
}
|
||||
}
|
||||
|
||||
builtinMetricsByDB[expression] = currentBuiltinMetric
|
||||
}
|
||||
|
||||
return builtinMetricsByDB
|
||||
}
|
||||
|
||||
func getDefaultTranslation(bm *models.BuiltinMetric) []models.Translation {
|
||||
if len(bm.Translation) != 0 {
|
||||
return bm.Translation
|
||||
}
|
||||
|
||||
return []models.Translation{{
|
||||
Lang: bm.Lang,
|
||||
Name: bm.Name,
|
||||
Note: bm.Note,
|
||||
}}
|
||||
}
|
||||
|
||||
@@ -177,6 +177,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages := r.Group(pagesPrefix)
|
||||
{
|
||||
|
||||
pages.DELETE("/datasource/series", rt.auth(), rt.admin(), rt.deleteDatasourceSeries)
|
||||
if rt.Center.AnonymousAccess.PromQuerier {
|
||||
pages.Any("/proxy/:id/*url", rt.dsProxy)
|
||||
pages.POST("/query-range-batch", rt.promBatchQueryRange)
|
||||
@@ -231,6 +232,11 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/log-query", rt.QueryLog)
|
||||
}
|
||||
|
||||
// OpenSearch 专用接口
|
||||
pages.POST("/os-indices", rt.QueryOSIndices)
|
||||
pages.POST("/os-variable", rt.QueryOSVariable)
|
||||
pages.POST("/os-fields", rt.QueryOSFields)
|
||||
|
||||
pages.GET("/sql-template", rt.QuerySqlTemplate)
|
||||
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
|
||||
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.user(), rt.logoutPost)
|
||||
@@ -244,9 +250,11 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/auth/redirect", rt.loginRedirect)
|
||||
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
|
||||
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
|
||||
pages.GET("/auth/redirect/dingtalk", rt.loginRedirectDingTalk)
|
||||
pages.GET("/auth/callback", rt.loginCallback)
|
||||
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
|
||||
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
|
||||
pages.GET("/auth/callback/dingtalk", rt.loginCallbackDingTalk)
|
||||
pages.GET("/auth/perms", rt.allPerms)
|
||||
|
||||
pages.GET("/metrics/desc", rt.metricsDescGetFile)
|
||||
@@ -254,6 +262,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
|
||||
pages.GET("/notify-channels", rt.notifyChannelsGets)
|
||||
pages.GET("/contact-keys", rt.contactKeysGets)
|
||||
pages.GET("/install-date", rt.installDateGet)
|
||||
|
||||
pages.GET("/self/perms", rt.auth(), rt.user(), rt.permsGets)
|
||||
pages.GET("/self/profile", rt.auth(), rt.user(), rt.selfProfileGet)
|
||||
@@ -309,6 +318,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/busi-groups/tags", rt.auth(), rt.user(), rt.busiGroupsGetTags)
|
||||
|
||||
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
|
||||
pages.POST("/target-update", rt.auth(), rt.targetUpdate)
|
||||
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
|
||||
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
|
||||
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
|
||||
@@ -372,6 +382,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
|
||||
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.cloneToMachine)
|
||||
pages.POST("/busi-groups/alert-rules/clones", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.batchAlertRuleClone)
|
||||
pages.POST("/busi-group/alert-rules/notify-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleNotifyTryRun)
|
||||
pages.POST("/busi-group/alert-rules/enable-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleEnableTryRun)
|
||||
|
||||
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
|
||||
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
|
||||
@@ -397,6 +409,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.bgrw(), rt.alertSubscribeAdd)
|
||||
pages.PUT("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/put"), rt.bgrw(), rt.alertSubscribePut)
|
||||
pages.DELETE("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/del"), rt.bgrw(), rt.alertSubscribeDel)
|
||||
pages.POST("/alert-subscribe/alert-subscribes-tryrun", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.alertSubscribeTryRun)
|
||||
|
||||
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
|
||||
@@ -439,7 +452,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/datasource/status/update", rt.auth(), rt.admin(), rt.datasourceUpdataStatus)
|
||||
pages.DELETE("/datasource/", rt.auth(), rt.admin(), rt.datasourceDel)
|
||||
|
||||
pages.GET("/roles", rt.auth(), rt.user(), rt.perm("/roles"), rt.roleGets)
|
||||
pages.GET("/roles", rt.auth(), rt.user(), rt.roleGets)
|
||||
pages.POST("/roles", rt.auth(), rt.user(), rt.perm("/roles/add"), rt.roleAdd)
|
||||
pages.PUT("/roles", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.rolePut)
|
||||
pages.DELETE("/role/:id", rt.auth(), rt.user(), rt.perm("/roles/del"), rt.roleDel)
|
||||
@@ -513,10 +526,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
|
||||
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
|
||||
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinPayloadsAdd)
|
||||
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/components"), rt.builtinPayloadGet)
|
||||
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinPayloadsPut)
|
||||
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinPayloadsDel)
|
||||
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUIDOrID)
|
||||
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUID)
|
||||
|
||||
pages.POST("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/add"), rt.messageTemplatesAdd)
|
||||
pages.DELETE("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/del"), rt.messageTemplatesDel)
|
||||
@@ -534,6 +546,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/notify-rule/custom-params", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRuleCustomParamsGet)
|
||||
pages.POST("/notify-rule/event-pipelines-tryrun", rt.auth(), rt.user(), rt.perm("/notification-rules/add"), rt.tryRunEventProcessorByNotifyRule)
|
||||
|
||||
pages.GET("/event-tagkeys", rt.auth(), rt.user(), rt.eventTagKeys)
|
||||
pages.GET("/event-tagvalues", rt.auth(), rt.user(), rt.eventTagValues)
|
||||
|
||||
// 事件Pipeline相关路由
|
||||
pages.GET("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.eventPipelinesList)
|
||||
pages.POST("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/add"), rt.addEventPipeline)
|
||||
@@ -550,6 +565,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels"), rt.notifyChannelsGet)
|
||||
pages.GET("/simplified-notify-channel-configs", rt.notifyChannelsGetForNormalUser)
|
||||
pages.GET("/flashduty-channel-list/:id", rt.auth(), rt.user(), rt.flashDutyNotifyChannelsGet)
|
||||
pages.GET("/pagerduty-integration-key/:id/:service_id/:integration_id", rt.auth(), rt.user(), rt.pagerDutyIntegrationKeyGet)
|
||||
pages.GET("/pagerduty-service-list/:id", rt.auth(), rt.user(), rt.pagerDutyNotifyServicesGet)
|
||||
pages.GET("/notify-channel-config", rt.auth(), rt.user(), rt.notifyChannelGetBy)
|
||||
pages.GET("/notify-channel-config/idents", rt.notifyChannelIdentsGet)
|
||||
}
|
||||
@@ -615,6 +632,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/recording-rules", rt.recordingRuleGetsByService)
|
||||
|
||||
service.GET("/alert-mutes", rt.alertMuteGets)
|
||||
service.GET("/active-alert-mutes", rt.activeAlertMuteGets)
|
||||
service.POST("/alert-mutes", rt.alertMuteAddByService)
|
||||
service.DELETE("/alert-mutes", rt.alertMuteDel)
|
||||
|
||||
@@ -663,6 +681,14 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/message-templates", rt.messageTemplateGets)
|
||||
|
||||
service.GET("/event-pipelines", rt.eventPipelinesListByService)
|
||||
|
||||
// 手机号加密存储配置接口
|
||||
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
|
||||
service.POST("/users/phone/decrypt", rt.usersPhoneDecrypt)
|
||||
service.POST("/users/phone/refresh-encryption-config", rt.usersPhoneDecryptRefresh)
|
||||
|
||||
service.GET("/builtin-components", rt.builtinComponentsGets)
|
||||
service.GET("/builtin-payloads", rt.builtinPayloadsGets)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func getUserGroupIds(ctx *gin.Context, rt *Router, myGroups bool) ([]int64, error) {
|
||||
@@ -263,11 +264,11 @@ func GetCurEventDetail(ctx *ctx.Context, eid int64) (*models.AlertCurEvent, erro
|
||||
event.NotifyVersion, err = GetEventNotifyVersion(ctx, event.RuleId, event.NotifyRuleIds)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
event.NotifyRules, err = GetEventNorifyRuleNames(ctx, event.NotifyRuleIds)
|
||||
event.NotifyRules, err = GetEventNotifyRuleNames(ctx, event.NotifyRuleIds)
|
||||
return event, err
|
||||
}
|
||||
|
||||
func GetEventNorifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
|
||||
func GetEventNotifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
|
||||
notifyRuleNames := make([]*models.EventNotifyRule, 0)
|
||||
notifyRules, err := models.NotifyRulesGet(ctx, "id in ?", notifyRuleIds)
|
||||
if err != nil {
|
||||
@@ -305,3 +306,123 @@ func (rt *Router) alertCurEventDelByHash(c *gin.Context) {
|
||||
hash := ginx.QueryStr(c, "hash")
|
||||
ginx.NewRender(c).Message(models.AlertCurEventDelByHash(rt.Ctx, hash))
|
||||
}
|
||||
|
||||
func (rt *Router) eventTagKeys(c *gin.Context) {
|
||||
// 获取最近1天的活跃告警事件
|
||||
now := time.Now().Unix()
|
||||
stime := now - 24*3600
|
||||
etime := now
|
||||
|
||||
// 获取用户可见的业务组ID列表
|
||||
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get business group ids: %v", err)
|
||||
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 查询活跃告警事件,限制数量以提高性能
|
||||
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 200, 0, []int64{})
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get current alert events: %v", err)
|
||||
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果没有查到事件,返回默认标签
|
||||
if len(events) == 0 {
|
||||
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 收集所有标签键并去重
|
||||
tagKeys := make(map[string]struct{})
|
||||
for _, event := range events {
|
||||
for key := range event.TagsMap {
|
||||
tagKeys[key] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// 转换为字符串切片
|
||||
var result []string
|
||||
for key := range tagKeys {
|
||||
result = append(result, key)
|
||||
}
|
||||
|
||||
// 如果没有收集到任何标签键,返回默认值
|
||||
if len(result) == 0 {
|
||||
result = []string{"ident", "app", "service", "instance"}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) eventTagValues(c *gin.Context) {
|
||||
// 获取标签key
|
||||
tagKey := ginx.QueryStr(c, "key")
|
||||
|
||||
// 获取最近1天的活跃告警事件
|
||||
now := time.Now().Unix()
|
||||
stime := now - 24*3600
|
||||
etime := now
|
||||
|
||||
// 获取用户可见的业务组ID列表
|
||||
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get business group ids: %v", err)
|
||||
ginx.NewRender(c).Data([]string{}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 查询活跃告警事件,获取更多数据以保证统计准确性
|
||||
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 1000, 0, []int64{})
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get current alert events: %v", err)
|
||||
ginx.NewRender(c).Data([]string{}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果没有查到事件,返回空数组
|
||||
if len(events) == 0 {
|
||||
ginx.NewRender(c).Data([]string{}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 统计标签值出现次数
|
||||
valueCount := make(map[string]int)
|
||||
for _, event := range events {
|
||||
// TagsMap已经在AlertCurEventsGet中处理,直接使用
|
||||
if value, exists := event.TagsMap[tagKey]; exists && value != "" {
|
||||
valueCount[value]++
|
||||
}
|
||||
}
|
||||
|
||||
// 转换为切片并按出现次数降序排序
|
||||
type tagValue struct {
|
||||
value string
|
||||
count int
|
||||
}
|
||||
|
||||
tagValues := make([]tagValue, 0, len(valueCount))
|
||||
for value, count := range valueCount {
|
||||
tagValues = append(tagValues, tagValue{value, count})
|
||||
}
|
||||
|
||||
// 按出现次数降序排序
|
||||
sort.Slice(tagValues, func(i, j int) bool {
|
||||
return tagValues[i].count > tagValues[j].count
|
||||
})
|
||||
|
||||
// 只取Top20并转换为字符串数组
|
||||
limit := 20
|
||||
if len(tagValues) < limit {
|
||||
limit = len(tagValues)
|
||||
}
|
||||
|
||||
result := make([]string, 0, limit)
|
||||
for i := 0; i < limit; i++ {
|
||||
result = append(result, tagValues[i].value)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
@@ -62,11 +62,11 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
|
||||
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
|
||||
recovered, dsIds, cates, ruleId, query)
|
||||
recovered, dsIds, cates, ruleId, query, []int64{})
|
||||
ginx.Dangerous(err)
|
||||
|
||||
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
|
||||
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit))
|
||||
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit), []int64{})
|
||||
ginx.Dangerous(err)
|
||||
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
@@ -115,7 +115,18 @@ func (rt *Router) alertHisEventsDelete(c *gin.Context) {
|
||||
time.Sleep(100 * time.Millisecond) // 防止锁表
|
||||
}
|
||||
}()
|
||||
ginx.NewRender(c).Message("Alert history events deletion started")
|
||||
ginx.NewRender(c).Data("Alert history events deletion started", nil)
|
||||
}
|
||||
|
||||
var TransferEventToCur func(*ctx.Context, *models.AlertHisEvent) *models.AlertCurEvent
|
||||
|
||||
func init() {
|
||||
TransferEventToCur = transferEventToCur
|
||||
}
|
||||
|
||||
func transferEventToCur(ctx *ctx.Context, event *models.AlertHisEvent) *models.AlertCurEvent {
|
||||
cur := event.ToCur()
|
||||
return cur
|
||||
}
|
||||
|
||||
func (rt *Router) alertHisEventGet(c *gin.Context) {
|
||||
@@ -141,8 +152,8 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
|
||||
event.NotifyVersion, err = GetEventNotifyVersion(rt.Ctx, event.RuleId, event.NotifyRuleIds)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
event.NotifyRules, err = GetEventNorifyRuleNames(rt.Ctx, event.NotifyRuleIds)
|
||||
ginx.NewRender(c).Data(event, err)
|
||||
event.NotifyRules, err = GetEventNotifyRuleNames(rt.Ctx, event.NotifyRuleIds)
|
||||
ginx.NewRender(c).Data(TransferEventToCur(rt.Ctx, event), err)
|
||||
}
|
||||
|
||||
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, onlySelfGroupView bool, myGroups bool) ([]int64, error) {
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/mute"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/pconf"
|
||||
@@ -18,6 +19,7 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/jinzhu/copier"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
@@ -33,13 +35,12 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
for i := 0; i < len(ars); i++ {
|
||||
ars[i].FillNotifyGroups(rt.Ctx, cache)
|
||||
ars[i].FillSeverities()
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
|
||||
func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
|
||||
func GetAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
|
||||
stime = ginx.QueryInt64(c, "stime", 0)
|
||||
etime = ginx.QueryInt64(c, "etime", 0)
|
||||
if etime == 0 {
|
||||
@@ -78,7 +79,6 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
names := make([]string, 0, len(ars))
|
||||
for i := 0; i < len(ars); i++ {
|
||||
ars[i].FillNotifyGroups(rt.Ctx, cache)
|
||||
ars[i].FillSeverities()
|
||||
|
||||
if len(ars[i].DatasourceQueries) != 0 {
|
||||
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
|
||||
@@ -88,7 +88,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
names = append(names, ars[i].UpdateBy)
|
||||
}
|
||||
|
||||
stime, etime := getAlertCueEventTimeRange(c)
|
||||
stime, etime := GetAlertCueEventTimeRange(c)
|
||||
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
|
||||
if cnt != nil {
|
||||
for i := 0; i < len(ars); i++ {
|
||||
@@ -157,6 +157,120 @@ func (rt *Router) alertRuleAddByFE(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
type AlertRuleTryRunForm struct {
|
||||
EventId int64 `json:"event_id" binding:"required"`
|
||||
AlertRuleConfig models.AlertRule `json:"config" binding:"required"`
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleNotifyTryRun(c *gin.Context) {
|
||||
// check notify channels of old version
|
||||
var f AlertRuleTryRunForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if hisEvent == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "event not found")
|
||||
}
|
||||
|
||||
curEvent := *hisEvent.ToCur()
|
||||
curEvent.SetTagsMap()
|
||||
|
||||
if f.AlertRuleConfig.NotifyVersion == 1 {
|
||||
for _, id := range f.AlertRuleConfig.NotifyRuleIds {
|
||||
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
for _, notifyConfig := range notifyRule.NotifyConfigs {
|
||||
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data("notification test ok", nil)
|
||||
return
|
||||
}
|
||||
|
||||
if len(f.AlertRuleConfig.NotifyChannelsJSON) == 0 {
|
||||
ginx.Bomb(http.StatusOK, "no notify channels selected")
|
||||
}
|
||||
|
||||
if len(f.AlertRuleConfig.NotifyGroupsJSON) == 0 {
|
||||
ginx.Bomb(http.StatusOK, "no notify groups selected")
|
||||
}
|
||||
|
||||
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
|
||||
ugids := f.AlertRuleConfig.NotifyGroupsJSON
|
||||
ngids := make([]int64, 0)
|
||||
for i := 0; i < len(ugids); i++ {
|
||||
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
|
||||
ngids = append(ngids, gid)
|
||||
}
|
||||
}
|
||||
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
|
||||
uids := make([]int64, 0)
|
||||
for i := range userGroups {
|
||||
uids = append(uids, userGroups[i].UserIds...)
|
||||
}
|
||||
users := rt.UserCache.GetByUserIds(uids)
|
||||
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
|
||||
flag := true
|
||||
// ignore non-default channels
|
||||
switch NotifyChannels {
|
||||
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
|
||||
models.Telegram, models.Email, models.FeishuCard:
|
||||
// do nothing
|
||||
default:
|
||||
continue
|
||||
}
|
||||
// default channels
|
||||
for ui := range users {
|
||||
if _, b := users[ui].ExtractToken(NotifyChannels); b {
|
||||
flag = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if flag {
|
||||
ancs = append(ancs, NotifyChannels)
|
||||
}
|
||||
}
|
||||
if len(ancs) > 0 {
|
||||
ginx.Dangerous(errors.New(fmt.Sprintf("All users are missing notify channel configurations. Please check for missing tokens (each channel should be configured with at least one user). %v", ancs)))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data("notification test ok", nil)
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleEnableTryRun(c *gin.Context) {
|
||||
// check notify channels of old version
|
||||
var f AlertRuleTryRunForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if hisEvent == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "event not found")
|
||||
}
|
||||
|
||||
curEvent := *hisEvent.ToCur()
|
||||
curEvent.SetTagsMap()
|
||||
|
||||
if f.AlertRuleConfig.Disabled == 1 {
|
||||
ginx.Bomb(http.StatusOK, "rule is disabled")
|
||||
}
|
||||
|
||||
if mute.TimeSpanMuteStrategy(&f.AlertRuleConfig, &curEvent) {
|
||||
ginx.Bomb(http.StatusOK, "event is not match for period of time")
|
||||
}
|
||||
|
||||
if mute.BgNotMatchMuteStrategy(&f.AlertRuleConfig, &curEvent, rt.TargetCache) {
|
||||
ginx.Bomb(http.StatusOK, "event target busi group not match rule busi group")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data("event is effective", nil)
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleAddByImport(c *gin.Context) {
|
||||
username := c.MustGet("username").(string)
|
||||
|
||||
@@ -174,6 +288,15 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
|
||||
models.DataSourceQueryAll,
|
||||
}
|
||||
}
|
||||
|
||||
// 将导入的规则统一转为新版本的通知规则配置
|
||||
lst[i].NotifyVersion = 1
|
||||
lst[i].NotifyChannelsJSON = []string{}
|
||||
lst[i].NotifyGroupsJSON = []string{}
|
||||
lst[i].NotifyChannels = ""
|
||||
lst[i].NotifyGroups = ""
|
||||
lst[i].Callbacks = ""
|
||||
lst[i].CallbacksJSON = []string{}
|
||||
}
|
||||
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
@@ -192,19 +315,52 @@ func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
|
||||
var f promRuleForm
|
||||
ginx.Dangerous(c.BindJSON(&f))
|
||||
|
||||
// 首先尝试解析带 groups 的格式
|
||||
var pr struct {
|
||||
Groups []models.PromRuleGroup `yaml:"groups"`
|
||||
}
|
||||
err := yaml.Unmarshal([]byte(f.Payload), &pr)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "invalid yaml format, please use the example format. err: %v", err)
|
||||
|
||||
var groups []models.PromRuleGroup
|
||||
|
||||
if err != nil || len(pr.Groups) == 0 {
|
||||
// 如果解析失败或没有 groups,尝试解析规则数组格式
|
||||
var rules []models.PromRule
|
||||
err = yaml.Unmarshal([]byte(f.Payload), &rules)
|
||||
if err != nil {
|
||||
// 最后尝试解析单个规则格式
|
||||
var singleRule models.PromRule
|
||||
err = yaml.Unmarshal([]byte(f.Payload), &singleRule)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "invalid yaml format. err: %v", err)
|
||||
}
|
||||
|
||||
// 验证单个规则是否有效
|
||||
if singleRule.Alert == "" && singleRule.Record == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "input yaml is empty or invalid")
|
||||
}
|
||||
|
||||
rules = []models.PromRule{singleRule}
|
||||
}
|
||||
|
||||
// 验证规则数组是否为空
|
||||
if len(rules) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "input yaml contains no rules")
|
||||
}
|
||||
|
||||
// 将规则数组包装成 group
|
||||
groups = []models.PromRuleGroup{
|
||||
{
|
||||
Name: "imported_rules",
|
||||
Rules: rules,
|
||||
},
|
||||
}
|
||||
} else {
|
||||
// 使用已解析的 groups
|
||||
groups = pr.Groups
|
||||
}
|
||||
|
||||
if len(pr.Groups) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "input yaml is empty")
|
||||
}
|
||||
|
||||
lst := models.DealPromGroup(pr.Groups, f.DatasourceQueries, f.Disabled)
|
||||
lst := models.DealPromGroup(groups, f.DatasourceQueries, f.Disabled)
|
||||
username := c.MustGet("username").(string)
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
ginx.NewRender(c).Data(rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language")), nil)
|
||||
@@ -349,8 +505,8 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusBadRequest, "fields empty")
|
||||
}
|
||||
|
||||
f.Fields["update_by"] = c.MustGet("username").(string)
|
||||
f.Fields["update_at"] = time.Now().Unix()
|
||||
updateBy := c.MustGet("username").(string)
|
||||
updateAt := time.Now().Unix()
|
||||
|
||||
for i := 0; i < len(f.Ids); i++ {
|
||||
ar, err := models.AlertRuleGetById(rt.Ctx, f.Ids[i])
|
||||
@@ -367,7 +523,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
b, err := json.Marshal(originRule)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"rule_config": string(b)}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -380,7 +535,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
b, err := json.Marshal(ar.AnnotationsJSON)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -393,7 +547,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
b, err := json.Marshal(ar.AnnotationsJSON)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -403,7 +556,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
callback := callbacks.(string)
|
||||
if !strings.Contains(ar.Callbacks, callback) {
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": ar.Callbacks + " " + callback}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -413,7 +565,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
if callbacks, has := f.Fields["callbacks"]; has {
|
||||
callback := callbacks.(string)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": strings.ReplaceAll(ar.Callbacks, callback, "")}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -423,7 +574,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
bytes, err := json.Marshal(datasourceQueries)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"datasource_queries": bytes}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -439,6 +589,12 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
|
||||
}
|
||||
}
|
||||
|
||||
// 统一更新更新时间和更新人,只有更新时间变了,告警规则才会被引擎拉取
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{
|
||||
"update_by": updateBy,
|
||||
"update_at": updateAt,
|
||||
}))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
|
||||
@@ -2,13 +2,17 @@ package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
// Return all, front-end search and paging
|
||||
@@ -104,6 +108,148 @@ func (rt *Router) alertSubscribeAdd(c *gin.Context) {
|
||||
ginx.NewRender(c).Message(f.Add(rt.Ctx))
|
||||
}
|
||||
|
||||
// SubscribeTryRunForm is the JSON request body bound by alertSubscribeTryRun.
// Both fields are marked required by their binding tags.
type SubscribeTryRunForm struct {
|
||||
// EventId is the ID passed to models.AlertHisEventGetById to load the event under test.
EventId int64 `json:"event_id" binding:"required"`
|
||||
// SubscribeConfig is the subscription rule that is verified and then matched against that event.
SubscribeConfig models.AlertSubscribe `json:"config" binding:"required"`
|
||||
}
|
||||
|
||||
func (rt *Router) alertSubscribeTryRun(c *gin.Context) {
|
||||
var f SubscribeTryRunForm
|
||||
ginx.BindJSON(c, &f)
|
||||
ginx.Dangerous(f.SubscribeConfig.Verify())
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if hisEvent == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "event not found")
|
||||
}
|
||||
|
||||
curEvent := *hisEvent.ToCur()
|
||||
curEvent.SetTagsMap()
|
||||
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
// 先判断匹配条件
|
||||
if !f.SubscribeConfig.MatchCluster(curEvent.DatasourceId) {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event datasource not match"))
|
||||
}
|
||||
|
||||
if len(f.SubscribeConfig.RuleIds) != 0 {
|
||||
match := false
|
||||
for _, rid := range f.SubscribeConfig.RuleIds {
|
||||
if rid == curEvent.RuleId {
|
||||
match = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !match {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event rule id not match"))
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配 tag
|
||||
f.SubscribeConfig.Parse()
|
||||
if !common.MatchTags(curEvent.TagsMap, f.SubscribeConfig.ITags) {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event tags not match"))
|
||||
}
|
||||
|
||||
// 匹配group name
|
||||
if !common.MatchGroupsName(curEvent.GroupName, f.SubscribeConfig.IBusiGroups) {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event group name not match"))
|
||||
}
|
||||
|
||||
// 检查严重级别(Severity)匹配
|
||||
if len(f.SubscribeConfig.SeveritiesJson) != 0 {
|
||||
match := false
|
||||
for _, s := range f.SubscribeConfig.SeveritiesJson {
|
||||
if s == curEvent.Severity || s == 0 {
|
||||
match = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !match {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event severity not match"))
|
||||
}
|
||||
}
|
||||
|
||||
// 新版本通知规则
|
||||
if f.SubscribeConfig.NotifyVersion == 1 {
|
||||
if len(f.SubscribeConfig.NotifyRuleIds) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify rules selected"))
|
||||
}
|
||||
|
||||
for _, id := range f.SubscribeConfig.NotifyRuleIds {
|
||||
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, i18n.Sprintf(lang, "subscribe notify rule not found: %v", err))
|
||||
}
|
||||
|
||||
for _, notifyConfig := range notifyRule.NotifyConfigs {
|
||||
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "notify rule send error: %v", err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notification test ok"), nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 旧版通知方式
|
||||
f.SubscribeConfig.ModifyEvent(&curEvent)
|
||||
if len(curEvent.NotifyChannelsJSON) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify channels selected"))
|
||||
}
|
||||
|
||||
if len(curEvent.NotifyGroupsJSON) == 0 {
|
||||
ginx.Bomb(http.StatusOK, i18n.Sprintf(lang, "no notify groups selected"))
|
||||
}
|
||||
|
||||
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
|
||||
ugids := strings.Fields(f.SubscribeConfig.UserGroupIds)
|
||||
ngids := make([]int64, 0)
|
||||
for i := 0; i < len(ugids); i++ {
|
||||
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
|
||||
ngids = append(ngids, gid)
|
||||
}
|
||||
}
|
||||
|
||||
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
|
||||
uids := make([]int64, 0)
|
||||
for i := range userGroups {
|
||||
uids = append(uids, userGroups[i].UserIds...)
|
||||
}
|
||||
users := rt.UserCache.GetByUserIds(uids)
|
||||
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
|
||||
flag := true
|
||||
// ignore non-default channels
|
||||
switch NotifyChannels {
|
||||
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
|
||||
models.Telegram, models.Email, models.FeishuCard:
|
||||
// do nothing
|
||||
default:
|
||||
continue
|
||||
}
|
||||
// default channels
|
||||
for ui := range users {
|
||||
if _, b := users[ui].ExtractToken(NotifyChannels); b {
|
||||
flag = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if flag {
|
||||
ancs = append(ancs, NotifyChannels)
|
||||
}
|
||||
}
|
||||
if len(ancs) > 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "all users missing notify channel configurations: %v", ancs))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notify settings ok"), nil)
|
||||
}
|
||||
|
||||
func (rt *Router) alertSubscribePut(c *gin.Context) {
|
||||
var fs []models.AlertSubscribe
|
||||
ginx.BindJSON(c, &fs)
|
||||
@@ -142,6 +288,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
|
||||
"busi_groups",
|
||||
"note",
|
||||
"notify_rule_ids",
|
||||
"notify_version",
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ type boardForm struct {
|
||||
Name string `json:"name"`
|
||||
Ident string `json:"ident"`
|
||||
Tags string `json:"tags"`
|
||||
Note string `json:"note"`
|
||||
Configs string `json:"configs"`
|
||||
Public int `json:"public"`
|
||||
PublicCate int `json:"public_cate"`
|
||||
@@ -34,6 +35,7 @@ func (rt *Router) boardAdd(c *gin.Context) {
|
||||
Name: f.Name,
|
||||
Ident: f.Ident,
|
||||
Tags: f.Tags,
|
||||
Note: f.Note,
|
||||
Configs: f.Configs,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
@@ -115,6 +117,10 @@ func (rt *Router) boardPureGet(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusNotFound, "No such dashboard")
|
||||
}
|
||||
|
||||
// 清除创建者和更新者信息
|
||||
board.CreateBy = ""
|
||||
board.UpdateBy = ""
|
||||
|
||||
ginx.NewRender(c).Data(board, nil)
|
||||
}
|
||||
|
||||
@@ -180,10 +186,11 @@ func (rt *Router) boardPut(c *gin.Context) {
|
||||
bo.Name = f.Name
|
||||
bo.Ident = f.Ident
|
||||
bo.Tags = f.Tags
|
||||
bo.Note = f.Note
|
||||
bo.UpdateBy = me.Username
|
||||
bo.UpdateAt = time.Now().Unix()
|
||||
|
||||
err = bo.Update(rt.Ctx, "name", "ident", "tags", "update_by", "update_at")
|
||||
err = bo.Update(rt.Ctx, "name", "ident", "tags", "note", "update_by", "update_at")
|
||||
ginx.NewRender(c).Data(bo, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -2,8 +2,10 @@ package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
@@ -29,7 +31,7 @@ func (rt *Router) builtinMetricsAdd(c *gin.Context) {
|
||||
reterr := make(map[string]string)
|
||||
for i := 0; i < count; i++ {
|
||||
lst[i].Lang = lang
|
||||
lst[i].UUID = time.Now().UnixNano()
|
||||
lst[i].UUID = time.Now().UnixMicro()
|
||||
if err := lst[i].Add(rt.Ctx, username); err != nil {
|
||||
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
}
|
||||
@@ -48,11 +50,12 @@ func (rt *Router) builtinMetricsGets(c *gin.Context) {
|
||||
lang = "zh_CN"
|
||||
}
|
||||
|
||||
bm, err := models.BuiltinMetricGets(rt.Ctx, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
|
||||
bmInDB, err := models.BuiltinMetricGets(rt.Ctx, "", collector, typ, query, unit)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
total, err := models.BuiltinMetricCount(rt.Ctx, lang, collector, typ, query, unit)
|
||||
bm, total, err := integration.BuiltinPayloadInFile.BuiltinMetricGets(bmInDB, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": bm,
|
||||
"total": total,
|
||||
@@ -100,8 +103,26 @@ func (rt *Router) builtinMetricsTypes(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
metricTypeList, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
|
||||
ginx.NewRender(c).Data(metricTypeList, err)
|
||||
metricTypeListInDB, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
metricTypeListInFile := integration.BuiltinPayloadInFile.BuiltinMetricTypes(lang, collector, query)
|
||||
|
||||
typeMap := make(map[string]struct{})
|
||||
for _, metricType := range metricTypeListInDB {
|
||||
typeMap[metricType] = struct{}{}
|
||||
}
|
||||
for _, metricType := range metricTypeListInFile {
|
||||
typeMap[metricType] = struct{}{}
|
||||
}
|
||||
|
||||
metricTypeList := make([]string, 0, len(typeMap))
|
||||
for metricType := range typeMap {
|
||||
metricTypeList = append(metricTypeList, metricType)
|
||||
}
|
||||
sort.Strings(metricTypeList)
|
||||
|
||||
ginx.NewRender(c).Data(metricTypeList, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
|
||||
@@ -109,5 +130,24 @@ func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
ginx.NewRender(c).Data(models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query))
|
||||
collectorListInDB, err := models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
collectorListInFile := integration.BuiltinPayloadInFile.BuiltinMetricCollectors(lang, typ, query)
|
||||
|
||||
collectorMap := make(map[string]struct{})
|
||||
for _, collector := range collectorListInDB {
|
||||
collectorMap[collector] = struct{}{}
|
||||
}
|
||||
for _, collector := range collectorListInFile {
|
||||
collectorMap[collector] = struct{}{}
|
||||
}
|
||||
|
||||
collectorList := make([]string, 0, len(collectorMap))
|
||||
for collector := range collectorMap {
|
||||
collectorList = append(collectorList, collector)
|
||||
}
|
||||
sort.Strings(collectorList)
|
||||
|
||||
ginx.NewRender(c).Data(collectorList, nil)
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
@@ -18,6 +19,7 @@ type Board struct {
|
||||
Tags string `json:"tags"`
|
||||
Configs interface{} `json:"configs"`
|
||||
UUID int64 `json:"uuid"`
|
||||
Note string `json:"note"`
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
@@ -128,6 +130,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
UUID: dashboard.UUID,
|
||||
Note: dashboard.Note,
|
||||
Content: string(contentBytes),
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
@@ -163,6 +166,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
UUID: dashboard.UUID,
|
||||
Note: dashboard.Note,
|
||||
Content: string(contentBytes),
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
@@ -192,13 +196,26 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
|
||||
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "type", "")
|
||||
if typ == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "type is required")
|
||||
return
|
||||
}
|
||||
ComponentID := ginx.QueryInt64(c, "component_id", 0)
|
||||
|
||||
cate := ginx.QueryStr(c, "cate", "")
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
lst, err := models.BuiltinPayloadGets(rt.Ctx, uint64(ComponentID), typ, cate, query)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
lstInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayload(typ, cate, query, uint64(ComponentID))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if len(lstInFile) > 0 {
|
||||
lst = append(lst, lstInFile...)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
|
||||
@@ -206,21 +223,31 @@ func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
|
||||
ComponentID := ginx.QueryInt64(c, "component_id", 0)
|
||||
|
||||
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, uint64(ComponentID))
|
||||
ginx.NewRender(c).Data(cates, err)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
|
||||
func (rt *Router) builtinPayloadGet(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
catesInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayloadCates(typ, uint64(ComponentID))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", id)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
if bp == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "builtin payload not found")
|
||||
// 使用 map 进行去重
|
||||
cateMap := make(map[string]bool)
|
||||
|
||||
// 添加数据库中的分类
|
||||
for _, cate := range cates {
|
||||
cateMap[cate] = true
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(bp, nil)
|
||||
// 添加文件中的分类
|
||||
for _, cate := range catesInFile {
|
||||
cateMap[cate] = true
|
||||
}
|
||||
|
||||
// 将去重后的结果转换回切片
|
||||
result := make([]string, 0, len(cateMap))
|
||||
for cate := range cateMap {
|
||||
result = append(result, cate)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsPut(c *gin.Context) {
|
||||
@@ -251,6 +278,7 @@ func (rt *Router) builtinPayloadsPut(c *gin.Context) {
|
||||
|
||||
req.Name = dashboard.Name
|
||||
req.Tags = dashboard.Tags
|
||||
req.Note = dashboard.Note
|
||||
} else if req.Type == "collect" {
|
||||
c := make(map[string]interface{})
|
||||
if _, err := toml.Decode(req.Content, &c); err != nil {
|
||||
@@ -273,14 +301,15 @@ func (rt *Router) builtinPayloadsDel(c *gin.Context) {
|
||||
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsGetByUUIDOrID(c *gin.Context) {
|
||||
uuid := ginx.QueryInt64(c, "uuid", 0)
|
||||
// 优先以 uuid 为准
|
||||
if uuid != 0 {
|
||||
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid))
|
||||
return
|
||||
}
|
||||
func (rt *Router) builtinPayloadsGetByUUID(c *gin.Context) {
|
||||
uuid := ginx.QueryInt64(c, "uuid")
|
||||
|
||||
id := ginx.QueryInt64(c, "id", 0)
|
||||
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "id = ?", id))
|
||||
bp, err := models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if bp != nil {
|
||||
ginx.NewRender(c).Data(bp, nil)
|
||||
} else {
|
||||
ginx.NewRender(c).Data(integration.BuiltinPayloadInFile.IndexData[uuid], nil)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,22 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/datasource/opensearch"
|
||||
"github.com/ccfos/nightingale/v6/dskit/clickhouse"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -100,7 +105,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
|
||||
if !req.ForceSave {
|
||||
if req.PluginType == models.PROMETHEUS || req.PluginType == models.LOKI || req.PluginType == models.TDENGINE {
|
||||
err = DatasourceCheck(req)
|
||||
err = DatasourceCheck(c, req)
|
||||
if err != nil {
|
||||
Dangerous(c, err)
|
||||
return
|
||||
@@ -108,6 +113,90 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
for k, v := range req.SettingsJson {
|
||||
if strings.Contains(k, "cluster_name") {
|
||||
req.ClusterName = v.(string)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if req.PluginType == models.OPENSEARCH {
|
||||
b, err := json.Marshal(req.SettingsJson)
|
||||
if err != nil {
|
||||
logger.Warningf("marshal settings fail: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
var os opensearch.OpenSearch
|
||||
err = json.Unmarshal(b, &os)
|
||||
if err != nil {
|
||||
logger.Warningf("unmarshal settings fail: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(os.Nodes) == 0 {
|
||||
logger.Warningf("nodes empty, %+v", req)
|
||||
return
|
||||
}
|
||||
|
||||
req.HTTPJson = models.HTTP{
|
||||
Timeout: os.Timeout,
|
||||
Url: os.Nodes[0],
|
||||
Headers: os.Headers,
|
||||
TLS: models.TLS{
|
||||
SkipTlsVerify: os.TLS.SkipTlsVerify,
|
||||
},
|
||||
}
|
||||
|
||||
req.AuthJson = models.Auth{
|
||||
BasicAuth: os.Basic.Enable,
|
||||
BasicAuthUser: os.Basic.Username,
|
||||
BasicAuthPassword: os.Basic.Password,
|
||||
}
|
||||
}
|
||||
|
||||
if req.PluginType == models.CLICKHOUSE {
|
||||
b, err := json.Marshal(req.SettingsJson)
|
||||
if err != nil {
|
||||
logger.Warningf("marshal clickhouse settings failed: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
var ckConfig clickhouse.Clickhouse
|
||||
err = json.Unmarshal(b, &ckConfig)
|
||||
if err != nil {
|
||||
logger.Warningf("unmarshal clickhouse settings failed: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
// 检查ckconfig的nodes不应该以http://或https://开头
|
||||
for _, addr := range ckConfig.Nodes {
|
||||
if strings.HasPrefix(addr, "http://") || strings.HasPrefix(addr, "https://") {
|
||||
err = fmt.Errorf("clickhouse node address should not start with http:// or https:// : %s", addr)
|
||||
logger.Warningf("clickhouse node address invalid: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// InitCli 会自动检测并选择 HTTP 或 Native 协议
|
||||
err = ckConfig.InitCli()
|
||||
if err != nil {
|
||||
logger.Warningf("clickhouse connection failed: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 执行 SHOW DATABASES 测试连通性
|
||||
_, err = ckConfig.ShowDatabases(context.Background())
|
||||
if err != nil {
|
||||
logger.Warningf("clickhouse test query failed: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if req.Id == 0 {
|
||||
req.CreatedBy = username
|
||||
req.Status = "enabled"
|
||||
@@ -129,7 +218,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
Render(c, nil, err)
|
||||
}
|
||||
|
||||
func DatasourceCheck(ds models.Datasource) error {
|
||||
func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
|
||||
if ds.PluginType == models.PROMETHEUS || ds.PluginType == models.LOKI || ds.PluginType == models.TDENGINE {
|
||||
if ds.HTTPJson.Url == "" {
|
||||
return fmt.Errorf("url is empty")
|
||||
@@ -188,6 +277,10 @@ func DatasourceCheck(ds models.Datasource) error {
|
||||
req, err = http.NewRequest("GET", fullURL, nil)
|
||||
if err != nil {
|
||||
logger.Errorf("Error creating request: %v", err)
|
||||
if !strings.Contains(ds.HTTPJson.Url, "/loki") {
|
||||
lang := c.GetHeader("X-Language")
|
||||
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
|
||||
}
|
||||
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
|
||||
}
|
||||
}
|
||||
@@ -209,6 +302,10 @@ func DatasourceCheck(ds models.Datasource) error {
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
logger.Errorf("Error making request: %v\n", resp.StatusCode)
|
||||
if resp.StatusCode == 404 && ds.PluginType == models.LOKI && !strings.Contains(ds.HTTPJson.Url, "/loki") {
|
||||
lang := c.GetHeader("X-Language")
|
||||
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
|
||||
}
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("request url:%s failed code:%d body:%s", fullURL, resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
@@ -60,8 +60,8 @@ func (rt *Router) ShowTables(c *gin.Context) {
|
||||
}
|
||||
switch plug.(type) {
|
||||
case TableShower:
|
||||
if len(f.Querys) > 0 {
|
||||
database, ok := f.Querys[0].(string)
|
||||
if len(f.Queries) > 0 {
|
||||
database, ok := f.Queries[0].(string)
|
||||
if ok {
|
||||
tables, err = plug.(TableShower).ShowTables(c.Request.Context(), database)
|
||||
}
|
||||
@@ -90,8 +90,8 @@ func (rt *Router) DescribeTable(c *gin.Context) {
|
||||
switch plug.(type) {
|
||||
case TableDescriber:
|
||||
client := plug.(TableDescriber)
|
||||
if len(f.Querys) > 0 {
|
||||
columns, err = client.DescribeTable(c.Request.Context(), f.Querys[0])
|
||||
if len(f.Queries) > 0 {
|
||||
columns, err = client.DescribeTable(c.Request.Context(), f.Queries[0])
|
||||
}
|
||||
default:
|
||||
ginx.Bomb(200, "datasource not exists")
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
// 获取事件Pipeline列表
|
||||
@@ -140,18 +140,32 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
|
||||
}
|
||||
event := hisEvent.ToCur()
|
||||
|
||||
lang := c.GetHeader("X-Language")
|
||||
var result string
|
||||
for _, p := range f.PipelineConfig.ProcessorConfigs {
|
||||
processor, err := models.GetProcessorByType(p.Typ, p.Config)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
|
||||
}
|
||||
event = processor.Process(rt.Ctx, event)
|
||||
event, result, err = processor.Process(rt.Ctx, event)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
|
||||
}
|
||||
|
||||
if event == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "event is dropped")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, "event is dropped"),
|
||||
}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(event, nil)
|
||||
m := map[string]interface{}{
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, result),
|
||||
}
|
||||
ginx.NewRender(c).Data(m, nil)
|
||||
}
|
||||
|
||||
// 测试事件处理器
|
||||
@@ -170,15 +184,18 @@ func (rt *Router) tryRunEventProcessor(c *gin.Context) {
|
||||
|
||||
processor, err := models.GetProcessorByType(f.ProcessorConfig.Typ, f.ProcessorConfig.Config)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor err: %+v", err)
|
||||
ginx.Bomb(200, "get processor err: %+v", err)
|
||||
}
|
||||
event = processor.Process(rt.Ctx, event)
|
||||
logger.Infof("processor %+v result: %+v", f.ProcessorConfig, event)
|
||||
if event == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "event is dropped")
|
||||
event, res, err := processor.Process(rt.Ctx, event)
|
||||
if err != nil {
|
||||
ginx.Bomb(200, "processor err: %+v", err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(event, nil)
|
||||
lang := c.GetHeader("X-Language")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, res),
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
@@ -212,9 +229,18 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
|
||||
}
|
||||
event = processor.Process(rt.Ctx, event)
|
||||
|
||||
event, _, err := processor.Process(rt.Ctx, event)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
|
||||
}
|
||||
if event == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "event is dropped")
|
||||
lang := c.GetHeader("X-Language")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, "event is dropped"),
|
||||
}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,6 +128,12 @@ func UserGroup(ctx *ctx.Context, id int64) *models.UserGroup {
|
||||
ginx.Bomb(http.StatusNotFound, "No such UserGroup")
|
||||
}
|
||||
|
||||
bgids, err := models.BusiGroupIds(ctx, []int64{id})
|
||||
ginx.Dangerous(err)
|
||||
|
||||
obj.BusiGroups, err = models.BusiGroupGetByIds(ctx, bgids)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
|
||||
@@ -2,13 +2,16 @@ package router
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
@@ -17,8 +20,10 @@ import (
|
||||
"github.com/dgrijalva/jwt-go"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
type loginForm struct {
|
||||
@@ -107,9 +112,20 @@ func (rt *Router) logoutPost(c *gin.Context) {
|
||||
|
||||
var logoutAddr string
|
||||
user := c.MustGet("user").(*models.User)
|
||||
|
||||
// 获取用户的 id_token
|
||||
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
|
||||
if err != nil {
|
||||
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
|
||||
idToken = "" // 如果获取失败,使用空字符串
|
||||
}
|
||||
|
||||
// 删除 id_token
|
||||
rt.deleteIdToken(c.Request.Context(), user.Id)
|
||||
|
||||
switch user.Belong {
|
||||
case "oidc":
|
||||
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr()
|
||||
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr(idToken)
|
||||
case "cas":
|
||||
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
|
||||
case "oauth2":
|
||||
@@ -199,6 +215,14 @@ func (rt *Router) refreshPost(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
// 延长 id_token 的过期时间,使其与新的 refresh token 生命周期保持一致
|
||||
// 注意:这里不会获取新的 id_token,只是延长 Redis 中现有 id_token 的 TTL
|
||||
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
|
||||
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
|
||||
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"access_token": ts.AccessToken,
|
||||
"refresh_token": ts.RefreshToken,
|
||||
@@ -286,6 +310,13 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
// 保存 id_token 到 Redis,用于登出时使用
|
||||
if ret.IdToken != "" {
|
||||
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
|
||||
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
|
||||
}
|
||||
}
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
redirect = ret.Redirect
|
||||
@@ -413,6 +444,81 @@ func (rt *Router) loginRedirectOAuth(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(redirect, err)
|
||||
}
|
||||
|
||||
func (rt *Router) loginRedirectDingTalk(c *gin.Context) {
|
||||
redirect := ginx.QueryStr(c, "redirect", "/")
|
||||
|
||||
v, exists := c.Get("userid")
|
||||
if exists {
|
||||
userid := v.(int64)
|
||||
user, err := models.UserGetById(rt.Ctx, userid)
|
||||
ginx.Dangerous(err)
|
||||
if user == nil {
|
||||
ginx.Bomb(200, "user not found")
|
||||
}
|
||||
|
||||
if user.Username != "" { // already login
|
||||
ginx.NewRender(c).Data(redirect, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if !rt.Sso.DingTalk.Enable {
|
||||
ginx.NewRender(c).Data("", nil)
|
||||
return
|
||||
}
|
||||
|
||||
redirect, err := rt.Sso.DingTalk.Authorize(rt.Redis, redirect)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(redirect, err)
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
if err != nil {
|
||||
logger.Errorf("sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
|
||||
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if user != nil {
|
||||
if rt.Sso.DingTalk.DingTalkConfig.CoverAttributes {
|
||||
updatedFields := user.UpdateSsoFields(dingtalk.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
|
||||
}
|
||||
} else {
|
||||
user = new(models.User)
|
||||
user.FullSsoFields(dingtalk.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.DingTalk.DingTalkConfig.DefaultRoles)
|
||||
// create user from dingtalk
|
||||
ginx.Dangerous(user.Add(rt.Ctx))
|
||||
}
|
||||
|
||||
// set user login state
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
redirect = ret.Redirect
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(CallbackOutput{
|
||||
Redirect: redirect,
|
||||
User: user,
|
||||
AccessToken: ts.AccessToken,
|
||||
RefreshToken: ts.RefreshToken,
|
||||
}, nil)
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
@@ -459,13 +565,14 @@ func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
}
|
||||
|
||||
type SsoConfigOutput struct {
|
||||
OidcDisplayName string `json:"oidcDisplayName"`
|
||||
CasDisplayName string `json:"casDisplayName"`
|
||||
OauthDisplayName string `json:"oauthDisplayName"`
|
||||
OidcDisplayName string `json:"oidcDisplayName"`
|
||||
CasDisplayName string `json:"casDisplayName"`
|
||||
OauthDisplayName string `json:"oauthDisplayName"`
|
||||
DingTalkDisplayName string `json:"dingTalkDisplayName"`
|
||||
}
|
||||
|
||||
func (rt *Router) ssoConfigNameGet(c *gin.Context) {
|
||||
var oidcDisplayName, casDisplayName, oauthDisplayName string
|
||||
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName string
|
||||
if rt.Sso.OIDC != nil {
|
||||
oidcDisplayName = rt.Sso.OIDC.GetDisplayName()
|
||||
}
|
||||
@@ -478,23 +585,85 @@ func (rt *Router) ssoConfigNameGet(c *gin.Context) {
|
||||
oauthDisplayName = rt.Sso.OAuth2.GetDisplayName()
|
||||
}
|
||||
|
||||
if rt.Sso.DingTalk != nil {
|
||||
dingTalkDisplayName = rt.Sso.DingTalk.GetDisplayName()
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(SsoConfigOutput{
|
||||
OidcDisplayName: oidcDisplayName,
|
||||
CasDisplayName: casDisplayName,
|
||||
OauthDisplayName: oauthDisplayName,
|
||||
OidcDisplayName: oidcDisplayName,
|
||||
CasDisplayName: casDisplayName,
|
||||
OauthDisplayName: oauthDisplayName,
|
||||
DingTalkDisplayName: dingTalkDisplayName,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) ssoConfigGets(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(models.SsoConfigGets(rt.Ctx))
|
||||
var ssoConfigs []models.SsoConfig
|
||||
lst, err := models.SsoConfigGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
if len(lst) == 0 {
|
||||
ginx.NewRender(c).Data(ssoConfigs, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
|
||||
dingTalkExist := false
|
||||
for _, config := range lst {
|
||||
var ssoReqConfig models.SsoConfig
|
||||
ssoReqConfig.Id = config.Id
|
||||
ssoReqConfig.Name = config.Name
|
||||
ssoReqConfig.UpdateAt = config.UpdateAt
|
||||
switch config.Name {
|
||||
case dingtalk.SsoTypeName:
|
||||
dingTalkExist = true
|
||||
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
ssoReqConfig.Content = config.Content
|
||||
}
|
||||
|
||||
ssoConfigs = append(ssoConfigs, ssoReqConfig)
|
||||
}
|
||||
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
|
||||
if !dingTalkExist {
|
||||
var ssoConfig models.SsoConfig
|
||||
ssoConfig.Name = dingtalk.SsoTypeName
|
||||
ssoConfigs = append(ssoConfigs, ssoConfig)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(ssoConfigs, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) ssoConfigUpdate(c *gin.Context) {
|
||||
var f models.SsoConfig
|
||||
ginx.BindJSON(c, &f)
|
||||
var ssoConfig models.SsoConfig
|
||||
ginx.BindJSON(c, &ssoConfig)
|
||||
|
||||
err := f.Update(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
switch ssoConfig.Name {
|
||||
case dingtalk.SsoTypeName:
|
||||
f.Name = ssoConfig.Name
|
||||
setting, err := json.Marshal(ssoConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
f.Content = string(setting)
|
||||
f.UpdateAt = time.Now().Unix()
|
||||
sso, err := f.Query(rt.Ctx)
|
||||
if !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
err = f.Create(rt.Ctx)
|
||||
} else {
|
||||
f.Id = sso.Id
|
||||
err = f.Update(rt.Ctx)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
f.Id = ssoConfig.Id
|
||||
f.Name = ssoConfig.Name
|
||||
f.Content = ssoConfig.Content
|
||||
err := f.Update(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
switch f.Name {
|
||||
case "LDAP":
|
||||
@@ -518,6 +687,14 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
|
||||
err := toml.Unmarshal([]byte(f.Content), &config)
|
||||
ginx.Dangerous(err)
|
||||
rt.Sso.OAuth2.Reload(config)
|
||||
case dingtalk.SsoTypeName:
|
||||
var config dingtalk.Config
|
||||
err := json.Unmarshal([]byte(f.Content), &config)
|
||||
ginx.Dangerous(err)
|
||||
if rt.Sso.DingTalk == nil {
|
||||
rt.Sso.DingTalk = dingtalk.New(config)
|
||||
}
|
||||
rt.Sso.DingTalk.Reload(config)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
|
||||
@@ -193,10 +193,9 @@ func (rt *Router) eventsMessage(c *gin.Context) {
|
||||
events[i] = he.ToCur()
|
||||
}
|
||||
|
||||
var defs = []string{
|
||||
"{{$events := .}}",
|
||||
"{{$event := index . 0}}",
|
||||
}
|
||||
renderData := make(map[string]interface{})
|
||||
renderData["events"] = events
|
||||
defs := models.GetDefs(renderData)
|
||||
ret := make(map[string]string, len(req.Tpl.Content))
|
||||
for k, v := range req.Tpl.Content {
|
||||
text := strings.Join(append(defs, v), "")
|
||||
@@ -207,7 +206,7 @@ func (rt *Router) eventsMessage(c *gin.Context) {
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
err = tpl.Execute(&buf, events)
|
||||
err = tpl.Execute(&buf, renderData)
|
||||
if err != nil {
|
||||
ret[k] = err.Error()
|
||||
continue
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"math"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -13,12 +12,16 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
// Return all, front-end search and paging
|
||||
func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
lst, err := models.AlertMuteGetsByBG(rt.Ctx, bgid)
|
||||
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -53,11 +56,17 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
|
||||
bgid := ginx.QueryInt64(c, "bgid", -1)
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
disabled := ginx.QueryInt(c, "disabled", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) activeAlertMuteGets(c *gin.Context) {
|
||||
lst, err := models.AlertMuteGetsAll(rt.Ctx)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) alertMuteAdd(c *gin.Context) {
|
||||
|
||||
var f models.AlertMute
|
||||
@@ -67,18 +76,21 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
|
||||
f.CreateBy = username
|
||||
f.UpdateBy = username
|
||||
f.GroupId = ginx.UrlParamInt64(c, "id")
|
||||
ginx.NewRender(c).Message(f.Add(rt.Ctx))
|
||||
|
||||
ginx.Dangerous(f.Add(rt.Ctx))
|
||||
ginx.NewRender(c).Data(f.Id, nil)
|
||||
}
|
||||
|
||||
type MuteTestForm struct {
|
||||
EventId int64 `json:"event_id" binding:"required"`
|
||||
AlertMute models.AlertMute `json:"mute_config" binding:"required"`
|
||||
EventId int64 `json:"event_id" binding:"required"`
|
||||
AlertMute models.AlertMute `json:"config" binding:"required"`
|
||||
PassTimeCheck bool `json:"pass_time_check"`
|
||||
}
|
||||
|
||||
func (rt *Router) alertMuteTryRun(c *gin.Context) {
|
||||
|
||||
var f MuteTestForm
|
||||
ginx.BindJSON(c, &f)
|
||||
ginx.Dangerous(f.AlertMute.Verify())
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
ginx.Dangerous(err)
|
||||
@@ -90,18 +102,30 @@ func (rt *Router) alertMuteTryRun(c *gin.Context) {
|
||||
curEvent := *hisEvent.ToCur()
|
||||
curEvent.SetTagsMap()
|
||||
|
||||
// 绕过时间范围检查:设置时间范围为全量(0 到 int64 最大值),仅验证其他匹配条件(如标签、策略类型等)
|
||||
f.AlertMute.MuteTimeType = models.TimeRange
|
||||
f.AlertMute.Btime = 0 // 最小可能值(如 Unix 时间戳起点)
|
||||
f.AlertMute.Etime = math.MaxInt64 // 最大可能值(int64 上限)
|
||||
if f.PassTimeCheck {
|
||||
f.AlertMute.MuteTimeType = models.Periodic
|
||||
f.AlertMute.PeriodicMutesJson = []models.PeriodicMute{
|
||||
{
|
||||
EnableDaysOfWeek: "0 1 2 3 4 5 6",
|
||||
EnableStime: "00:00",
|
||||
EnableEtime: "00:00",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
if !mute.MatchMute(&curEvent, &f.AlertMute) {
|
||||
ginx.NewRender(c).Data("not match", nil)
|
||||
match, err := mute.MatchMute(&curEvent, &f.AlertMute)
|
||||
if err != nil {
|
||||
// 对错误信息进行 i18n 翻译
|
||||
translatedErr := i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
ginx.Bomb(http.StatusBadRequest, translatedErr)
|
||||
}
|
||||
|
||||
if !match {
|
||||
ginx.NewRender(c).Data("event not match mute", nil)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data("mute test match", nil)
|
||||
|
||||
ginx.NewRender(c).Data("event match mute", nil)
|
||||
}
|
||||
|
||||
// Preview events (alert_cur_event) that match the mute strategy based on the following criteria:
|
||||
|
||||
@@ -453,6 +453,30 @@ func (rt *Router) wrapJwtKey(key string) string {
|
||||
return rt.HTTP.JWTAuth.RedisKeyPrefix + key
|
||||
}
|
||||
|
||||
func (rt *Router) wrapIdTokenKey(userId int64) string {
|
||||
return fmt.Sprintf("n9e_id_token_%d", userId)
|
||||
}
|
||||
|
||||
// saveIdToken 保存用户的 id_token 到 Redis
|
||||
func (rt *Router) saveIdToken(ctx context.Context, userId int64, idToken string) error {
|
||||
if idToken == "" {
|
||||
return nil
|
||||
}
|
||||
// id_token 的过期时间应该与 RefreshToken 保持一致,确保在整个会话期间都可用于登出
|
||||
expiration := time.Minute * time.Duration(rt.HTTP.JWTAuth.RefreshExpired)
|
||||
return rt.Redis.Set(ctx, rt.wrapIdTokenKey(userId), idToken, expiration).Err()
|
||||
}
|
||||
|
||||
// fetchIdToken 从 Redis 获取用户的 id_token
|
||||
func (rt *Router) fetchIdToken(ctx context.Context, userId int64) (string, error) {
|
||||
return rt.Redis.Get(ctx, rt.wrapIdTokenKey(userId)).Result()
|
||||
}
|
||||
|
||||
// deleteIdToken 从 Redis 删除用户的 id_token
|
||||
func (rt *Router) deleteIdToken(ctx context.Context, userId int64) error {
|
||||
return rt.Redis.Del(ctx, rt.wrapIdTokenKey(userId)).Err()
|
||||
}
|
||||
|
||||
type TokenDetails struct {
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
|
||||
@@ -33,7 +33,7 @@ type Record struct {
|
||||
|
||||
// notificationRecordAdd
|
||||
func (rt *Router) notificationRecordAdd(c *gin.Context) {
|
||||
var req []*models.NotificaitonRecord
|
||||
var req []*models.NotificationRecord
|
||||
ginx.BindJSON(c, &req)
|
||||
err := sender.PushNotifyRecords(req)
|
||||
ginx.Dangerous(err, 429)
|
||||
@@ -43,14 +43,14 @@ func (rt *Router) notificationRecordAdd(c *gin.Context) {
|
||||
|
||||
func (rt *Router) notificationRecordList(c *gin.Context) {
|
||||
eid := ginx.UrlParamInt64(c, "eid")
|
||||
lst, err := models.NotificaitonRecordsGetByEventId(rt.Ctx, eid)
|
||||
lst, err := models.NotificationRecordsGetByEventId(rt.Ctx, eid)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
response := buildNotificationResponse(rt.Ctx, lst)
|
||||
ginx.NewRender(c).Data(response, nil)
|
||||
}
|
||||
|
||||
func buildNotificationResponse(ctx *ctx.Context, nl []*models.NotificaitonRecord) NotificationResponse {
|
||||
func buildNotificationResponse(ctx *ctx.Context, nl []*models.NotificationRecord) NotificationResponse {
|
||||
response := NotificationResponse{
|
||||
SubRules: []SubRule{},
|
||||
Notifies: make(map[string][]Record),
|
||||
|
||||
@@ -162,21 +162,6 @@ func (rt *Router) notifyChannelIdentsGet(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
type flushDutyChannelsResponse struct {
|
||||
Error struct {
|
||||
Code string `json:"code"`
|
||||
Message string `json:"message"`
|
||||
} `json:"error"`
|
||||
Data struct {
|
||||
Items []struct {
|
||||
ChannelID int `json:"channel_id"`
|
||||
ChannelName string `json:"channel_name"`
|
||||
Status string `json:"status"`
|
||||
} `json:"items"`
|
||||
Total int `json:"total"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (rt *Router) flashDutyNotifyChannelsGet(c *gin.Context) {
|
||||
cid := ginx.UrlParamInt64(c, "id")
|
||||
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
|
||||
@@ -196,18 +181,31 @@ func (rt *Router) flashDutyNotifyChannelsGet(c *gin.Context) {
|
||||
jsonData = []byte(fmt.Sprintf(`{"member_name":"%s","email":"%s","phone":"%s"}`, me.Username, me.Email, me.Phone))
|
||||
}
|
||||
|
||||
items, err := getFlashDutyChannels(nc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, jsonData)
|
||||
items, err := getFlashDutyChannels(nc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, jsonData, time.Duration(nc.RequestConfig.FlashDutyRequestConfig.Timeout)*time.Millisecond)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(items, nil)
|
||||
}
|
||||
|
||||
// getFlashDutyChannels 从FlashDuty API获取频道列表
|
||||
func getFlashDutyChannels(integrationUrl string, jsonData []byte) ([]struct {
|
||||
type flushDutyChannelsResponse struct {
|
||||
Error struct {
|
||||
Code string `json:"code"`
|
||||
Message string `json:"message"`
|
||||
} `json:"error"`
|
||||
Data struct {
|
||||
Items []FlashDutyChannel `json:"items"`
|
||||
Total int `json:"total"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type FlashDutyChannel struct {
|
||||
ChannelID int `json:"channel_id"`
|
||||
ChannelName string `json:"channel_name"`
|
||||
Status string `json:"status"`
|
||||
}, error) {
|
||||
}
|
||||
|
||||
// getFlashDutyChannels 从FlashDuty API获取频道列表
|
||||
func getFlashDutyChannels(integrationUrl string, jsonData []byte, timeout time.Duration) ([]FlashDutyChannel, error) {
|
||||
// 解析URL,提取baseUrl和参数
|
||||
baseUrl, integrationKey, err := parseIntegrationUrl(integrationUrl)
|
||||
if err != nil {
|
||||
@@ -227,7 +225,9 @@ func getFlashDutyChannels(integrationUrl string, jsonData []byte) ([]struct {
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
httpResp, err := (&http.Client{}).Do(req)
|
||||
httpResp, err := (&http.Client{
|
||||
Timeout: timeout,
|
||||
}).Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -266,3 +266,149 @@ func parseIntegrationUrl(urlStr string) (baseUrl string, integrationKey string,
|
||||
|
||||
return host, integrationKey, nil
|
||||
}
|
||||
|
||||
func (rt *Router) pagerDutyNotifyServicesGet(c *gin.Context) {
|
||||
cid := ginx.UrlParamInt64(c, "id")
|
||||
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
|
||||
ginx.Dangerous(err)
|
||||
if err != nil || nc == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "notify channel not found")
|
||||
}
|
||||
|
||||
items, err := getPagerDutyServices(nc.RequestConfig.PagerDutyRequestConfig.ApiKey, time.Duration(nc.RequestConfig.PagerDutyRequestConfig.Timeout)*time.Millisecond)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, fmt.Sprintf("failed to get pagerduty services: %v", err))
|
||||
}
|
||||
// 服务: []集成,扁平化为服务-集成
|
||||
var flattenedItems []map[string]string
|
||||
for _, svc := range items {
|
||||
for _, integ := range svc.Integrations {
|
||||
flattenedItems = append(flattenedItems, map[string]string{
|
||||
"service_id": svc.ID,
|
||||
"service_name": svc.Name,
|
||||
"integration_summary": integ.Summary,
|
||||
"integration_id": integ.ID,
|
||||
"integration_url": integ.Self,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(flattenedItems, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) pagerDutyIntegrationKeyGet(c *gin.Context) {
|
||||
serviceId := ginx.UrlParamStr(c, "service_id")
|
||||
integrationId := ginx.UrlParamStr(c, "integration_id")
|
||||
cid := ginx.UrlParamInt64(c, "id")
|
||||
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
|
||||
ginx.Dangerous(err)
|
||||
if err != nil || nc == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "notify channel not found")
|
||||
}
|
||||
|
||||
integrationUrl := fmt.Sprintf("https://api.pagerduty.com/services/%s/integrations/%s", serviceId, integrationId)
|
||||
integrationKey, err := getPagerDutyIntegrationKey(integrationUrl, nc.RequestConfig.PagerDutyRequestConfig.ApiKey, time.Duration(nc.RequestConfig.PagerDutyRequestConfig.Timeout)*time.Millisecond)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, fmt.Sprintf("failed to get pagerduty integration key: %v", err))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(map[string]string{
|
||||
"integration_key": integrationKey,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// PagerDutyIntegration is one integration of a PagerDuty service as decoded
// from the PagerDuty API JSON.
type PagerDutyIntegration struct {
	ID             string `json:"id"`
	IntegrationKey string `json:"integration_key"`
	Self           string `json:"self"` // API URL of the integration
	Summary        string `json:"summary"`
}

// PagerDutyService is one PagerDuty service together with its integrations,
// as decoded from the "services" array of the PagerDuty API response.
type PagerDutyService struct {
	Name         string                 `json:"name"`
	ID           string                 `json:"id"`
	Integrations []PagerDutyIntegration `json:"integrations"`
}

// getPagerDutyServices pages through the PagerDuty services API and returns
// every service with its integrations.
//
// apiKey is sent as "Token token=<apiKey>"; timeout bounds each individual
// HTTP request. Any response whose status is not 200 is returned as an error
// that includes the status code and the response body, so an error payload
// is never mistaken for an empty service list.
func getPagerDutyServices(apiKey string, timeout time.Duration) ([]PagerDutyService, error) {
	const limit = 100 // page size requested from the API

	var (
		offset      uint // pagination offset
		allServices []PagerDutyService
	)

	// One client for all pages instead of a fresh one per request.
	client := &http.Client{Timeout: timeout}

	for {
		url := fmt.Sprintf("https://api.pagerduty.com/services?limit=%d&offset=%d", limit, offset)

		req, err := http.NewRequest("GET", url, nil)
		if err != nil {
			return nil, err
		}
		req.Header.Set("Authorization", fmt.Sprintf("Token token=%s", apiKey))
		req.Header.Set("Accept", "application/vnd.pagerduty+json;version=2")

		httpResp, err := client.Do(req)
		if err != nil {
			return nil, err
		}

		// Read and close inside the loop body; a defer would pile up until
		// the function returns.
		body, err := io.ReadAll(httpResp.Body)
		httpResp.Body.Close()
		if err != nil {
			return nil, err
		}

		if httpResp.StatusCode != http.StatusOK {
			return nil, fmt.Errorf("pagerduty services api returned status %d: %s", httpResp.StatusCode, body)
		}

		// Response envelope, including the pagination fields.
		var serviceRes struct {
			Services []PagerDutyService `json:"services"`
			More     bool               `json:"more"` // whether further pages exist
			Limit    uint               `json:"limit"`
			Offset   uint               `json:"offset"`
		}
		if err := json.Unmarshal(body, &serviceRes); err != nil {
			return nil, err
		}
		allServices = append(allServices, serviceRes.Services...)

		// Stop when the API reports no more data or the page came back short.
		if !serviceRes.More || len(serviceRes.Services) < int(limit) {
			break
		}
		offset += limit // advance to the next page
	}

	return allServices, nil
}
|
||||
|
||||
// getPagerDutyIntegrationKey fetches an integration by its API URL and
// returns the integration key found in the response.
//
// apiKey is sent as "Token token=<apiKey>"; timeout bounds the HTTP request.
// Any response whose status is not 200 is returned as an error that includes
// the status code and the response body, so an error payload is never
// reported as an empty key with a nil error.
func getPagerDutyIntegrationKey(integrationUrl, apiKey string, timeout time.Duration) (string, error) {
	req, err := http.NewRequest("GET", integrationUrl, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Authorization", fmt.Sprintf("Token token=%s", apiKey))
	// Same API version header as the services listing request.
	req.Header.Set("Accept", "application/vnd.pagerduty+json;version=2")

	httpResp, err := (&http.Client{
		Timeout: timeout,
	}).Do(req)
	if err != nil {
		return "", err
	}
	defer httpResp.Body.Close()

	body, err := io.ReadAll(httpResp.Body)
	if err != nil {
		return "", err
	}

	if httpResp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("pagerduty integration api returned status %d: %s", httpResp.StatusCode, body)
	}

	var integRes struct {
		Integration struct {
			IntegrationKey string `json:"integration_key"`
		} `json:"integration"`
	}
	if err := json.Unmarshal(body, &integRes); err != nil {
		return "", err
	}

	return integRes.Integration.IntegrationKey, nil
}
|
||||
|
||||
@@ -11,7 +11,7 @@ func TestGetFlashDutyChannels(t *testing.T) {
|
||||
jsonData := []byte(`{}`)
|
||||
|
||||
// 调用被测试的函数
|
||||
channels, err := getFlashDutyChannels(integrationUrl, jsonData)
|
||||
channels, err := getFlashDutyChannels(integrationUrl, jsonData, 5000)
|
||||
|
||||
fmt.Println(channels, err)
|
||||
}
|
||||
|
||||
@@ -162,7 +162,7 @@ func (rt *Router) notifyConfigPut(c *gin.Context) {
|
||||
ginx.Bomb(200, "key %s can not modify", f.Ckey)
|
||||
}
|
||||
username := c.MustGet("username").(string)
|
||||
//insert or update build-in config
|
||||
//insert or update built-in config
|
||||
ginx.Dangerous(models.ConfigsSetWithUname(rt.Ctx, f.Ckey, f.Cval, username))
|
||||
if f.Ckey == models.SMTP {
|
||||
// 重置邮件发送器
|
||||
@@ -219,8 +219,8 @@ func (rt *Router) notifyChannelConfigGets(c *gin.Context) {
|
||||
id := ginx.QueryInt64(c, "id", 0)
|
||||
name := ginx.QueryStr(c, "name", "")
|
||||
ident := ginx.QueryStr(c, "ident", "")
|
||||
eabled := ginx.QueryInt(c, "eabled", -1)
|
||||
enabled := ginx.QueryInt(c, "enabled", -1)
|
||||
|
||||
notifyChannels, err := models.NotifyChannelGets(rt.Ctx, id, name, ident, eabled)
|
||||
notifyChannels, err := models.NotifyChannelGets(rt.Ctx, id, name, ident, enabled)
|
||||
ginx.NewRender(c).Data(notifyChannels, err)
|
||||
}
|
||||
|
||||
@@ -6,11 +6,12 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/dispatch"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/slice"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
@@ -152,103 +153,138 @@ func (rt *Router) notifyTest(c *gin.Context) {
|
||||
for _, he := range hisEvents {
|
||||
event := he.ToCur()
|
||||
event.SetTagsMap()
|
||||
if dispatch.NotifyRuleApplicable(&f.NotifyConfig, event) {
|
||||
events = append(events, event)
|
||||
if err := dispatch.NotifyRuleMatchCheck(&f.NotifyConfig, event); err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
events = append(events, event)
|
||||
}
|
||||
|
||||
if len(events) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "not events applicable")
|
||||
resp, err := SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, f.NotifyConfig, events)
|
||||
if resp == "" {
|
||||
resp = "success"
|
||||
}
|
||||
ginx.NewRender(c).Data(resp, err)
|
||||
}
|
||||
|
||||
func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroup *memsto.UserGroupCacheType, notifyConfig models.NotifyConfig, events []*models.AlertCurEvent) (string, error) {
|
||||
notifyChannels, err := models.NotifyChannelGets(ctx, notifyConfig.ChannelID, "", "", -1)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get notify channels: %v", err)
|
||||
}
|
||||
|
||||
notifyChannels, err := models.NotifyChannelGets(rt.Ctx, f.NotifyConfig.ChannelID, "", "", -1)
|
||||
ginx.Dangerous(err)
|
||||
if len(notifyChannels) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "notify channel not found")
|
||||
return "", fmt.Errorf("notify channel not found")
|
||||
}
|
||||
|
||||
notifyChannel := notifyChannels[0]
|
||||
|
||||
if !notifyChannel.Enable {
|
||||
ginx.Bomb(http.StatusBadRequest, "notify channel not enabled, please enable it first")
|
||||
return "", fmt.Errorf("notify channel not enabled, please enable it first")
|
||||
}
|
||||
|
||||
// 获取站点URL用于模板渲染
|
||||
siteUrl, _ := models.ConfigsGetSiteUrl(ctx)
|
||||
if siteUrl == "" {
|
||||
siteUrl = "http://127.0.0.1:17000"
|
||||
}
|
||||
|
||||
tplContent := make(map[string]interface{})
|
||||
if notifyChannel.RequestType != "flashtudy" {
|
||||
messageTemplates, err := models.MessageTemplateGets(rt.Ctx, f.NotifyConfig.TemplateID, "", "")
|
||||
ginx.Dangerous(err)
|
||||
if len(messageTemplates) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "message template not found")
|
||||
if notifyChannel.RequestType != "flashduty" {
|
||||
messageTemplates, err := models.MessageTemplateGets(ctx, notifyConfig.TemplateID, "", "")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get message templates: %v", err)
|
||||
}
|
||||
tplContent = messageTemplates[0].RenderEvent(events)
|
||||
}
|
||||
|
||||
if len(messageTemplates) == 0 {
|
||||
return "", fmt.Errorf("message template not found")
|
||||
}
|
||||
tplContent = messageTemplates[0].RenderEvent(events, siteUrl)
|
||||
}
|
||||
var contactKey string
|
||||
if notifyChannel.ParamConfig != nil && notifyChannel.ParamConfig.UserInfo != nil {
|
||||
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
|
||||
}
|
||||
|
||||
sendtos, flashDutyChannelIDs, customParams := dispatch.GetNotifyConfigParams(&f.NotifyConfig, contactKey, rt.UserCache, rt.UserGroupCache)
|
||||
sendtos, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams := dispatch.GetNotifyConfigParams(¬ifyConfig, contactKey, userCache, userGroup)
|
||||
|
||||
var resp string
|
||||
switch notifyChannel.RequestType {
|
||||
case "flashduty":
|
||||
client, err := models.GetHTTPClient(notifyChannel)
|
||||
ginx.Dangerous(err)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get http client: %v", err)
|
||||
}
|
||||
|
||||
for i := range flashDutyChannelIDs {
|
||||
resp, err = notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], client)
|
||||
if err != nil {
|
||||
break
|
||||
return "", fmt.Errorf("failed to send flashduty notify: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
ginx.NewRender(c).Data(resp, err)
|
||||
return resp, nil
|
||||
case "pagerduty":
|
||||
client, err := models.GetHTTPClient(notifyChannel)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get http client: %v", err)
|
||||
}
|
||||
|
||||
for _, routingKey := range pagerDutyRoutingKeys {
|
||||
resp, err = notifyChannel.SendPagerDuty(events, routingKey, siteUrl, client)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send pagerduty notify: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
return resp, nil
|
||||
case "http":
|
||||
client, err := models.GetHTTPClient(notifyChannel)
|
||||
ginx.Dangerous(err)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get http client: %v", err)
|
||||
}
|
||||
|
||||
if notifyChannel.RequestConfig == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "request config not found")
|
||||
return "", fmt.Errorf("request config is nil")
|
||||
}
|
||||
|
||||
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "http request config not found")
|
||||
return "", fmt.Errorf("http request config is nil")
|
||||
}
|
||||
|
||||
if dispatch.NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
|
||||
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, client)
|
||||
logger.Infof("channel_name: %v, event:%+v, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], sendtos, tplContent, customParams, resp, err)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to send http notify: %v", err)
|
||||
return "", fmt.Errorf("failed to send http notify: %v", err)
|
||||
}
|
||||
ginx.NewRender(c).Data(resp, err)
|
||||
return resp, nil
|
||||
} else {
|
||||
for i := range sendtos {
|
||||
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, client)
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to send http notify: %v", err)
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
return "", fmt.Errorf("failed to send http notify: %v", err)
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Message(err)
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
case "smtp":
|
||||
if len(sendtos) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "No valid email address in the user and team")
|
||||
return "", fmt.Errorf("no valid email address in the user and team")
|
||||
}
|
||||
err := notifyChannel.SendEmailNow(events, tplContent, sendtos)
|
||||
ginx.NewRender(c).Message(err)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send email notify: %v", err)
|
||||
}
|
||||
return resp, nil
|
||||
case "script":
|
||||
resp, _, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
ginx.NewRender(c).Data(resp, err)
|
||||
return resp, err
|
||||
default:
|
||||
logger.Errorf("unsupported request type: %v", notifyChannel.RequestType)
|
||||
ginx.NewRender(c).Message(errors.New("unsupported request type"))
|
||||
return "", fmt.Errorf("unsupported request type")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -302,8 +338,8 @@ func (rt *Router) notifyRuleCustomParamsGet(c *gin.Context) {
|
||||
filterKey := ""
|
||||
for key, value := range nc.Params {
|
||||
// 找到在通知媒介中的自定义变量配置项,进行 cname 转换
|
||||
cname, exsits := keyMap[key]
|
||||
if exsits {
|
||||
cname, exists := keyMap[key]
|
||||
if exists {
|
||||
list = append(list, paramList{
|
||||
Name: key,
|
||||
CName: cname,
|
||||
|
||||
58
center/router/router_opensearch.go
Normal file
58
center/router/router_opensearch.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/datasource/opensearch"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (rt *Router) QueryOSIndices(c *gin.Context) {
|
||||
var f IndexReq
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
indices, err := plug.(*opensearch.OpenSearch).QueryIndices()
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(indices, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) QueryOSFields(c *gin.Context) {
|
||||
var f IndexReq
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
fields, err := plug.(*opensearch.OpenSearch).QueryFields([]string{f.Index})
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(fields, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) QueryOSVariable(c *gin.Context) {
|
||||
var f FieldValueReq
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
fields, err := plug.(*opensearch.OpenSearch).QueryFieldValue([]string{f.Index}, f.Query.Field, f.Query.Query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(fields, nil)
|
||||
}
|
||||
@@ -7,16 +7,20 @@ import (
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/net/httplib"
|
||||
)
|
||||
|
||||
type QueryFormItem struct {
|
||||
@@ -144,6 +148,8 @@ func (rt *Router) dsProxy(c *gin.Context) {
|
||||
|
||||
if ds.AuthJson.BasicAuthUser != "" {
|
||||
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
|
||||
} else {
|
||||
req.Header.Del("Authorization")
|
||||
}
|
||||
|
||||
headerCount := len(ds.HTTPJson.Headers)
|
||||
@@ -235,3 +241,94 @@ func transportPut(dsid, updatedat int64, tran http.RoundTripper) {
|
||||
updatedAts[dsid] = updatedat
|
||||
transportsLock.Unlock()
|
||||
}
|
||||
|
||||
// Values of the "prometheus.tsdb_type" datasource setting for which
// deleteDatasourceSeries knows how to build a delete-series request.
const (
|
||||
DatasourceTypePrometheus = "Prometheus"
|
||||
DatasourceTypeVictoriaMetrics = "VictoriaMetrics"
|
||||
)
|
||||
|
||||
// deleteDatasourceSeriesForm is the JSON request body of deleteDatasourceSeries.
type deleteDatasourceSeriesForm struct {
|
||||
// DatasourceID is looked up in the router's datasource cache.
DatasourceID int64 `json:"datasource_id"`
|
||||
// Match holds the series selectors; each one is sent as a match[] query parameter.
Match []string `json:"match"`
|
||||
// Start is forwarded verbatim as the start query parameter (Prometheus only).
Start string `json:"start"`
|
||||
// End is forwarded verbatim as the end query parameter (Prometheus only).
End string `json:"end"`
|
||||
}
|
||||
|
||||
func (rt *Router) deleteDatasourceSeries(c *gin.Context) {
|
||||
var ddsf deleteDatasourceSeriesForm
|
||||
ginx.BindJSON(c, &ddsf)
|
||||
ds := rt.DatasourceCache.GetById(ddsf.DatasourceID)
|
||||
|
||||
if ds == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "no such datasource")
|
||||
return
|
||||
}
|
||||
|
||||
// Get datasource type, now only support prometheus and victoriametrics
|
||||
datasourceType, ok := ds.SettingsJson["prometheus.tsdb_type"]
|
||||
if !ok {
|
||||
ginx.Bomb(http.StatusBadRequest, "datasource type not found, please check your datasource settings")
|
||||
return
|
||||
}
|
||||
|
||||
target, err := ds.HTTPJson.ParseUrl()
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, "invalid urls: %s", ds.HTTPJson.GetUrls())
|
||||
return
|
||||
}
|
||||
|
||||
timeout := time.Duration(ds.HTTPJson.DialTimeout) * time.Millisecond
|
||||
matchQueries := make([]string, 0)
|
||||
for _, match := range ddsf.Match {
|
||||
matchQueries = append(matchQueries, fmt.Sprintf("match[]=%s", match))
|
||||
}
|
||||
matchQuery := strings.Join(matchQueries, "&")
|
||||
|
||||
switch datasourceType {
|
||||
case DatasourceTypePrometheus:
|
||||
// Prometheus delete api need POST method
|
||||
// https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
|
||||
url := fmt.Sprintf("http://%s/api/v1/admin/tsdb/delete_series?%s&start=%s&end=%s", target.Host, matchQuery, ddsf.Start, ddsf.End)
|
||||
go func() {
|
||||
resp, _, err := poster.PostJSON(url, timeout, nil)
|
||||
if err != nil {
|
||||
logger.Errorf("delete series error datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, err: %v",
|
||||
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, err)
|
||||
return
|
||||
}
|
||||
logger.Infof("delete datasource series datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, respBody: %s",
|
||||
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, string(resp))
|
||||
}()
|
||||
case DatasourceTypeVictoriaMetrics:
|
||||
// Delete API doesn’t support the deletion of specific time ranges.
|
||||
// Refer: https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-delete-time-series
|
||||
var url string
|
||||
// Check VictoriaMetrics is single node or cluster
|
||||
// Cluster will have /select/<accountID>/prometheus pattern
|
||||
re := regexp.MustCompile(`/select/(\d+)/prometheus`)
|
||||
matches := re.FindStringSubmatch(ds.HTTPJson.Url)
|
||||
if len(matches) > 0 && matches[1] != "" {
|
||||
accountID, err := strconv.Atoi(matches[1])
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, "invalid accountID: %s", matches[1])
|
||||
}
|
||||
url = fmt.Sprintf("http://%s/delete/%d/prometheus/api/v1/admin/tsdb/delete_series?%s", target.Host, accountID, matchQuery)
|
||||
} else {
|
||||
url = fmt.Sprintf("http://%s/api/v1/admin/tsdb/delete_series?%s", target.Host, matchQuery)
|
||||
}
|
||||
go func() {
|
||||
resp, err := httplib.Get(url).SetTimeout(timeout).Response()
|
||||
if err != nil {
|
||||
logger.Errorf("delete series failed | datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, err: %v",
|
||||
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, err)
|
||||
return
|
||||
}
|
||||
logger.Infof("sending delete series request | datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, respBody: %s",
|
||||
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, resp.Body)
|
||||
}()
|
||||
default:
|
||||
ginx.Bomb(http.StatusBadRequest, "not support delete series yet")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(nil, nil)
|
||||
}
|
||||
|
||||
@@ -112,7 +112,7 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
|
||||
for _, q := range f.Querys {
|
||||
for _, q := range f.Queries {
|
||||
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
|
||||
return nil, fmt.Errorf("forbidden")
|
||||
}
|
||||
@@ -127,7 +127,7 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
go func(query interface{}) {
|
||||
defer wg.Done()
|
||||
|
||||
datas, err := plug.QueryData(ctx.Request.Context(), query)
|
||||
data, err := plug.QueryData(ctx.Request.Context(), query)
|
||||
if err != nil {
|
||||
logger.Warningf("query data error: req:%+v err:%v", query, err)
|
||||
mu.Lock()
|
||||
@@ -136,9 +136,9 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debugf("query data: req:%+v resp:%+v", query, datas)
|
||||
logger.Debugf("query data: req:%+v resp:%+v", query, data)
|
||||
mu.Lock()
|
||||
resp = append(resp, datas...)
|
||||
resp = append(resp, data...)
|
||||
mu.Unlock()
|
||||
}(q)
|
||||
}
|
||||
@@ -183,7 +183,7 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
|
||||
for _, q := range f.Querys {
|
||||
for _, q := range f.Queries {
|
||||
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
|
||||
return LogResp{}, fmt.Errorf("forbidden")
|
||||
}
|
||||
@@ -242,7 +242,7 @@ func (rt *Router) QueryLog(c *gin.Context) {
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
var resp []interface{}
|
||||
for _, q := range f.Querys {
|
||||
for _, q := range f.Queries {
|
||||
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
|
||||
ginx.Bomb(200, "forbidden")
|
||||
}
|
||||
|
||||
@@ -149,6 +149,12 @@ func (rt *Router) recordingRulePutFields(c *gin.Context) {
|
||||
f.Fields["datasource_queries"] = string(bytes)
|
||||
}
|
||||
|
||||
if datasourceIds, ok := f.Fields["datasource_ids"]; ok {
|
||||
bytes, err := json.Marshal(datasourceIds)
|
||||
ginx.Dangerous(err)
|
||||
f.Fields["datasource_ids"] = string(bytes)
|
||||
}
|
||||
|
||||
for i := 0; i < len(f.Ids); i++ {
|
||||
ar, err := models.RecordingRuleGetById(rt.Ctx, f.Ids[i])
|
||||
ginx.Dangerous(err)
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
@@ -601,3 +602,10 @@ func (rt *Router) targetsOfHostQuery(c *gin.Context) {
|
||||
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) targetUpdate(c *gin.Context) {
|
||||
var f idents.TargetUpdate
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
ginx.NewRender(c).Message(rt.IdentSet.UpdateTargets(f.Lst, f.Now))
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
@@ -12,6 +13,7 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
func (rt *Router) userBusiGroupsGets(c *gin.Context) {
|
||||
@@ -233,5 +235,239 @@ func (rt *Router) userDel(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// 如果要删除的用户是 admin 角色,检查是否是最后一个 admin
|
||||
if target.IsAdmin() {
|
||||
adminCount, err := models.CountAdminUsers(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if adminCount <= 1 {
|
||||
ginx.Bomb(http.StatusBadRequest, "Cannot delete the last admin user")
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(target.Del(rt.Ctx))
|
||||
}
|
||||
|
||||
func (rt *Router) installDateGet(c *gin.Context) {
|
||||
rootUser, err := models.UserGetByUsername(rt.Ctx, "root")
|
||||
if err != nil {
|
||||
logger.Errorf("get root user failed: %v", err)
|
||||
ginx.NewRender(c).Data(0, nil)
|
||||
return
|
||||
}
|
||||
|
||||
if rootUser == nil {
|
||||
logger.Errorf("root user not found")
|
||||
ginx.NewRender(c).Data(0, nil)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(rootUser.CreateAt, nil)
|
||||
}
|
||||
|
||||
// usersPhoneEncrypt 统一手机号加密
|
||||
func (rt *Router) usersPhoneEncrypt(c *gin.Context) {
|
||||
users, err := models.UserGetAll(rt.Ctx)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Message(fmt.Errorf("get users failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// 获取RSA密钥
|
||||
_, publicKey, _, err := models.GetRSAKeys(rt.Ctx)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Message(fmt.Errorf("get RSA keys failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// 先启用手机号加密功能
|
||||
err = models.SetPhoneEncryptionEnabled(rt.Ctx, true)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Message(fmt.Errorf("enable phone encryption failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// 刷新配置缓存
|
||||
err = models.RefreshPhoneEncryptionCache(rt.Ctx)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to refresh phone encryption cache: %v", err)
|
||||
// 回滚配置
|
||||
models.SetPhoneEncryptionEnabled(rt.Ctx, false)
|
||||
ginx.NewRender(c).Message(fmt.Errorf("refresh cache failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
successCount := 0
|
||||
failCount := 0
|
||||
var failedUsers []string
|
||||
|
||||
// 使用事务处理所有用户的手机号加密
|
||||
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
|
||||
// 对每个用户的手机号进行加密
|
||||
for _, user := range users {
|
||||
if user.Phone == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if isPhoneEncrypted(user.Phone) {
|
||||
continue
|
||||
}
|
||||
|
||||
encryptedPhone, err := secu.EncryptValue(user.Phone, publicKey)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to encrypt phone for user %s: %v", user.Username, err)
|
||||
failCount++
|
||||
failedUsers = append(failedUsers, user.Username)
|
||||
continue
|
||||
}
|
||||
|
||||
err = tx.Model(&models.User{}).Where("id = ?", user.Id).Update("phone", encryptedPhone).Error
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to update phone for user %s: %v", user.Username, err)
|
||||
failCount++
|
||||
failedUsers = append(failedUsers, user.Username)
|
||||
continue
|
||||
}
|
||||
|
||||
successCount++
|
||||
logger.Debugf("Successfully encrypted phone for user %s", user.Username)
|
||||
}
|
||||
|
||||
// 如果有失败的用户,回滚事务
|
||||
if failCount > 0 {
|
||||
return fmt.Errorf("encrypt failed users: %d, failed users: %v", failCount, failedUsers)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
// 加密失败,回滚配置
|
||||
models.SetPhoneEncryptionEnabled(rt.Ctx, false)
|
||||
models.RefreshPhoneEncryptionCache(rt.Ctx)
|
||||
ginx.NewRender(c).Message(fmt.Errorf("encrypt phone failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"success_count": successCount,
|
||||
"fail_count": failCount,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) usersPhoneDecryptRefresh(c *gin.Context) {
|
||||
err := models.RefreshPhoneEncryptionCache(rt.Ctx)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Message(fmt.Errorf("refresh phone encryption cache failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
}
|
||||
|
||||
// usersPhoneDecrypt 统一手机号解密
|
||||
func (rt *Router) usersPhoneDecrypt(c *gin.Context) {
|
||||
// 先关闭手机号加密功能
|
||||
err := models.SetPhoneEncryptionEnabled(rt.Ctx, false)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Message(fmt.Errorf("disable phone encryption failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// 刷新配置缓存
|
||||
err = models.RefreshPhoneEncryptionCache(rt.Ctx)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to refresh phone encryption cache: %v", err)
|
||||
// 回滚配置
|
||||
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
|
||||
ginx.NewRender(c).Message(fmt.Errorf("refresh cache failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// 获取所有用户(此时加密开关已关闭,直接读取数据库原始数据)
|
||||
var users []*models.User
|
||||
err = models.DB(rt.Ctx).Find(&users).Error
|
||||
if err != nil {
|
||||
// 回滚配置
|
||||
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
|
||||
models.RefreshPhoneEncryptionCache(rt.Ctx)
|
||||
ginx.NewRender(c).Message(fmt.Errorf("get users failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// 获取RSA密钥
|
||||
privateKey, _, password, err := models.GetRSAKeys(rt.Ctx)
|
||||
if err != nil {
|
||||
// 回滚配置
|
||||
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
|
||||
models.RefreshPhoneEncryptionCache(rt.Ctx)
|
||||
ginx.NewRender(c).Message(fmt.Errorf("get RSA keys failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
successCount := 0
|
||||
failCount := 0
|
||||
var failedUsers []string
|
||||
|
||||
// 使用事务处理所有用户的手机号解密
|
||||
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
|
||||
// 对每个用户的手机号进行解密
|
||||
for _, user := range users {
|
||||
if user.Phone == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// 检查是否是加密的手机号
|
||||
if !isPhoneEncrypted(user.Phone) {
|
||||
continue
|
||||
}
|
||||
|
||||
// 对手机号进行解密
|
||||
decryptedPhone, err := secu.Decrypt(user.Phone, privateKey, password)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to decrypt phone for user %s: %v", user.Username, err)
|
||||
failCount++
|
||||
failedUsers = append(failedUsers, user.Username)
|
||||
continue
|
||||
}
|
||||
|
||||
// 直接更新数据库中的手机号字段(绕过GORM钩子)
|
||||
err = tx.Model(&models.User{}).Where("id = ?", user.Id).Update("phone", decryptedPhone).Error
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to update phone for user %s: %v", user.Username, err)
|
||||
failCount++
|
||||
failedUsers = append(failedUsers, user.Username)
|
||||
continue
|
||||
}
|
||||
|
||||
successCount++
|
||||
logger.Debugf("Successfully decrypted phone for user %s", user.Username)
|
||||
}
|
||||
|
||||
// 如果有失败的用户,回滚事务
|
||||
if failCount > 0 {
|
||||
return fmt.Errorf("decrypt failed users: %d, failed users: %v", failCount, failedUsers)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
// 解密失败,回滚配置
|
||||
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
|
||||
models.RefreshPhoneEncryptionCache(rt.Ctx)
|
||||
ginx.NewRender(c).Message(fmt.Errorf("decrypt phone failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"success_count": successCount,
|
||||
"fail_count": failCount,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// isPhoneEncrypted reports whether phone carries the "enc:" marker followed by
// at least one more character. The bare marker on its own does not count.
func isPhoneEncrypted(phone string) bool {
	const marker = "enc:"
	if len(phone) <= len(marker) {
		return false
	}
	return phone[:len(marker)] == marker
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package sso
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"time"
|
||||
@@ -10,6 +11,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
@@ -24,6 +26,7 @@ type SsoClient struct {
|
||||
LDAP *ldapx.SsoClient
|
||||
CAS *cas.SsoClient
|
||||
OAuth2 *oauth2x.SsoClient
|
||||
DingTalk *dingtalk.SsoClient
|
||||
LastUpdateTime int64
|
||||
configCache *memsto.ConfigCache
|
||||
configLastUpdateTime int64
|
||||
@@ -193,6 +196,13 @@ func Init(center cconf.Center, ctx *ctx.Context, configCache *memsto.ConfigCache
|
||||
log.Fatalln("init oauth2 failed:", err)
|
||||
}
|
||||
ssoClient.OAuth2 = oauth2x.New(config)
|
||||
case dingtalk.SsoTypeName:
|
||||
var config dingtalk.Config
|
||||
err := json.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
log.Fatalf("init %s failed: %s", dingtalk.SsoTypeName, err)
|
||||
}
|
||||
ssoClient.DingTalk = dingtalk.New(config)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,7 +228,9 @@ func (s *SsoClient) reload(ctx *ctx.Context) error {
|
||||
return err
|
||||
}
|
||||
userVariableMap := s.configCache.Get()
|
||||
ssoConfigMap := make(map[string]models.SsoConfig, 0)
|
||||
for _, cfg := range configs {
|
||||
ssoConfigMap[cfg.Name] = cfg
|
||||
cfg.Content = tplx.ReplaceTemplateUseText(cfg.Name, cfg.Content, userVariableMap)
|
||||
switch cfg.Name {
|
||||
case "LDAP":
|
||||
@@ -259,9 +271,26 @@ func (s *SsoClient) reload(ctx *ctx.Context) error {
|
||||
continue
|
||||
}
|
||||
s.OAuth2.Reload(config)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if dingTalkConfig, ok := ssoConfigMap[dingtalk.SsoTypeName]; ok {
|
||||
var config dingtalk.Config
|
||||
err := json.Unmarshal([]byte(dingTalkConfig.Content), &config)
|
||||
if err != nil {
|
||||
logger.Warningf("reload %s failed: %s", dingtalk.SsoTypeName, err)
|
||||
} else {
|
||||
if s.DingTalk != nil {
|
||||
s.DingTalk.Reload(config)
|
||||
} else {
|
||||
s.DingTalk = dingtalk.New(config)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
s.DingTalk = nil
|
||||
}
|
||||
|
||||
s.LastUpdateTime = lastUpdateTime
|
||||
s.configLastUpdateTime = lastCacheUpdateTime
|
||||
return nil
|
||||
|
||||
@@ -37,7 +37,7 @@ func Upgrade(configFile string) error {
|
||||
}
|
||||
}
|
||||
|
||||
authJosn := models.Auth{
|
||||
authJson := models.Auth{
|
||||
BasicAuthUser: cluster.BasicAuthUser,
|
||||
BasicAuthPassword: cluster.BasicAuthPass,
|
||||
}
|
||||
@@ -53,18 +53,18 @@ func Upgrade(configFile string) error {
|
||||
Headers: header,
|
||||
}
|
||||
|
||||
datasrouce := models.Datasource{
|
||||
datasource := models.Datasource{
|
||||
PluginId: 1,
|
||||
PluginType: "prometheus",
|
||||
PluginTypeName: "Prometheus Like",
|
||||
Name: cluster.Name,
|
||||
HTTPJson: httpJson,
|
||||
AuthJson: authJosn,
|
||||
AuthJson: authJson,
|
||||
ClusterName: "default",
|
||||
Status: "enabled",
|
||||
}
|
||||
|
||||
err = datasrouce.Add(ctx)
|
||||
err = datasource.Add(ctx)
|
||||
if err != nil {
|
||||
logger.Errorf("add datasource %s error: %v", cluster.Name, err)
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
externalProcessors := process.NewExternalProcessors()
|
||||
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache,
|
||||
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache)
|
||||
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
|
||||
|
||||
@@ -14,6 +14,13 @@ func decryptConfig(config *ConfigType, cryptoKey string) error {
|
||||
|
||||
config.DB.DSN = decryptDsn
|
||||
|
||||
decryptRedisPwd, err := secu.DealWithDecrypt(config.Redis.Password, cryptoKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decrypt the redis password: %s", err)
|
||||
}
|
||||
|
||||
config.Redis.Password = decryptRedisPwd
|
||||
|
||||
for k := range config.HTTP.APIForService.BasicAuth {
|
||||
decryptPwd, err := secu.DealWithDecrypt(config.HTTP.APIForService.BasicAuth[k], cryptoKey)
|
||||
if err != nil {
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
|
||||
func cleanNotifyRecord(ctx *ctx.Context, day int) {
|
||||
lastWeek := time.Now().Unix() - 86400*int64(day)
|
||||
err := models.DB(ctx).Model(&models.NotificaitonRecord{}).Where("created_at < ?", lastWeek).Delete(&models.NotificaitonRecord{}).Error
|
||||
err := models.DB(ctx).Model(&models.NotificationRecord{}).Where("created_at < ?", lastWeek).Delete(&models.NotificationRecord{}).Error
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to clean notify record: %v", err)
|
||||
}
|
||||
|
||||
@@ -10,12 +10,20 @@ import (
|
||||
|
||||
"github.com/araddon/dateparse"
|
||||
"github.com/bitly/go-simplejson"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/olivere/elastic/v7"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
// FixedField names a built-in document field that can be used as a sort key.
type FixedField string
|
||||
|
||||
// FieldIndex and FieldId are appended (ascending) after the date field as the
// default tie-breaking sort keys when QueryLog pages with search_after.
const (
|
||||
FieldIndex FixedField = "_index"
|
||||
FieldId FixedField = "_id"
|
||||
)
|
||||
|
||||
type Query struct {
|
||||
@@ -37,6 +45,18 @@ type Query struct {
|
||||
|
||||
Timeout int `json:"timeout" mapstructure:"timeout"`
|
||||
MaxShard int `json:"max_shard" mapstructure:"max_shard"`
|
||||
|
||||
SearchAfter *SearchAfter `json:"search_after" mapstructure:"search_after"`
|
||||
}
|
||||
|
||||
// SortField is one sort key of a search_after query: the field to sort on and
// the direction, passed to the search source's Sort call.
type SortField struct {
|
||||
// Field is the name of the field to sort by.
Field string `json:"field" mapstructure:"field"`
|
||||
// Ascending selects ascending order when true.
Ascending bool `json:"ascending" mapstructure:"ascending"`
|
||||
}
|
||||
|
||||
// SearchAfter holds the search_after paging parameters of a log query. When it
// is set on a Query, QueryLog sorts by SortFields (or by a default of the date
// field, _index and _id) instead of using from-based paging.
type SearchAfter struct {
|
||||
SortFields []SortField `json:"sort_fields" mapstructure:"sort_fields"` // sort fields; usually the combination timestamp:desc, _index:asc, _id:asc, which together form a unique sort key
|
||||
SearchAfter []interface{} `json:"search_after" mapstructure:"search_after"` // search values for the sort fields; they must follow the order of sort_fields and are the values of the last log entry of the previous query
|
||||
}
|
||||
|
||||
type MetricAggr struct {
|
||||
@@ -64,9 +84,9 @@ type QueryFieldsFunc func(indices []string) ([]string, error)
|
||||
type GroupByCate string
|
||||
|
||||
const (
|
||||
Filters GroupByCate = "filters"
|
||||
Histgram GroupByCate = "histgram"
|
||||
Terms GroupByCate = "terms"
|
||||
Filters GroupByCate = "filters"
|
||||
Histogram GroupByCate = "histogram"
|
||||
Terms GroupByCate = "terms"
|
||||
)
|
||||
|
||||
// 参数
|
||||
@@ -158,7 +178,7 @@ func getUnixTs(timeStr string) int64 {
|
||||
return parsedTime.UnixMilli()
|
||||
}
|
||||
|
||||
func GetBuckts(labelKey string, keys []string, arr []interface{}, metrics *MetricPtr, labels string, ts int64, f string) {
|
||||
func GetBuckets(labelKey string, keys []string, arr []interface{}, metrics *MetricPtr, labels string, ts int64, f string) {
|
||||
var err error
|
||||
bucketsKey := ""
|
||||
if len(keys) > 0 {
|
||||
@@ -206,9 +226,9 @@ func GetBuckts(labelKey string, keys []string, arr []interface{}, metrics *Metri
|
||||
nextBucketsArr, exists := innerBuckets.(map[string]interface{})["buckets"]
|
||||
if exists {
|
||||
if len(keys[1:]) >= 1 {
|
||||
GetBuckts(bucketsKey, keys[1:], nextBucketsArr.([]interface{}), metrics, newlabels, ts, f)
|
||||
GetBuckets(bucketsKey, keys[1:], nextBucketsArr.([]interface{}), metrics, newlabels, ts, f)
|
||||
} else {
|
||||
GetBuckts(bucketsKey, []string{}, nextBucketsArr.([]interface{}), metrics, newlabels, ts, f)
|
||||
GetBuckets(bucketsKey, []string{}, nextBucketsArr.([]interface{}), metrics, newlabels, ts, f)
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -271,7 +291,10 @@ func MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, st
|
||||
}
|
||||
|
||||
for i := 0; i < len(eventTags); i++ {
|
||||
eventTags[i] = strings.Replace(eventTags[i], "=", ":", 1)
|
||||
arr := strings.SplitN(eventTags[i], "=", 2)
|
||||
if len(arr) == 2 {
|
||||
eventTags[i] = fmt.Sprintf("%s:%s", arr[0], strconv.Quote(arr[1]))
|
||||
}
|
||||
}
|
||||
|
||||
if len(eventTags) > 0 {
|
||||
@@ -295,7 +318,10 @@ func MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, sta
|
||||
}
|
||||
|
||||
for i := 0; i < len(eventTags); i++ {
|
||||
eventTags[i] = strings.Replace(eventTags[i], "=", ":", 1)
|
||||
arr := strings.SplitN(eventTags[i], "=", 2)
|
||||
if len(arr) == 2 {
|
||||
eventTags[i] = fmt.Sprintf("%s:%s", arr[0], strconv.Quote(arr[1]))
|
||||
}
|
||||
}
|
||||
|
||||
if len(eventTags) > 0 {
|
||||
@@ -379,7 +405,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
}
|
||||
|
||||
q.Gte(time.Unix(start, 0).UnixMilli())
|
||||
q.Lte(time.Unix(end, 0).UnixMilli())
|
||||
q.Lt(time.Unix(end, 0).UnixMilli())
|
||||
q.Format("epoch_millis")
|
||||
|
||||
field := param.MetricAggr.Field
|
||||
@@ -415,8 +441,32 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
Field(param.DateField).
|
||||
MinDocCount(1)
|
||||
|
||||
if strings.HasPrefix(version, "7") {
|
||||
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval))
|
||||
versionParts := strings.Split(version, ".")
|
||||
major := 0
|
||||
if len(versionParts) > 0 {
|
||||
if m, err := strconv.Atoi(versionParts[0]); err == nil {
|
||||
major = m
|
||||
}
|
||||
}
|
||||
minor := 0
|
||||
if len(versionParts) > 1 {
|
||||
if m, err := strconv.Atoi(versionParts[1]); err == nil {
|
||||
minor = m
|
||||
}
|
||||
}
|
||||
|
||||
if major >= 7 {
|
||||
// 添加偏移量,使第一个分桶bucket的左边界对齐为 start 时间
|
||||
offset := (start % param.Interval) - param.Interval
|
||||
|
||||
// 使用 fixed_interval 的条件:ES 7.2+ 或者任何 major > 7(例如 ES8)
|
||||
if (major > 7) || (major == 7 && minor >= 2) {
|
||||
// ES 7.2+ 以及 ES8+ 使用 fixed_interval
|
||||
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
|
||||
} else {
|
||||
// 7.0-7.1 使用 interval(带 offset)
|
||||
tsAggr.Interval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
|
||||
}
|
||||
} else {
|
||||
// 兼容 7.0 以下的版本
|
||||
// OpenSearch 也使用这个字段
|
||||
@@ -443,7 +493,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
} else {
|
||||
groupByAggregation = elastic.NewTermsAggregation().Field(groupBy.Field).OrderByKeyDesc().Size(groupBy.Size).MinDocCount(int(groupBy.MinDocCount))
|
||||
}
|
||||
case Histgram:
|
||||
case Histogram:
|
||||
if param.MetricAggr.Func != "count" {
|
||||
groupByAggregation = elastic.NewHistogramAggregation().Field(groupBy.Field).Interval(float64(groupBy.Interval)).SubAggregation(field, aggr)
|
||||
} else {
|
||||
@@ -473,7 +523,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
switch groupBy.Cate {
|
||||
case Terms:
|
||||
groupByAggregation = elastic.NewTermsAggregation().Field(groupBy.Field).SubAggregation(groupBys[i-1].Field, groupByAggregation).OrderByKeyDesc().Size(groupBy.Size).MinDocCount(int(groupBy.MinDocCount))
|
||||
case Histgram:
|
||||
case Histogram:
|
||||
groupByAggregation = elastic.NewHistogramAggregation().Field(groupBy.Field).Interval(float64(groupBy.Interval)).SubAggregation(groupBys[i-1].Field, groupByAggregation)
|
||||
case Filters:
|
||||
for _, filterParam := range groupBy.Params {
|
||||
@@ -534,7 +584,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
|
||||
metrics := &MetricPtr{Data: make(map[string][][]float64)}
|
||||
|
||||
GetBuckts("", keys, bucketsData, metrics, "", 0, param.MetricAggr.Func)
|
||||
GetBuckets("", keys, bucketsData, metrics, "", 0, param.MetricAggr.Func)
|
||||
|
||||
items, err := TransferData(fmt.Sprintf("%s_%s", field, param.MetricAggr.Func), param.Ref, metrics.Data), nil
|
||||
|
||||
@@ -582,8 +632,8 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
|
||||
now := time.Now().Unix()
|
||||
var start, end int64
|
||||
if param.End != 0 && param.Start != 0 {
|
||||
end = param.End - param.End%param.Interval
|
||||
start = param.Start - param.Start%param.Interval
|
||||
end = param.End
|
||||
start = param.Start
|
||||
} else {
|
||||
end = now
|
||||
start = end - param.Interval
|
||||
@@ -591,7 +641,7 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
|
||||
|
||||
q := elastic.NewRangeQuery(param.DateField)
|
||||
q.Gte(time.Unix(start, 0).UnixMilli())
|
||||
q.Lte(time.Unix(end, 0).UnixMilli())
|
||||
q.Lt(time.Unix(end, 0).UnixMilli())
|
||||
q.Format("epoch_millis")
|
||||
|
||||
queryString := GetQueryString(param.Filter, q)
|
||||
@@ -603,14 +653,27 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
|
||||
if param.MaxShard < 1 {
|
||||
param.MaxShard = maxShard
|
||||
}
|
||||
|
||||
// from+size 分页方式获取日志,受es 的max_result_window参数限制,默认最多返回1w条日志, 可以使用search_after方式获取更多日志
|
||||
source := elastic.NewSearchSource().
|
||||
TrackTotalHits(true).
|
||||
Query(queryString).
|
||||
From(param.P).
|
||||
Size(param.Limit).
|
||||
Sort(param.DateField, param.Ascending)
|
||||
|
||||
Size(param.Limit)
|
||||
// 是否使用search_after方式
|
||||
if param.SearchAfter != nil {
|
||||
// 设置默认排序字段
|
||||
if len(param.SearchAfter.SortFields) == 0 {
|
||||
source = source.Sort(param.DateField, param.Ascending).Sort(string(FieldIndex), true).Sort(string(FieldId), true)
|
||||
} else {
|
||||
for _, field := range param.SearchAfter.SortFields {
|
||||
source = source.Sort(field.Field, field.Ascending)
|
||||
}
|
||||
}
|
||||
if len(param.SearchAfter.SearchAfter) > 0 {
|
||||
source = source.SearchAfter(param.SearchAfter.SearchAfter...)
|
||||
}
|
||||
} else {
|
||||
source = source.From(param.P).Sort(param.DateField, param.Ascending)
|
||||
}
|
||||
result, err := search(ctx, indexArr, source, param.Timeout, param.MaxShard)
|
||||
if err != nil {
|
||||
logger.Warningf("query data error:%v", err)
|
||||
@@ -632,7 +695,7 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
|
||||
var x map[string]interface{}
|
||||
err := json.Unmarshal(result.Hits.Hits[i].Source, &x)
|
||||
if err != nil {
|
||||
logger.Warningf("Unmarshal soruce error:%v", err)
|
||||
logger.Warningf("Unmarshal source error:%v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -69,9 +69,9 @@ func init() {
|
||||
}
|
||||
}
|
||||
|
||||
type NewDatasrouceFn func(settings map[string]interface{}) (Datasource, error)
|
||||
type NewDatasourceFn func(settings map[string]interface{}) (Datasource, error)
|
||||
|
||||
var datasourceRegister = map[string]NewDatasrouceFn{}
|
||||
var datasourceRegister = map[string]NewDatasourceFn{}
|
||||
|
||||
type Datasource interface {
|
||||
Init(settings map[string]interface{}) (Datasource, error) // 初始化配置
|
||||
|
||||
213
datasource/doris/doris.go
Normal file
213
datasource/doris/doris.go
Normal file
@@ -0,0 +1,213 @@
|
||||
package doris
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/datasource"
|
||||
"github.com/ccfos/nightingale/v6/dskit/doris"
|
||||
"github.com/ccfos/nightingale/v6/dskit/types"
|
||||
"github.com/ccfos/nightingale/v6/pkg/macros"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// DorisType is the key under which the Doris plugin is registered with the
// datasource package.
const (
|
||||
DorisType = "doris"
|
||||
)
|
||||
|
||||
// init registers an empty Doris value as the datasource plugin for DorisType.
func init() {
|
||||
datasource.RegisterDatasource(DorisType, new(Doris))
|
||||
}
|
||||
|
||||
// Doris is the Doris datasource plugin. It embeds the dskit doris.Doris
// settings/client type; the tags inline/squash the embedded fields so that
// settings decode directly into them.
type Doris struct {
|
||||
doris.Doris `json:",inline" mapstructure:",squash"`
|
||||
}
|
||||
|
||||
// QueryParam describes one Doris query; its fields are decoded from JSON or
// mapstructure input using the tags below.
// NOTE(review): the code consuming these fields is outside this view, so the
// exact semantics (e.g. the unit of From/To, the layout of TimeFormat) should
// be confirmed against the query implementation.
type QueryParam struct {
|
||||
Ref string `json:"ref" mapstructure:"ref"`
|
||||
Database string `json:"database" mapstructure:"database"`
|
||||
Table string `json:"table" mapstructure:"table"`
|
||||
SQL string `json:"sql" mapstructure:"sql"`
|
||||
Keys datasource.Keys `json:"keys" mapstructure:"keys"`
|
||||
Limit int `json:"limit" mapstructure:"limit"`
|
||||
From int64 `json:"from" mapstructure:"from"`
|
||||
To int64 `json:"to" mapstructure:"to"`
|
||||
TimeField string `json:"time_field" mapstructure:"time_field"`
|
||||
TimeFormat string `json:"time_format" mapstructure:"time_format"`
|
||||
}
|
||||
|
||||
func (d *Doris) InitClient() error {
|
||||
if len(d.Addr) == 0 {
|
||||
return fmt.Errorf("not found doris addr, please check datasource config")
|
||||
}
|
||||
if _, err := d.NewConn(context.TODO(), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Doris) Init(settings map[string]interface{}) (datasource.Datasource, error) {
|
||||
newest := new(Doris)
|
||||
err := mapstructure.Decode(settings, newest)
|
||||
return newest, err
|
||||
}
|
||||
|
||||
func (d *Doris) Validate(ctx context.Context) error {
|
||||
if len(d.Addr) == 0 || len(strings.TrimSpace(d.Addr)) == 0 {
|
||||
return fmt.Errorf("doris addr is invalid, please check datasource setting")
|
||||
}
|
||||
|
||||
if len(strings.TrimSpace(d.User)) == 0 {
|
||||
return fmt.Errorf("doris user is invalid, please check datasource setting")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Equal compares whether two objects are the same, used for caching
|
||||
func (d *Doris) Equal(p datasource.Datasource) bool {
|
||||
newest, ok := p.(*Doris)
|
||||
if !ok {
|
||||
logger.Errorf("unexpected plugin type, expected is doris")
|
||||
return false
|
||||
}
|
||||
|
||||
// only compare first shard
|
||||
if d.Addr != newest.Addr {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.User != newest.User {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.Password != newest.Password {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.EnableWrite != newest.EnableWrite {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.FeAddr != newest.FeAddr {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.MaxQueryRows != newest.MaxQueryRows {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.Timeout != newest.Timeout {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.MaxIdleConns != newest.MaxIdleConns {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.MaxOpenConns != newest.MaxOpenConns {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.ConnMaxLifetime != newest.ConnMaxLifetime {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.ClusterName != newest.ClusterName {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (d *Doris) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (d *Doris) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (d *Doris) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (d *Doris) QueryData(ctx context.Context, query interface{}) ([]models.DataResp, error) {
|
||||
dorisQueryParam := new(QueryParam)
|
||||
if err := mapstructure.Decode(query, dorisQueryParam); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if dorisQueryParam.Keys.ValueKey == "" {
|
||||
return nil, fmt.Errorf("valueKey is required")
|
||||
}
|
||||
|
||||
items, err := d.QueryTimeseries(context.TODO(), &doris.QueryParam{
|
||||
Database: dorisQueryParam.Database,
|
||||
Sql: dorisQueryParam.SQL,
|
||||
Keys: types.Keys{
|
||||
ValueKey: dorisQueryParam.Keys.ValueKey,
|
||||
LabelKey: dorisQueryParam.Keys.LabelKey,
|
||||
TimeKey: dorisQueryParam.Keys.TimeKey,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logger.Warningf("query:%+v get data err:%v", dorisQueryParam, err)
|
||||
return []models.DataResp{}, err
|
||||
}
|
||||
data := make([]models.DataResp, 0)
|
||||
for i := range items {
|
||||
data = append(data, models.DataResp{
|
||||
Ref: dorisQueryParam.Ref,
|
||||
Metric: items[i].Metric,
|
||||
Values: items[i].Values,
|
||||
})
|
||||
}
|
||||
|
||||
// parse resp to time series data
|
||||
logger.Infof("req:%+v keys:%+v \n data:%v", dorisQueryParam, dorisQueryParam.Keys, data)
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (d *Doris) QueryLog(ctx context.Context, query interface{}) ([]interface{}, int64, error) {
|
||||
dorisQueryParam := new(QueryParam)
|
||||
if err := mapstructure.Decode(query, dorisQueryParam); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
if strings.Contains(dorisQueryParam.SQL, "$__") {
|
||||
var err error
|
||||
dorisQueryParam.SQL, err = macros.Macro(dorisQueryParam.SQL, dorisQueryParam.From, dorisQueryParam.To)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
items, err := d.QueryLogs(ctx, &doris.QueryParam{
|
||||
Database: dorisQueryParam.Database,
|
||||
Sql: dorisQueryParam.SQL,
|
||||
})
|
||||
if err != nil {
|
||||
logger.Warningf("query:%+v get data err:%v", dorisQueryParam, err)
|
||||
return []interface{}{}, 0, err
|
||||
}
|
||||
logs := make([]interface{}, 0)
|
||||
for i := range items {
|
||||
logs = append(logs, items[i])
|
||||
}
|
||||
|
||||
return logs, int64(len(logs)), nil
|
||||
}
|
||||
|
||||
func (d *Doris) DescribeTable(ctx context.Context, query interface{}) ([]*types.ColumnProperty, error) {
|
||||
dorisQueryParam := new(QueryParam)
|
||||
if err := mapstructure.Decode(query, dorisQueryParam); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return d.DescTable(ctx, dorisQueryParam.Database, dorisQueryParam.Table)
|
||||
}
|
||||
@@ -106,6 +106,29 @@ func (e *Elasticsearch) InitClient() error {
|
||||
options = append(options, elastic.SetHealthcheck(false))
|
||||
|
||||
e.Client, err = elastic.NewClient(options...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if e.Client != nil {
|
||||
for _, addr := range e.Nodes {
|
||||
if addr == "" {
|
||||
continue
|
||||
}
|
||||
if ver, verr := e.Client.ElasticsearchVersion(addr); verr == nil {
|
||||
logger.Infof("detected elasticsearch version from %s: %s", addr, ver)
|
||||
e.Version = ver
|
||||
e.Addr = addr
|
||||
break
|
||||
} else {
|
||||
logger.Debugf("detect version failed from %s: %v", addr, verr)
|
||||
}
|
||||
}
|
||||
if e.Version == "" {
|
||||
logger.Warning("failed to detect elasticsearch version from configured nodes, keep configured version")
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -183,7 +206,6 @@ func (e *Elasticsearch) MakeTSQuery(ctx context.Context, query interface{}, even
|
||||
}
|
||||
|
||||
func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
|
||||
|
||||
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
|
||||
return e.Client.Search().
|
||||
Index(indices...).
|
||||
@@ -193,7 +215,6 @@ func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) (
|
||||
MaxConcurrentShardRequests(maxShard).
|
||||
Do(ctx)
|
||||
}
|
||||
|
||||
return eslike.QueryData(ctx, queryParam, e.Timeout, e.Version, search)
|
||||
}
|
||||
|
||||
@@ -203,9 +224,9 @@ func (e *Elasticsearch) QueryIndices() ([]string, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (e *Elasticsearch) QueryFields(indexs []string) ([]string, error) {
|
||||
func (e *Elasticsearch) QueryFields(indexes []string) ([]string, error) {
|
||||
var fields []string
|
||||
result, err := elastic.NewGetFieldMappingService(e.Client).Index(indexs...).IgnoreUnavailable(true).Do(context.Background())
|
||||
result, err := elastic.NewGetFieldMappingService(e.Client).Index(indexes...).IgnoreUnavailable(true).Do(context.Background())
|
||||
if err != nil {
|
||||
return fields, err
|
||||
}
|
||||
@@ -223,7 +244,7 @@ func (e *Elasticsearch) QueryFields(indexs []string) ([]string, error) {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, exsits := fieldMap[kk]; !exsits {
|
||||
if _, exists := fieldMap[kk]; !exists {
|
||||
fieldMap[kk] = struct{}{}
|
||||
fields = append(fields, kk)
|
||||
}
|
||||
@@ -235,7 +256,7 @@ func (e *Elasticsearch) QueryFields(indexs []string) ([]string, error) {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, exsits := fieldMap[k]; !exsits {
|
||||
if _, exists := fieldMap[k]; !exists {
|
||||
fieldMap[k] = struct{}{}
|
||||
fields = append(fields, k)
|
||||
}
|
||||
@@ -275,11 +296,11 @@ func (e *Elasticsearch) QueryLog(ctx context.Context, queryParam interface{}) ([
|
||||
return eslike.QueryLog(ctx, queryParam, e.Timeout, e.Version, e.MaxShard, search)
|
||||
}
|
||||
|
||||
func (e *Elasticsearch) QueryFieldValue(indexs []string, field string, query string) ([]string, error) {
|
||||
func (e *Elasticsearch) QueryFieldValue(indexes []string, field string, query string) ([]string, error) {
|
||||
var values []string
|
||||
search := e.Client.Search().
|
||||
IgnoreUnavailable(true).
|
||||
Index(indexs...).
|
||||
Index(indexes...).
|
||||
Size(0)
|
||||
|
||||
if query != "" {
|
||||
@@ -399,6 +420,9 @@ func (e *Elasticsearch) QueryMapData(ctx context.Context, query interface{}) ([]
|
||||
|
||||
// 将处理好的 map 添加到 m 切片中
|
||||
result = append(result, mItem)
|
||||
if param.Limit > 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// 只取第一条数据
|
||||
break
|
||||
|
||||
401
datasource/opensearch/opensearch.go
Normal file
401
datasource/opensearch/opensearch.go
Normal file
@@ -0,0 +1,401 @@
|
||||
package opensearch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/datasource"
|
||||
"github.com/ccfos/nightingale/v6/datasource/commons/eslike"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tlsx"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/olivere/elastic/v7"
|
||||
oscliv2 "github.com/opensearch-project/opensearch-go/v2"
|
||||
osapiv2 "github.com/opensearch-project/opensearch-go/v2/opensearchapi"
|
||||
)
|
||||
|
||||
// OpenSearchType is the plugin type name under which the OpenSearch datasource is registered.
const OpenSearchType = "opensearch"
|
||||
|
||||
type OpenSearch struct {
|
||||
Addr string `json:"os.addr" mapstructure:"os.addr"`
|
||||
Nodes []string `json:"os.nodes" mapstructure:"os.nodes"`
|
||||
Timeout int64 `json:"os.timeout" mapstructure:"os.timeout"` // millis
|
||||
Basic BasicAuth `json:"os.basic" mapstructure:"os.basic"`
|
||||
TLS TLS `json:"os.tls" mapstructure:"os.tls"`
|
||||
Version string `json:"os.version" mapstructure:"os.version"`
|
||||
Headers map[string]string `json:"os.headers" mapstructure:"os.headers"`
|
||||
MinInterval int `json:"os.min_interval" mapstructure:"os.min_interval"` // seconds
|
||||
MaxShard int `json:"os.max_shard" mapstructure:"os.max_shard"`
|
||||
ClusterName string `json:"os.cluster_name" mapstructure:"os.cluster_name"`
|
||||
Client *oscliv2.Client `json:"os.client" mapstructure:"os.client"`
|
||||
}
|
||||
|
||||
// TLS carries the TLS options of an OpenSearch datasource.
type TLS struct {
	// SkipTlsVerify is copied into InsecureSkipVerify when InitClient builds the TLS config.
	SkipTlsVerify bool `json:"os.tls.skip_tls_verify" mapstructure:"os.tls.skip_tls_verify"`
}
|
||||
|
||||
// BasicAuth carries the basic-auth credentials of an OpenSearch datasource.
type BasicAuth struct {
	// Enable is decoded from settings but not consulted by InitClient,
	// which attaches credentials whenever Username is non-empty.
	Enable   bool   `json:"os.auth.enable" mapstructure:"os.auth.enable"`
	Username string `json:"os.user" mapstructure:"os.user"`
	Password string `json:"os.password" mapstructure:"os.password"`
}
|
||||
|
||||
func init() {
|
||||
datasource.RegisterDatasource(OpenSearchType, new(OpenSearch))
|
||||
}
|
||||
|
||||
func (os *OpenSearch) Init(settings map[string]interface{}) (datasource.Datasource, error) {
|
||||
newest := new(OpenSearch)
|
||||
err := mapstructure.Decode(settings, newest)
|
||||
return newest, err
|
||||
}
|
||||
|
||||
func (os *OpenSearch) InitClient() error {
|
||||
transport := &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(os.Timeout) * time.Millisecond,
|
||||
}).DialContext,
|
||||
ResponseHeaderTimeout: time.Duration(os.Timeout) * time.Millisecond,
|
||||
}
|
||||
|
||||
if len(os.Nodes) > 0 {
|
||||
os.Addr = os.Nodes[0]
|
||||
}
|
||||
|
||||
if strings.Contains(os.Addr, "https") {
|
||||
tlsConfig := tlsx.ClientConfig{
|
||||
InsecureSkipVerify: os.TLS.SkipTlsVerify,
|
||||
UseTLS: true,
|
||||
}
|
||||
cfg, err := tlsConfig.TLSConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
transport.TLSClientConfig = cfg
|
||||
}
|
||||
|
||||
headers := http.Header{}
|
||||
for k, v := range os.Headers {
|
||||
headers[k] = []string{v}
|
||||
}
|
||||
|
||||
options := oscliv2.Config{
|
||||
Addresses: os.Nodes,
|
||||
Transport: transport,
|
||||
Header: headers,
|
||||
}
|
||||
|
||||
// 只要有用户名就添加认证,不依赖 Enable 字段
|
||||
if os.Basic.Username != "" {
|
||||
options.Username = os.Basic.Username
|
||||
options.Password = os.Basic.Password
|
||||
}
|
||||
|
||||
var err = error(nil)
|
||||
os.Client, err = oscliv2.NewClient(options)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (os *OpenSearch) Equal(other datasource.Datasource) bool {
|
||||
sort.Strings(os.Nodes)
|
||||
sort.Strings(other.(*OpenSearch).Nodes)
|
||||
|
||||
if strings.Join(os.Nodes, ",") != strings.Join(other.(*OpenSearch).Nodes, ",") {
|
||||
return false
|
||||
}
|
||||
|
||||
if os.Basic.Username != other.(*OpenSearch).Basic.Username {
|
||||
return false
|
||||
}
|
||||
|
||||
if os.Basic.Password != other.(*OpenSearch).Basic.Password {
|
||||
return false
|
||||
}
|
||||
|
||||
if os.TLS.SkipTlsVerify != other.(*OpenSearch).TLS.SkipTlsVerify {
|
||||
return false
|
||||
}
|
||||
|
||||
if os.Timeout != other.(*OpenSearch).Timeout {
|
||||
return false
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(os.Headers, other.(*OpenSearch).Headers) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (os *OpenSearch) Validate(ctx context.Context) (err error) {
|
||||
if len(os.Nodes) == 0 {
|
||||
return fmt.Errorf("need a valid addr")
|
||||
}
|
||||
|
||||
for _, addr := range os.Nodes {
|
||||
_, err = url.Parse(addr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse addr error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 如果提供了用户名,必须同时提供密码
|
||||
if len(os.Basic.Username) > 0 && len(os.Basic.Password) == 0 {
|
||||
return fmt.Errorf("password is required when username is provided")
|
||||
}
|
||||
|
||||
if os.MaxShard == 0 {
|
||||
os.MaxShard = 5
|
||||
}
|
||||
|
||||
if os.MinInterval < 10 {
|
||||
os.MinInterval = 10
|
||||
}
|
||||
|
||||
if os.Timeout == 0 {
|
||||
os.Timeout = 6000
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(os.Version, "2") {
|
||||
return fmt.Errorf("version must be 2.0+")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (os *OpenSearch) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
|
||||
return eslike.MakeLogQuery(ctx, query, eventTags, start, end)
|
||||
}
|
||||
|
||||
func (os *OpenSearch) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
|
||||
return eslike.MakeTSQuery(ctx, query, eventTags, start, end)
|
||||
}
|
||||
|
||||
func search(ctx context.Context, indices []string, source interface{}, timeout int, cli *oscliv2.Client) (*elastic.SearchResult, error) {
|
||||
var body *bytes.Buffer = nil
|
||||
if source != nil {
|
||||
body = new(bytes.Buffer)
|
||||
err := json.NewEncoder(body).Encode(source)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
req := osapiv2.SearchRequest{
|
||||
Index: indices,
|
||||
Body: body,
|
||||
}
|
||||
|
||||
if timeout > 0 {
|
||||
req.Timeout = time.Second * time.Duration(timeout)
|
||||
}
|
||||
|
||||
resp, err := req.Do(ctx, cli)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return nil, fmt.Errorf("opensearch response not 2xx, resp is %v", resp)
|
||||
}
|
||||
|
||||
bs, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
result := new(elastic.SearchResult)
|
||||
err = json.Unmarshal(bs, &result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (os *OpenSearch) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
|
||||
|
||||
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
|
||||
return search(ctx, indices, source, timeout, os.Client)
|
||||
}
|
||||
|
||||
return eslike.QueryData(ctx, queryParam, os.Timeout, os.Version, search)
|
||||
}
|
||||
|
||||
func (os *OpenSearch) QueryIndices() ([]string, error) {
|
||||
|
||||
cir := osapiv2.CatIndicesRequest{
|
||||
Format: "json",
|
||||
}
|
||||
|
||||
rsp, err := cir.Do(context.Background(), os.Client)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rsp.Body.Close()
|
||||
|
||||
bs, err := io.ReadAll(rsp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp := make([]struct {
|
||||
Index string `json:"index"`
|
||||
}, 0)
|
||||
|
||||
err = json.Unmarshal(bs, &resp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var ret []string
|
||||
for _, k := range resp {
|
||||
ret = append(ret, k.Index)
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (os *OpenSearch) QueryFields(indices []string) ([]string, error) {
|
||||
|
||||
var fields []string
|
||||
mappingRequest := osapiv2.IndicesGetMappingRequest{
|
||||
Index: indices,
|
||||
}
|
||||
|
||||
resp, err := mappingRequest.Do(context.Background(), os.Client)
|
||||
if err != nil {
|
||||
return fields, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
bs, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fields, err
|
||||
}
|
||||
|
||||
result := map[string]interface{}{}
|
||||
|
||||
err = json.Unmarshal(bs, &result)
|
||||
if err != nil {
|
||||
return fields, err
|
||||
}
|
||||
|
||||
idx := ""
|
||||
if len(indices) > 0 {
|
||||
idx = indices[0]
|
||||
}
|
||||
|
||||
mappingIndex := ""
|
||||
indexReg, _ := regexp.Compile(idx)
|
||||
for key, value := range result {
|
||||
mappings, ok := value.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if len(mappings) == 0 {
|
||||
continue
|
||||
}
|
||||
if key == idx || strings.Contains(key, idx) ||
|
||||
(indexReg != nil && indexReg.MatchString(key)) {
|
||||
mappingIndex = key
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(mappingIndex) == 0 {
|
||||
return fields, nil
|
||||
}
|
||||
|
||||
fields = propertyMappingRange(result[mappingIndex], 1)
|
||||
|
||||
sort.Strings(fields)
|
||||
return fields, nil
|
||||
}
|
||||
|
||||
func propertyMappingRange(v interface{}, depth int) (fields []string) {
|
||||
mapping, ok := v.(map[string]interface{})
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if len(mapping) == 0 {
|
||||
return
|
||||
}
|
||||
for key, value := range mapping {
|
||||
if reflect.TypeOf(value).Kind() == reflect.Map {
|
||||
valueMap := value.(map[string]interface{})
|
||||
if prop, found := valueMap["properties"]; found {
|
||||
subFields := propertyMappingRange(prop, depth+1)
|
||||
for i := range subFields {
|
||||
if depth == 1 {
|
||||
fields = append(fields, subFields[i])
|
||||
} else {
|
||||
fields = append(fields, key+"."+subFields[i])
|
||||
}
|
||||
}
|
||||
} else if typ, found := valueMap["type"]; found {
|
||||
if eslike.HitFilter(typ.(string)) {
|
||||
continue
|
||||
}
|
||||
fields = append(fields, key)
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (os *OpenSearch) QueryLog(ctx context.Context, queryParam interface{}) ([]interface{}, int64, error) {
|
||||
|
||||
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
|
||||
return search(ctx, indices, source, timeout, os.Client)
|
||||
}
|
||||
|
||||
return eslike.QueryLog(ctx, queryParam, os.Timeout, os.Version, 0, search)
|
||||
}
|
||||
|
||||
func (os *OpenSearch) QueryFieldValue(indexes []string, field string, query string) ([]string, error) {
|
||||
var values []string
|
||||
source := elastic.NewSearchSource().
|
||||
Size(0)
|
||||
|
||||
if query != "" {
|
||||
source = source.Query(elastic.NewBoolQuery().Must(elastic.NewQueryStringQuery(query)))
|
||||
}
|
||||
source = source.Aggregation("distinct", elastic.NewTermsAggregation().Field(field).Size(10000))
|
||||
|
||||
result, err := search(context.Background(), indexes, source, 0, os.Client)
|
||||
if err != nil {
|
||||
return values, err
|
||||
}
|
||||
|
||||
agg, found := result.Aggregations.Terms("distinct")
|
||||
if !found {
|
||||
return values, nil
|
||||
}
|
||||
|
||||
for _, bucket := range agg.Buckets {
|
||||
values = append(values, bucket.Key.(string))
|
||||
}
|
||||
|
||||
return values, nil
|
||||
}
|
||||
|
||||
func (os *OpenSearch) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -23,7 +23,7 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
regx = "(?i)from\\s+([a-zA-Z0-9_]+)\\.([a-zA-Z0-9_]+)\\.([a-zA-Z0-9_]+)"
|
||||
regx = `(?i)from\s+((?:"[^"]+"|[a-zA-Z0-9_]+))\.((?:"[^"]+"|[a-zA-Z0-9_]+))\.((?:"[^"]+"|[a-zA-Z0-9_]+))`
|
||||
)
|
||||
|
||||
func init() {
|
||||
@@ -162,6 +162,7 @@ func (p *PostgreSQL) QueryData(ctx context.Context, query interface{}) ([]models
|
||||
return nil, err
|
||||
}
|
||||
|
||||
postgresqlQueryParam.SQL = formatSQLDatabaseNameWithRegex(postgresqlQueryParam.SQL)
|
||||
if strings.Contains(postgresqlQueryParam.SQL, "$__") {
|
||||
var err error
|
||||
postgresqlQueryParam.SQL, err = macros.Macro(postgresqlQueryParam.SQL, postgresqlQueryParam.From, postgresqlQueryParam.To)
|
||||
@@ -229,6 +230,7 @@ func (p *PostgreSQL) QueryLog(ctx context.Context, query interface{}) ([]interfa
|
||||
p.Shards[0].DB = db
|
||||
}
|
||||
|
||||
postgresqlQueryParam.SQL = formatSQLDatabaseNameWithRegex(postgresqlQueryParam.SQL)
|
||||
if strings.Contains(postgresqlQueryParam.SQL, "$__") {
|
||||
var err error
|
||||
postgresqlQueryParam.SQL, err = macros.Macro(postgresqlQueryParam.SQL, postgresqlQueryParam.From, postgresqlQueryParam.To)
|
||||
@@ -280,7 +282,17 @@ func parseDBName(sql string) (db string, err error) {
|
||||
if len(matches) != 4 {
|
||||
return "", fmt.Errorf("no valid table name in format database.schema.table found")
|
||||
}
|
||||
return matches[1], nil
|
||||
return strings.Trim(matches[1], `"`), nil
|
||||
}
|
||||
|
||||
// dbSchemaTableRe matches "from <db>.<schema>.<table>" (case-insensitive,
// optional whitespace around the dots) where every part is an unquoted
// identifier. It is compiled once at package scope rather than on every call.
var dbSchemaTableRe = regexp.MustCompile(`(?i)\bfrom\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\.\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\.\s*([a-zA-Z_][a-zA-Z0-9_]*)`)

// formatSQLDatabaseNameWithRegex rewrites every unquoted
// "from dbname.schema.table" reference in sql to
// `from "dbname"."schema"."table"`.
//
// PostgreSQL folds unquoted identifiers to lower case; wrapping each part in
// double quotes preserves the case written in the query. References that are
// already quoted, or that do not have exactly the three-part form, are left
// untouched. The matched keyword is always emitted as lower-case "from".
func formatSQLDatabaseNameWithRegex(sql string) string {
	return dbSchemaTableRe.ReplaceAllString(sql, `from "$1"."$2"."$3"`)
}
|
||||
|
||||
func extractColumns(sql string) ([]string, error) {
|
||||
|
||||
@@ -120,7 +120,7 @@ Url = "http://127.0.0.1:9090/api/v1/write"
|
||||
- 补充和完善文档 => [n9e.github.io](https://n9e.github.io/)
|
||||
- 分享您在使用夜莺监控过程中的最佳实践和经验心得 => [文章分享](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale/share/)
|
||||
- 提交产品建议 =》 [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
|
||||
- 提交代码,让夜莺监控更快、更稳、更好用 => [github pull request](https://github.com/didi/nightingale/pulls)
|
||||
- 提交代码,让夜莺监控更快、更稳、更好用 => [github pull request](https://github.com/ccfos/nightingale/pulls)
|
||||
|
||||
**尊重、认可和记录每一位贡献者的工作**是夜莺开源社区的第一指导原则,我们提倡**高效的提问**,这既是对开发者时间的尊重,也是对整个社区知识沉淀的贡献:
|
||||
- 提问之前请先查阅 [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
|
||||
@@ -140,7 +140,7 @@ Url = "http://127.0.0.1:9090/api/v1/write"
|
||||
</a>
|
||||
|
||||
## License
|
||||
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
[Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)
|
||||
|
||||
## 加入交流群
|
||||
|
||||
|
||||
BIN
doc/img/readme/active-events-en.png
Normal file
BIN
doc/img/readme/active-events-en.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 481 KiB |
BIN
doc/img/readme/alerting-rules-en.png
Normal file
BIN
doc/img/readme/alerting-rules-en.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 508 KiB |
BIN
doc/img/readme/dashboard-en.png
Normal file
BIN
doc/img/readme/dashboard-en.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 386 KiB |
BIN
doc/img/readme/integration-components-en.png
Normal file
BIN
doc/img/readme/integration-components-en.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 424 KiB |
BIN
doc/img/readme/multi-region-arch.png
Normal file
BIN
doc/img/readme/multi-region-arch.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
@@ -138,7 +138,7 @@
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "noraml"
|
||||
"stack": "normal"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
@@ -214,7 +214,7 @@
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "noraml"
|
||||
"stack": "normal"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
|
||||
@@ -34,7 +34,7 @@ labels = { instance="docker-compose-mysql" }
|
||||
# insecure_skip_verify = true
|
||||
|
||||
#[[instances.queries]]
|
||||
# mesurement = "lock_wait"
|
||||
# measurement = "lock_wait"
|
||||
# metric_fields = [ "total" ]
|
||||
# timeout = "3s"
|
||||
# request = '''
|
||||
|
||||
@@ -89,8 +89,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
@@ -53,7 +53,7 @@ zh:
|
||||
mem_huge_page_size: 每个大页的大小
|
||||
mem_huge_pages_free: 池中尚未分配的 HugePages 数量
|
||||
mem_huge_pages_total: 预留HugePages的总个数
|
||||
mem_inactive: 空闲的内存数(包括free和avalible的内存)
|
||||
mem_inactive: 空闲的内存数(包括free和available的内存)
|
||||
mem_low_free: 未被使用的低位大小
|
||||
mem_low_total: 低位内存总大小,低位可以达到高位内存一样的作用,而且它还能够被内核用来记录一些自己的数据结构
|
||||
mem_mapped: 设备和文件等映射的大小
|
||||
@@ -105,8 +105,8 @@ zh:
|
||||
netstat_udp_mem: UDP套接字内存Page使用量
|
||||
netstat_udplite_inuse: 正在使用的 udp lite 数量
|
||||
netstat_raw_inuse: 正在使用的 raw socket 数量
|
||||
netstat_frag_inuse: ip fragement 数量
|
||||
netstat_frag_memory: ip fragement 已经分配的内存(byte)
|
||||
netstat_frag_inuse: ip fragment 数量
|
||||
netstat_frag_memory: ip fragment 已经分配的内存(byte)
|
||||
|
||||
#[ping]
|
||||
ping_percent_packet_loss: ping数据包丢失百分比(%)
|
||||
@@ -143,7 +143,7 @@ zh:
|
||||
nginx_active: 当前nginx正在处理的活动连接数,等于Reading/Writing/Waiting总和
|
||||
nginx_handled: 自nginx启动起,处理过的客户端连接总数
|
||||
nginx_reading: 正在读取HTTP请求头部的连接总数
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Krrp-Alive请求,该值会大于handled值
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Keep-Alive请求,该值会大于handled值
|
||||
nginx_upstream_check_fall: upstream_check模块检测到后端失败的次数
|
||||
nginx_upstream_check_rise: upstream_check模块对后端的检测次数
|
||||
nginx_upstream_check_status_code: 后端upstream的状态,up为1,down为0
|
||||
@@ -327,7 +327,7 @@ en:
|
||||
mem_huge_page_size: "The size of each big page"
|
||||
mem_huge_pages_free: "The number of Huge Pages in the pool that have not been allocated"
|
||||
mem_huge_pages_total: "Reserve the total number of Huge Pages"
|
||||
mem_inactive: "Free memory (including the memory of free and avalible)"
|
||||
mem_inactive: "Free memory (including the memory of free and available)"
|
||||
mem_low_free: "Unused low size"
|
||||
mem_low_total: "The total size of the low memory memory can achieve the same role of high memory, and it can be used by the kernel to record some of its own data structure"
|
||||
mem_mapped: "The size of the mapping of equipment and files"
|
||||
@@ -369,7 +369,7 @@ en:
|
||||
netstat_tcp_time_wait: "Time _ WAIT status network link number"
|
||||
netstat_udp_socket: "Number of network links in UDP status"
|
||||
|
||||
processes_blocked: "The number of processes in the unreprudible sleep state('U','D','L')"
|
||||
processes_blocked: "The number of processes in the uninterruptible sleep state('U','D','L')"
|
||||
processes_dead: "Number of processes in recycling('X')"
|
||||
processes_idle: "Number of idle processes hanging('I')"
|
||||
processes_paging: "Number of paging processes('P')"
|
||||
@@ -397,7 +397,7 @@ en:
|
||||
nginx_active: "The current number of activity connections that Nginx is being processed is equal to Reading/Writing/Waiting"
|
||||
nginx_handled: "Starting from Nginx, the total number of client connections that have been processed"
|
||||
nginx_reading: "Reading the total number of connections on the http request header"
|
||||
nginx_requests: "Since nginx is started, the total number of client requests processed, due to the existence of HTTP Krrp - Alive requests, this value will be greater than the handled value"
|
||||
nginx_requests: "Since nginx is started, the total number of client requests processed, due to the existence of HTTP Keep-Alive requests, this value will be greater than the handled value"
|
||||
nginx_upstream_check_fall: "UPStream_CHECK module detects the number of back -end failures"
|
||||
nginx_upstream_check_rise: "UPSTREAM _ Check module to detect the number of back -end"
|
||||
nginx_upstream_check_status_code: "The state of the backstream is 1, and the down is 0"
|
||||
@@ -663,7 +663,7 @@ en:
|
||||
# vmalloc已分配的内存,虚拟地址空间上的连续的内存
|
||||
node_memory_VmallocUsed_bytes: Amount of vmalloc area which is used
|
||||
# vmalloc区可用的连续最大快的大小,通过此指标可以知道vmalloc可分配连续内存的最大值
|
||||
node_memory_VmallocChunk_bytes: Largest contigious block of vmalloc area which is free
|
||||
node_memory_VmallocChunk_bytes: Largest contiguous block of vmalloc area which is free
|
||||
# 内存的硬件故障删除掉的内存页的总大小
|
||||
node_memory_HardwareCorrupted_bytes: Amount of RAM that the kernel identified as corrupted / not working
|
||||
# 用于在虚拟和物理内存地址之间映射的内存
|
||||
@@ -700,7 +700,7 @@ en:
|
||||
# 匿名页内存大小
|
||||
node_memory_AnonPages_bytes: Memory in user pages not backed by files
|
||||
# 被关联的内存页大小
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmaped, such as libraries
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mapped, such as libraries
|
||||
# file-backed内存页缓存大小
|
||||
node_memory_Cached_bytes: Parked file data (file content) cache
|
||||
# 系统中有多少匿名页曾经被swap-out、现在又被swap-in并且swap-in之后页面中的内容一直没发生变化
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[logs]
|
||||
## just a placholder
|
||||
## just a placeholder
|
||||
api_key = "ef4ahfbwzwwtlwfpbertgq1i6mq0ab1q"
|
||||
## enable log collect or not
|
||||
enable = true
|
||||
|
||||
@@ -86,8 +86,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
@@ -53,7 +53,7 @@ zh:
|
||||
mem_huge_page_size: 每个大页的大小
|
||||
mem_huge_pages_free: 池中尚未分配的 HugePages 数量
|
||||
mem_huge_pages_total: 预留HugePages的总个数
|
||||
mem_inactive: 空闲的内存数(包括free和avalible的内存)
|
||||
mem_inactive: 空闲的内存数(包括free和available的内存)
|
||||
mem_low_free: 未被使用的低位大小
|
||||
mem_low_total: 低位内存总大小,低位可以达到高位内存一样的作用,而且它还能够被内核用来记录一些自己的数据结构
|
||||
mem_mapped: 设备和文件等映射的大小
|
||||
@@ -105,8 +105,8 @@ zh:
|
||||
netstat_udp_mem: UDP套接字内存Page使用量
|
||||
netstat_udplite_inuse: 正在使用的 udp lite 数量
|
||||
netstat_raw_inuse: 正在使用的 raw socket 数量
|
||||
netstat_frag_inuse: ip fragement 数量
|
||||
netstat_frag_memory: ip fragement 已经分配的内存(byte)
|
||||
netstat_frag_inuse: ip fragment 数量
|
||||
netstat_frag_memory: ip fragment 已经分配的内存(byte)
|
||||
|
||||
#[ping]
|
||||
ping_percent_packet_loss: ping数据包丢失百分比(%)
|
||||
@@ -143,7 +143,7 @@ zh:
|
||||
nginx_active: 当前nginx正在处理的活动连接数,等于Reading/Writing/Waiting总和
|
||||
nginx_handled: 自nginx启动起,处理过的客户端连接总数
|
||||
nginx_reading: 正在读取HTTP请求头部的连接总数
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Krrp-Alive请求,该值会大于handled值
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Keep-Alive请求,该值会大于handled值
|
||||
nginx_upstream_check_fall: upstream_check模块检测到后端失败的次数
|
||||
nginx_upstream_check_rise: upstream_check模块对后端的检测次数
|
||||
nginx_upstream_check_status_code: 后端upstream的状态,up为1,down为0
|
||||
@@ -327,7 +327,7 @@ en:
|
||||
mem_huge_page_size: "The size of each big page"
|
||||
mem_huge_pages_free: "The number of Huge Pages in the pool that have not been allocated"
|
||||
mem_huge_pages_total: "Reserve the total number of Huge Pages"
|
||||
mem_inactive: "Free memory (including the memory of free and avalible)"
|
||||
mem_inactive: "Free memory (including the memory of free and available)"
|
||||
mem_low_free: "Unused low size"
|
||||
mem_low_total: "The total size of low memory; low memory can serve the same purposes as high memory, and it can also be used by the kernel to record some of its own data structures"
|
||||
mem_mapped: "The size of the mapping of equipment and files"
|
||||
@@ -369,7 +369,7 @@ en:
|
||||
netstat_tcp_time_wait: "Number of network connections in TIME_WAIT state"
|
||||
netstat_udp_socket: "Number of network links in UDP status"
|
||||
|
||||
processes_blocked: "The number of processes in the unreprudible sleep state('U','D','L')"
|
||||
processes_blocked: "The number of processes in the uninterruptible sleep state('U','D','L')"
|
||||
processes_dead: "Number of processes in recycling('X')"
|
||||
processes_idle: "Number of idle processes hanging('I')"
|
||||
processes_paging: "Number of paging processes('P')"
|
||||
@@ -397,7 +397,7 @@ en:
|
||||
nginx_active: "The current number of activity connections that Nginx is being processed is equal to Reading/Writing/Waiting"
|
||||
nginx_handled: "Starting from Nginx, the total number of client connections that have been processed"
|
||||
nginx_reading: "Reading the total number of connections on the http request header"
|
||||
nginx_requests: "Since nginx is started, the total number of client requests processed, due to the existence of HTTP Krrp - Alive requests, this value will be greater than the handled value"
|
||||
nginx_requests: "Since nginx is started, the total number of client requests processed, due to the existence of HTTP Keep-Alive requests, this value will be greater than the handled value"
|
||||
nginx_upstream_check_fall: "Number of backend failures detected by the upstream_check module"
|
||||
nginx_upstream_check_rise: "Number of backend checks performed by the upstream_check module"
|
||||
nginx_upstream_check_status_code: "Status of the backend upstream: 1 for up, 0 for down"
|
||||
@@ -663,7 +663,7 @@ en:
|
||||
# vmalloc已分配的内存,虚拟地址空间上的连续的内存
|
||||
node_memory_VmallocUsed_bytes: Amount of vmalloc area which is used
|
||||
# vmalloc区可用的连续最大块的大小,通过此指标可以知道vmalloc可分配连续内存的最大值
|
||||
node_memory_VmallocChunk_bytes: Largest contigious block of vmalloc area which is free
|
||||
node_memory_VmallocChunk_bytes: Largest contiguous block of vmalloc area which is free
|
||||
# 内存的硬件故障删除掉的内存页的总大小
|
||||
node_memory_HardwareCorrupted_bytes: Amount of RAM that the kernel identified as corrupted / not working
|
||||
# 用于在虚拟和物理内存地址之间映射的内存
|
||||
@@ -700,7 +700,7 @@ en:
|
||||
# 匿名页内存大小
|
||||
node_memory_AnonPages_bytes: Memory in user pages not backed by files
|
||||
# 被关联的内存页大小
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmaped, such as libraries
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmapped, such as libraries
|
||||
# file-backed内存页缓存大小
|
||||
node_memory_Cached_bytes: Parked file data (file content) cache
|
||||
# 系统中有多少匿名页曾经被swap-out、现在又被swap-in并且swap-in之后页面中的内容一直没发生变化
|
||||
|
||||
@@ -86,8 +86,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
@@ -53,7 +53,7 @@ zh:
|
||||
mem_huge_page_size: 每个大页的大小
|
||||
mem_huge_pages_free: 池中尚未分配的 HugePages 数量
|
||||
mem_huge_pages_total: 预留HugePages的总个数
|
||||
mem_inactive: 空闲的内存数(包括free和avalible的内存)
|
||||
mem_inactive: 空闲的内存数(包括free和available的内存)
|
||||
mem_low_free: 未被使用的低位大小
|
||||
mem_low_total: 低位内存总大小,低位可以达到高位内存一样的作用,而且它还能够被内核用来记录一些自己的数据结构
|
||||
mem_mapped: 设备和文件等映射的大小
|
||||
@@ -105,8 +105,8 @@ zh:
|
||||
netstat_udp_mem: UDP套接字内存Page使用量
|
||||
netstat_udplite_inuse: 正在使用的 udp lite 数量
|
||||
netstat_raw_inuse: 正在使用的 raw socket 数量
|
||||
netstat_frag_inuse: ip fragement 数量
|
||||
netstat_frag_memory: ip fragement 已经分配的内存(byte)
|
||||
netstat_frag_inuse: ip fragment 数量
|
||||
netstat_frag_memory: ip fragment 已经分配的内存(byte)
|
||||
|
||||
#[ping]
|
||||
ping_percent_packet_loss: ping数据包丢失百分比(%)
|
||||
@@ -143,7 +143,7 @@ zh:
|
||||
nginx_active: 当前nginx正在处理的活动连接数,等于Reading/Writing/Waiting总和
|
||||
nginx_handled: 自nginx启动起,处理过的客户端连接总数
|
||||
nginx_reading: 正在读取HTTP请求头部的连接总数
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Krrp-Alive请求,该值会大于handled值
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Keep-Alive请求,该值会大于handled值
|
||||
nginx_upstream_check_fall: upstream_check模块检测到后端失败的次数
|
||||
nginx_upstream_check_rise: upstream_check模块对后端的检测次数
|
||||
nginx_upstream_check_status_code: 后端upstream的状态,up为1,down为0
|
||||
@@ -327,7 +327,7 @@ en:
|
||||
mem_huge_page_size: "The size of each big page"
|
||||
mem_huge_pages_free: "The number of Huge Pages in the pool that have not been allocated"
|
||||
mem_huge_pages_total: "Reserve the total number of Huge Pages"
|
||||
mem_inactive: "Free memory (including the memory of free and avalible)"
|
||||
mem_inactive: "Free memory (including the memory of free and available)"
|
||||
mem_low_free: "Unused low size"
|
||||
mem_low_total: "The total size of low memory; low memory can serve the same purposes as high memory, and it can also be used by the kernel to record some of its own data structures"
|
||||
mem_mapped: "The size of the mapping of equipment and files"
|
||||
@@ -369,7 +369,7 @@ en:
|
||||
netstat_tcp_time_wait: "Number of network connections in TIME_WAIT state"
|
||||
netstat_udp_socket: "Number of network links in UDP status"
|
||||
|
||||
processes_blocked: "The number of processes in the unreprudible sleep state('U','D','L')"
|
||||
processes_blocked: "The number of processes in the uninterruptible sleep state('U','D','L')"
|
||||
processes_dead: "Number of processes in recycling('X')"
|
||||
processes_idle: "Number of idle processes hanging('I')"
|
||||
processes_paging: "Number of paging processes('P')"
|
||||
@@ -397,7 +397,7 @@ en:
|
||||
nginx_active: "The current number of activity connections that Nginx is being processed is equal to Reading/Writing/Waiting"
|
||||
nginx_handled: "Starting from Nginx, the total number of client connections that have been processed"
|
||||
nginx_reading: "Reading the total number of connections on the http request header"
|
||||
nginx_requests: "Since nginx is started, the total number of client requests processed, due to the existence of HTTP Krrp - Alive requests, this value will be greater than the handled value"
|
||||
nginx_requests: "Since nginx is started, the total number of client requests processed, due to the existence of HTTP Keep-Alive requests, this value will be greater than the handled value"
|
||||
nginx_upstream_check_fall: "Number of backend failures detected by the upstream_check module"
|
||||
nginx_upstream_check_rise: "Number of backend checks performed by the upstream_check module"
|
||||
nginx_upstream_check_status_code: "Status of the backend upstream: 1 for up, 0 for down"
|
||||
@@ -663,7 +663,7 @@ en:
|
||||
# vmalloc已分配的内存,虚拟地址空间上的连续的内存
|
||||
node_memory_VmallocUsed_bytes: Amount of vmalloc area which is used
|
||||
# vmalloc区可用的连续最大块的大小,通过此指标可以知道vmalloc可分配连续内存的最大值
|
||||
node_memory_VmallocChunk_bytes: Largest contigious block of vmalloc area which is free
|
||||
node_memory_VmallocChunk_bytes: Largest contiguous block of vmalloc area which is free
|
||||
# 内存的硬件故障删除掉的内存页的总大小
|
||||
node_memory_HardwareCorrupted_bytes: Amount of RAM that the kernel identified as corrupted / not working
|
||||
# 用于在虚拟和物理内存地址之间映射的内存
|
||||
@@ -700,7 +700,7 @@ en:
|
||||
# 匿名页内存大小
|
||||
node_memory_AnonPages_bytes: Memory in user pages not backed by files
|
||||
# 被关联的内存页大小
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmaped, such as libraries
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmapped, such as libraries
|
||||
# file-backed内存页缓存大小
|
||||
node_memory_Cached_bytes: Parked file data (file content) cache
|
||||
# 系统中有多少匿名页曾经被swap-out、现在又被swap-in并且swap-in之后页面中的内容一直没发生变化
|
||||
|
||||
@@ -209,6 +209,7 @@ CREATE TABLE board (
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
note varchar(1024) not null default '',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (group_id, name)
|
||||
) ;
|
||||
@@ -219,6 +220,7 @@ COMMENT ON COLUMN board.public IS '0:false 1:true';
|
||||
COMMENT ON COLUMN board.built_in IS '0:false 1:true';
|
||||
COMMENT ON COLUMN board.hide IS '0:false 1:true';
|
||||
COMMENT ON COLUMN board.public_cate IS '0 anonymous 1 login 2 busi';
|
||||
COMMENT ON COLUMN board.note IS 'note';
|
||||
|
||||
|
||||
-- for dashboard new version
|
||||
@@ -873,6 +875,7 @@ CREATE TABLE builtin_payloads (
|
||||
name VARCHAR(191) NOT NULL,
|
||||
tags VARCHAR(191) NOT NULL DEFAULT '',
|
||||
content TEXT NOT NULL,
|
||||
note VARCHAR(1024) NOT NULL DEFAULT '',
|
||||
created_at BIGINT NOT NULL DEFAULT 0,
|
||||
created_by VARCHAR(191) NOT NULL DEFAULT '',
|
||||
updated_at BIGINT NOT NULL DEFAULT 0,
|
||||
@@ -956,7 +959,7 @@ CREATE TABLE notify_rule (
|
||||
id bigserial PRIMARY KEY,
|
||||
name varchar(255) NOT NULL,
|
||||
description text,
|
||||
enable smallint NOT NULL DEFAULT 0,
|
||||
enable boolean DEFAULT false,
|
||||
user_group_ids varchar(255) NOT NULL DEFAULT '',
|
||||
notify_configs text,
|
||||
pipeline_configs text,
|
||||
@@ -971,7 +974,7 @@ CREATE TABLE notify_channel (
|
||||
name varchar(255) NOT NULL,
|
||||
ident varchar(255) NOT NULL,
|
||||
description text,
|
||||
enable smallint NOT NULL DEFAULT 0,
|
||||
enable boolean DEFAULT false,
|
||||
param_config text,
|
||||
request_type varchar(50) NOT NULL,
|
||||
request_config text,
|
||||
|
||||
@@ -90,8 +90,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
@@ -50,7 +50,7 @@ mem_high_total: 高位内存总大小(Highmem是指所有内存高于860MB的
|
||||
mem_huge_page_size: 每个大页的大小
|
||||
mem_huge_pages_free: 池中尚未分配的 HugePages 数量
|
||||
mem_huge_pages_total: 预留HugePages的总个数
|
||||
mem_inactive: 空闲的内存数(包括free和avalible的内存)
|
||||
mem_inactive: 空闲的内存数(包括free和available的内存)
|
||||
mem_low_free: 未被使用的低位大小
|
||||
mem_low_total: 低位内存总大小,低位可以达到高位内存一样的作用,而且它还能够被内核用来记录一些自己的数据结构
|
||||
mem_mapped: 设备和文件等映射的大小
|
||||
@@ -115,7 +115,7 @@ nginx_accepts: 自nginx启动起,与客户端建立过得连接总数
|
||||
nginx_active: 当前nginx正在处理的活动连接数,等于Reading/Writing/Waiting总和
|
||||
nginx_handled: 自nginx启动起,处理过的客户端连接总数
|
||||
nginx_reading: 正在读取HTTP请求头部的连接总数
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Krrp-Alive请求,该值会大于handled值
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Keep-Alive请求,该值会大于handled值
|
||||
nginx_upstream_check_fall: upstream_check模块检测到后端失败的次数
|
||||
nginx_upstream_check_rise: upstream_check模块对后端的检测次数
|
||||
nginx_upstream_check_status_code: 后端upstream的状态,up为1,down为0
|
||||
@@ -383,7 +383,7 @@ node_memory_VmallocTotal_bytes: Total size of vmalloc memory area
|
||||
# vmalloc已分配的内存,虚拟地址空间上的连续的内存
|
||||
node_memory_VmallocUsed_bytes: Amount of vmalloc area which is used
|
||||
# vmalloc区可用的连续最大块的大小,通过此指标可以知道vmalloc可分配连续内存的最大值
|
||||
node_memory_VmallocChunk_bytes: Largest contigious block of vmalloc area which is free
|
||||
node_memory_VmallocChunk_bytes: Largest contiguous block of vmalloc area which is free
|
||||
# 内存的硬件故障删除掉的内存页的总大小
|
||||
node_memory_HardwareCorrupted_bytes: Amount of RAM that the kernel identified as corrupted / not working
|
||||
# 用于在虚拟和物理内存地址之间映射的内存
|
||||
@@ -420,7 +420,7 @@ node_memory_Shmem_bytes: Used shared memory (shared between several processes, t
|
||||
# 匿名页内存大小
|
||||
node_memory_AnonPages_bytes: Memory in user pages not backed by files
|
||||
# 被关联的内存页大小
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmaped, such as libraries
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmapped, such as libraries
|
||||
# file-backed内存页缓存大小
|
||||
node_memory_Cached_bytes: Parked file data (file content) cache
|
||||
# 系统中有多少匿名页曾经被swap-out、现在又被swap-in并且swap-in之后页面中的内容一直没发生变化
|
||||
|
||||
@@ -61,7 +61,7 @@ CREATE TABLE `configs` (
|
||||
`external` bigint DEFAULT 0 COMMENT '0\\:built-in 1\\:external',
|
||||
`encrypted` bigint DEFAULT 0 COMMENT '0\\:plaintext 1\\:ciphertext',
|
||||
`create_at` bigint DEFAULT 0 COMMENT 'create_at',
|
||||
`create_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'cerate_by',
|
||||
`create_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'create_by',
|
||||
`update_at` bigint DEFAULT 0 COMMENT 'update_at',
|
||||
`update_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'update_by',
|
||||
PRIMARY KEY (`id`)
|
||||
@@ -192,6 +192,7 @@ CREATE TABLE `board` (
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
`note` varchar(1024) not null default '' comment 'note',
|
||||
`public_cate` bigint NOT NULL NOT NULL DEFAULT 0 COMMENT '0 anonymous 1 login 2 busi',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`group_id`, `name`),
|
||||
@@ -546,6 +547,7 @@ CREATE TABLE `builtin_payloads` (
|
||||
`name` varchar(191) NOT NULL COMMENT '''name of payload''',
|
||||
`tags` varchar(191) NOT NULL DEFAULT '' COMMENT '''tags of payload''',
|
||||
`content` longtext NOT NULL COMMENT '''content of payload''',
|
||||
`note` varchar(1024) NOT NULL DEFAULT '' COMMENT '''note of payload''',
|
||||
`created_at` bigint(20) NOT NULL DEFAULT 0 COMMENT '''create time''',
|
||||
`created_by` varchar(191) NOT NULL DEFAULT '' COMMENT '''creator''',
|
||||
`updated_at` bigint(20) NOT NULL DEFAULT 0 COMMENT '''update time''',
|
||||
@@ -674,7 +676,7 @@ CREATE TABLE `notify_tpl` (
|
||||
`name` varchar(255) not null,
|
||||
`content` text not null,
|
||||
`create_at` bigint DEFAULT 0 COMMENT 'create_at',
|
||||
`create_by` varchar(64) DEFAULT '' COMMENT 'cerate_by',
|
||||
`create_by` varchar(64) DEFAULT '' COMMENT 'create_by',
|
||||
`update_at` bigint DEFAULT 0 COMMENT 'update_at',
|
||||
`update_by` varchar(64) DEFAULT '' COMMENT 'update_by',
|
||||
PRIMARY KEY (`id`),
|
||||
@@ -723,7 +725,6 @@ CREATE TABLE `builtin_metrics` (
|
||||
`updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT '''updater''',
|
||||
`uuid` bigint NOT NULL DEFAULT 0 COMMENT '''uuid''',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `idx_collector_typ_name` (`lang`,`collector`, `typ`, `name`),
|
||||
INDEX `idx_uuid` (`uuid`),
|
||||
INDEX `idx_collector` (`collector`),
|
||||
INDEX `idx_typ` (`typ`),
|
||||
@@ -837,8 +838,8 @@ CREATE TABLE `event_pipeline` (
|
||||
`description` varchar(255) not null default '',
|
||||
`filter_enable` tinyint(1) not null default 0,
|
||||
`label_filters` text,
|
||||
`attribute_filters` text,
|
||||
`processors` text,
|
||||
`attr_filters` text,
|
||||
`processor_configs` text,
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
|
||||
@@ -13,7 +13,6 @@ CREATE TABLE `builtin_metrics` (
|
||||
`updated_at` bigint NOT NULL DEFAULT 0 COMMENT 'update time',
|
||||
`updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'updater',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `idx_collector_typ_name` (`lang`,`collector`, `typ`, `name`),
|
||||
INDEX `idx_collector` (`collector`),
|
||||
INDEX `idx_typ` (`typ`),
|
||||
INDEX `idx_name` (`name`),
|
||||
@@ -236,9 +235,8 @@ CREATE TABLE `event_pipeline` (
|
||||
`team_ids` text,
|
||||
`description` varchar(255) not null default '',
|
||||
`filter_enable` tinyint(1) not null default 0,
|
||||
`label_filters` text,
|
||||
`attribute_filters` text,
|
||||
`processors` text,
|
||||
`attr_filters` text,
|
||||
`processor_configs` text,
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
@@ -246,7 +244,21 @@ CREATE TABLE `event_pipeline` (
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
/* v8.0.0-next */
|
||||
/* v8.0.0 2025-05-15 */
|
||||
CREATE TABLE `embedded_product` (
|
||||
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
|
||||
`name` varchar(255) DEFAULT NULL,
|
||||
`url` varchar(255) DEFAULT NULL,
|
||||
`is_private` boolean DEFAULT NULL,
|
||||
`team_ids` varchar(255),
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
||||
/* v8.0.0 2025-05-29 */
|
||||
CREATE TABLE `source_token` (
|
||||
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
|
||||
`source_type` varchar(64) NOT NULL DEFAULT '' COMMENT 'source type',
|
||||
@@ -259,6 +271,25 @@ CREATE TABLE `source_token` (
|
||||
KEY `idx_source_type_id_token` (`source_type`, `source_id`, `token`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
||||
|
||||
|
||||
/* v8.0.0-beta.12 2025-06-03 */
|
||||
ALTER TABLE `alert_his_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';
|
||||
ALTER TABLE `alert_cur_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';
|
||||
|
||||
/* v8.0.0-beta.13 */
|
||||
-- 删除 builtin_metrics 表的 idx_collector_typ_name 唯一索引
|
||||
DROP INDEX IF EXISTS `idx_collector_typ_name` ON `builtin_metrics`;
|
||||
|
||||
/* v8.0.0 2025-07-03 */
|
||||
ALTER TABLE `builtin_metrics` ADD COLUMN `translation` TEXT COMMENT 'translation of metric' AFTER `lang`;
|
||||
|
||||
/* v8.4.0 2025-10-15 */
|
||||
ALTER TABLE `notify_rule` ADD COLUMN `extra_config` text COMMENT 'extra config';
|
||||
|
||||
/* v8.4.1 2025-11-10 */
|
||||
ALTER TABLE `alert_rule` ADD COLUMN `pipeline_configs` text COMMENT 'pipeline configs';
|
||||
|
||||
/* v8.4.2 2025-11-13 */
|
||||
ALTER TABLE `board` ADD COLUMN `note` varchar(1024) not null default '' comment 'note';
|
||||
ALTER TABLE `builtin_payloads` ADD COLUMN `note` varchar(1024) not null default '' comment 'note of payload';
|
||||
@@ -184,6 +184,7 @@ CREATE TABLE `board` (
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
`note` varchar(1024) not null default '',
|
||||
`public_cate` bigint not null default 0
|
||||
);
|
||||
CREATE UNIQUE INDEX idx_board_group_id_name ON `board` (group_id, name);
|
||||
@@ -491,6 +492,7 @@ CREATE TABLE `builtin_payloads` (
|
||||
`name` varchar(191) not null,
|
||||
`tags` varchar(191) not null default '',
|
||||
`content` longtext not null,
|
||||
`note` varchar(1024) not null default '',
|
||||
`created_at` bigint(20) not null default 0,
|
||||
`created_by` varchar(191) not null default '',
|
||||
`updated_at` bigint(20) not null default 0,
|
||||
@@ -656,7 +658,6 @@ CREATE TABLE `builtin_metrics` (
|
||||
`uuid integer` not null default 0
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX idx_collector_typ_name ON builtin_metrics (lang, collector, typ, name);
|
||||
CREATE INDEX idx_collector ON builtin_metrics (collector);
|
||||
CREATE INDEX idx_typ ON builtin_metrics (typ);
|
||||
CREATE INDEX idx_builtinmetric_name ON builtin_metrics (name);
|
||||
|
||||
@@ -8,8 +8,10 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/datasource"
|
||||
_ "github.com/ccfos/nightingale/v6/datasource/ck"
|
||||
_ "github.com/ccfos/nightingale/v6/datasource/doris"
|
||||
"github.com/ccfos/nightingale/v6/datasource/es"
|
||||
_ "github.com/ccfos/nightingale/v6/datasource/mysql"
|
||||
_ "github.com/ccfos/nightingale/v6/datasource/opensearch"
|
||||
_ "github.com/ccfos/nightingale/v6/datasource/postgresql"
|
||||
"github.com/ccfos/nightingale/v6/dskit/tdengine"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
@@ -20,6 +22,8 @@ import (
|
||||
|
||||
var FromAPIHook func()
|
||||
|
||||
var DatasourceProcessHook func(items []datasource.DatasourceInfo) []datasource.DatasourceInfo
|
||||
|
||||
func Init(ctx *ctx.Context, fromAPI bool) {
|
||||
go getDatasourcesFromDBLoop(ctx, fromAPI)
|
||||
}
|
||||
@@ -28,7 +32,7 @@ type ListInput struct {
|
||||
Page int `json:"p"`
|
||||
Limit int `json:"limit"`
|
||||
Category string `json:"category"`
|
||||
PluginType string `json:"plugin_type"` // promethues
|
||||
PluginType string `json:"plugin_type"` // prometheus
|
||||
Status string `json:"status"`
|
||||
}
|
||||
|
||||
@@ -82,8 +86,6 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
|
||||
|
||||
if item.PluginType == "elasticsearch" {
|
||||
esN9eToDatasourceInfo(&ds, item)
|
||||
} else if item.PluginType == "opensearch" {
|
||||
osN9eToDatasourceInfo(&ds, item)
|
||||
} else if item.PluginType == "tdengine" {
|
||||
tdN9eToDatasourceInfo(&ds, item)
|
||||
} else {
|
||||
@@ -100,6 +102,10 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
|
||||
atomic.StoreInt64(&PromDefaultDatasourceId, 0)
|
||||
}
|
||||
|
||||
if DatasourceProcessHook != nil {
|
||||
dss = DatasourceProcessHook(dss)
|
||||
}
|
||||
|
||||
PutDatasources(dss)
|
||||
} else {
|
||||
FromAPIHook()
|
||||
@@ -144,24 +150,6 @@ func esN9eToDatasourceInfo(ds *datasource.DatasourceInfo, item models.Datasource
|
||||
ds.Settings["es.enable_write"] = item.SettingsJson["enable_write"]
|
||||
}
|
||||
|
||||
// for opensearch
|
||||
func osN9eToDatasourceInfo(ds *datasource.DatasourceInfo, item models.Datasource) {
|
||||
ds.Settings = make(map[string]interface{})
|
||||
ds.Settings["os.nodes"] = []string{item.HTTPJson.Url}
|
||||
ds.Settings["os.timeout"] = item.HTTPJson.Timeout
|
||||
ds.Settings["os.basic"] = es.BasicAuth{
|
||||
Username: item.AuthJson.BasicAuthUser,
|
||||
Password: item.AuthJson.BasicAuthPassword,
|
||||
}
|
||||
ds.Settings["os.tls"] = es.TLS{
|
||||
SkipTlsVerify: item.HTTPJson.TLS.SkipTlsVerify,
|
||||
}
|
||||
ds.Settings["os.version"] = item.SettingsJson["version"]
|
||||
ds.Settings["os.headers"] = item.HTTPJson.Headers
|
||||
ds.Settings["os.min_interval"] = item.SettingsJson["min_interval"]
|
||||
ds.Settings["os.max_shard"] = item.SettingsJson["max_shard"]
|
||||
}
|
||||
|
||||
func PutDatasources(items []datasource.DatasourceInfo) {
|
||||
ids := make([]int64, 0)
|
||||
for _, item := range items {
|
||||
@@ -181,7 +169,7 @@ func PutDatasources(items []datasource.DatasourceInfo) {
|
||||
|
||||
ds, err := datasource.GetDatasourceByType(typ, item.Settings)
|
||||
if err != nil {
|
||||
logger.Warningf("get plugin:%+v fail: %v", item, err)
|
||||
logger.Debugf("get plugin:%+v fail: %v", item, err)
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -2,10 +2,10 @@ package clickhouse
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
|
||||
"github.com/ClickHouse/clickhouse-go/v2"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/toolkits/pkg/net/httplib"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
ckDriver "gorm.io/driver/clickhouse"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
@@ -26,11 +26,19 @@ const (
|
||||
)
|
||||
|
||||
type Clickhouse struct {
|
||||
Nodes []string `json:"ck.nodes" mapstructure:"ck.nodes"`
|
||||
User string `json:"ck.user" mapstructure:"ck.user"`
|
||||
Password string `json:"ck.password" mapstructure:"ck.password"`
|
||||
Timeout int `json:"ck.timeout" mapstructure:"ck.timeout"`
|
||||
MaxQueryRows int `json:"ck.max_query_rows" mapstructure:"ck.max_query_rows"`
|
||||
Nodes []string `json:"ck.nodes" mapstructure:"ck.nodes"`
|
||||
User string `json:"ck.user" mapstructure:"ck.user"`
|
||||
Password string `json:"ck.password" mapstructure:"ck.password"`
|
||||
Timeout int `json:"ck.timeout" mapstructure:"ck.timeout"`
|
||||
MaxQueryRows int `json:"ck.max_query_rows" mapstructure:"ck.max_query_rows"`
|
||||
Protocol string `json:"ck.protocol" mapstructure:"ck.protocol"`
|
||||
SkipSSLVerify bool `json:"ck.skip_ssl_verify" mapstructure:"ck.skip_ssl_verify"`
|
||||
SecureConnection bool `json:"ck.secure_connection" mapstructure:"ck.secure_connection"`
|
||||
|
||||
// 连接池配置(可选)
|
||||
MaxIdleConns int `json:"ck.max_idle_conns" mapstructure:"ck.max_idle_conns"` // 最大空闲连接数
|
||||
MaxOpenConns int `json:"ck.max_open_conns" mapstructure:"ck.max_open_conns"` // 最大打开连接数
|
||||
ConnMaxLifetime int `json:"ck.conn_max_lifetime" mapstructure:"ck.conn_max_lifetime"` // 连接最大生命周期(秒)
|
||||
|
||||
Client *gorm.DB `json:"-"`
|
||||
ClientByHTTP *sql.DB `json:"-"`
|
||||
@@ -44,46 +52,129 @@ func (c *Clickhouse) InitCli() error {
|
||||
if len(c.Nodes) == 0 {
|
||||
return fmt.Errorf("not found ck shard, please check datasource config")
|
||||
}
|
||||
// 前端只允许 host:port,直接使用第一个节点
|
||||
addr := c.Nodes[0]
|
||||
url := addr
|
||||
if !strings.HasPrefix(url, "http://") {
|
||||
url = "http://" + url
|
||||
}
|
||||
resp, err := httplib.Get(url).SetTimeout(time.Second * 1).Response()
|
||||
// 忽略HTTP Code错误, 因为可能不是HTTP协议
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
// HTTP 协议
|
||||
if resp.StatusCode == 200 {
|
||||
jsonBytes, _ := io.ReadAll(resp.Body)
|
||||
if len(jsonBytes) > 0 && strings.Contains(strings.ToLower(string(jsonBytes)), "ok.") {
|
||||
ckconn := clickhouse.OpenDB(&clickhouse.Options{
|
||||
Addr: []string{addr},
|
||||
Auth: clickhouse.Auth{
|
||||
Username: c.User,
|
||||
Password: c.Password,
|
||||
},
|
||||
Settings: clickhouse.Settings{
|
||||
"max_execution_time": 60,
|
||||
},
|
||||
|
||||
prot := strings.ToLower(strings.TrimSpace(c.Protocol))
|
||||
// 如果用户显式指定 protocol,只允许 http 或 native
|
||||
if prot != "" {
|
||||
if prot != "http" && prot != "native" {
|
||||
return fmt.Errorf("unsupported clickhouse protocol: %s, only `http`, `https` or `native` allowed", c.Protocol)
|
||||
}
|
||||
|
||||
// HTTP(S) 路径(使用 clickhouse-go HTTP client)
|
||||
if prot == "http" {
|
||||
opts := &clickhouse.Options{
|
||||
Addr: []string{addr},
|
||||
Auth: clickhouse.Auth{Username: c.User, Password: c.Password},
|
||||
Settings: clickhouse.Settings{"max_execution_time": 60},
|
||||
DialTimeout: 10 * time.Second,
|
||||
Protocol: clickhouse.HTTP,
|
||||
})
|
||||
}
|
||||
// 仅当显式指定 https 时才启用 TLS 并使用 SkipSSL 控制 InsecureSkipVerify
|
||||
if c.SecureConnection {
|
||||
opts.TLS = &tls.Config{InsecureSkipVerify: c.SkipSSLVerify}
|
||||
}
|
||||
ckconn := clickhouse.OpenDB(opts)
|
||||
if ckconn == nil {
|
||||
return errors.New("db conn failed")
|
||||
}
|
||||
// 应用连接池配置到 HTTP sql.DB
|
||||
if c.MaxIdleConns > 0 {
|
||||
ckconn.SetMaxIdleConns(c.MaxIdleConns)
|
||||
}
|
||||
if c.MaxOpenConns > 0 {
|
||||
ckconn.SetMaxOpenConns(c.MaxOpenConns)
|
||||
}
|
||||
if c.ConnMaxLifetime > 0 {
|
||||
ckconn.SetConnMaxLifetime(time.Duration(c.ConnMaxLifetime) * time.Second)
|
||||
}
|
||||
c.ClientByHTTP = ckconn
|
||||
return nil
|
||||
}
|
||||
|
||||
// native 路径(使用 gorm + native driver)
|
||||
dsn := fmt.Sprintf(ckDataSource, c.User, c.Password, addr)
|
||||
// 如果启用了 SecureConnection,为 DSN 添加 TLS 参数;SkipSSLVerify 控制是否跳过证书校验
|
||||
if c.SecureConnection {
|
||||
dsn = dsn + "&secure=true"
|
||||
if c.SkipSSLVerify {
|
||||
dsn = dsn + "&skip_verify=true"
|
||||
}
|
||||
}
|
||||
db, err := gorm.Open(
|
||||
ckDriver.New(
|
||||
ckDriver.Config{
|
||||
DSN: dsn,
|
||||
DisableDatetimePrecision: true,
|
||||
DontSupportRenameColumn: true,
|
||||
SkipInitializeWithVersion: false,
|
||||
}),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// 应用连接池配置到 gorm 底层 *sql.DB
|
||||
if sqlDB, derr := db.DB(); derr == nil {
|
||||
if c.MaxIdleConns > 0 {
|
||||
sqlDB.SetMaxIdleConns(c.MaxIdleConns)
|
||||
}
|
||||
if c.MaxOpenConns > 0 {
|
||||
sqlDB.SetMaxOpenConns(c.MaxOpenConns)
|
||||
}
|
||||
if c.ConnMaxLifetime > 0 {
|
||||
sqlDB.SetConnMaxLifetime(time.Duration(c.ConnMaxLifetime) * time.Second)
|
||||
}
|
||||
} else {
|
||||
logger.Debugf("clickhouse: get native sql DB failed: %v", derr)
|
||||
}
|
||||
c.Client = db
|
||||
return nil
|
||||
}
|
||||
|
||||
opts := &clickhouse.Options{
|
||||
Addr: []string{addr},
|
||||
Auth: clickhouse.Auth{Username: c.User, Password: c.Password},
|
||||
Settings: clickhouse.Settings{"max_execution_time": 60},
|
||||
DialTimeout: 10 * time.Second,
|
||||
Protocol: clickhouse.HTTP,
|
||||
}
|
||||
|
||||
ckconn := clickhouse.OpenDB(opts)
|
||||
if ckconn != nil {
|
||||
// 做一次 Ping 校验,避免把 native 端口误当作 HTTP 使用
|
||||
if err := ckconn.Ping(); err == nil {
|
||||
if c.MaxIdleConns > 0 {
|
||||
ckconn.SetMaxIdleConns(c.MaxIdleConns)
|
||||
}
|
||||
if c.MaxOpenConns > 0 {
|
||||
ckconn.SetMaxOpenConns(c.MaxOpenConns)
|
||||
}
|
||||
if c.ConnMaxLifetime > 0 {
|
||||
ckconn.SetConnMaxLifetime(time.Duration(c.ConnMaxLifetime) * time.Second)
|
||||
}
|
||||
c.ClientByHTTP = ckconn
|
||||
return nil
|
||||
} else {
|
||||
logger.Debugf("clickhouse http ping failed for %s, fallback to native: %v", addr, err)
|
||||
_ = ckconn.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// 作为最后回退,尝试 native 连接
|
||||
host := strings.TrimPrefix(strings.TrimPrefix(addr, "http://"), "https://")
|
||||
dsn := fmt.Sprintf(ckDataSource, c.User, c.Password, host)
|
||||
// 如果启用了 SecureConnection,为 DSN 添加 TLS 参数;SkipSSLVerify 控制是否跳过证书校验
|
||||
if c.SecureConnection {
|
||||
dsn = dsn + "&secure=true"
|
||||
if c.SkipSSLVerify {
|
||||
dsn = dsn + "&skip_verify=true"
|
||||
}
|
||||
}
|
||||
db, err := gorm.Open(
|
||||
ckDriver.New(
|
||||
ckDriver.Config{
|
||||
DSN: fmt.Sprintf(ckDataSource,
|
||||
c.User, c.Password, addr),
|
||||
DSN: dsn,
|
||||
DisableDatetimePrecision: true,
|
||||
DontSupportRenameColumn: true,
|
||||
SkipInitializeWithVersion: false,
|
||||
@@ -92,9 +183,18 @@ func (c *Clickhouse) InitCli() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if sqlDB, derr := db.DB(); derr == nil {
|
||||
if c.MaxIdleConns > 0 {
|
||||
sqlDB.SetMaxIdleConns(c.MaxIdleConns)
|
||||
}
|
||||
if c.MaxOpenConns > 0 {
|
||||
sqlDB.SetMaxOpenConns(c.MaxOpenConns)
|
||||
}
|
||||
if c.ConnMaxLifetime > 0 {
|
||||
sqlDB.SetConnMaxLifetime(time.Duration(c.ConnMaxLifetime) * time.Second)
|
||||
}
|
||||
}
|
||||
c.Client = db
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -129,9 +229,7 @@ func (c *Clickhouse) QueryRows(ctx context.Context, query string) (*sql.Rows, er
|
||||
|
||||
// ShowDatabases lists all databases in Clickhouse
|
||||
func (c *Clickhouse) ShowDatabases(ctx context.Context) ([]string, error) {
|
||||
var (
|
||||
res []string
|
||||
)
|
||||
res := make([]string, 0)
|
||||
|
||||
rows, err := c.QueryRows(ctx, ShowDatabases)
|
||||
if err != nil {
|
||||
@@ -151,9 +249,7 @@ func (c *Clickhouse) ShowDatabases(ctx context.Context) ([]string, error) {
|
||||
|
||||
// ShowTables lists all tables in a given database
|
||||
func (c *Clickhouse) ShowTables(ctx context.Context, database string) ([]string, error) {
|
||||
var (
|
||||
res []string
|
||||
)
|
||||
res := make([]string, 0)
|
||||
|
||||
showTables := fmt.Sprintf(ShowTables, database)
|
||||
rows, err := c.QueryRows(ctx, showTables)
|
||||
|
||||
543
dskit/doris/doris.go
Normal file
543
dskit/doris/doris.go
Normal file
@@ -0,0 +1,543 @@
|
||||
package doris
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/dskit/pool"
|
||||
"github.com/ccfos/nightingale/v6/dskit/types"
|
||||
|
||||
_ "github.com/go-sql-driver/mysql" // MySQL driver
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
||||
// Doris holds the connection settings for a Doris datasource. Values are
// decoded from a settings map using the "doris.*" keys given in the tags.
type Doris struct {
	// Addr is the FE MySQL-protocol endpoint; it is placed inside tcp(...) in the DSN.
	Addr string `json:"doris.addr" mapstructure:"doris.addr"`
	// FeAddr is the FE HTTP endpoint.
	FeAddr string `json:"doris.fe_addr" mapstructure:"doris.fe_addr"`
	// User is the account name used in the DSN.
	User string `json:"doris.user" mapstructure:"doris.user"`
	// Password is the account password used in the DSN.
	Password string `json:"doris.password" mapstructure:"doris.password"`
	// Timeout is the per-operation timeout in seconds; 0 means 60.
	Timeout int `json:"doris.timeout" mapstructure:"doris.timeout"`
	// MaxIdleConns is passed to sql.DB.SetMaxIdleConns; 0 means 10.
	MaxIdleConns int `json:"doris.max_idle_conns" mapstructure:"doris.max_idle_conns"`
	// MaxOpenConns is passed to sql.DB.SetMaxOpenConns; 0 means 100.
	MaxOpenConns int `json:"doris.max_open_conns" mapstructure:"doris.max_open_conns"`
	// ConnMaxLifetime is the connection lifetime in seconds; 0 means 14400.
	ConnMaxLifetime int `json:"doris.conn_max_lifetime" mapstructure:"doris.conn_max_lifetime"`
	// MaxQueryRows caps the number of rows a query may return; 0 means 500.
	MaxQueryRows int `json:"doris.max_query_rows" mapstructure:"doris.max_query_rows"`
	// ClusterName is not read anywhere in this file.
	// NOTE(review): presumably consumed by callers - confirm.
	ClusterName string `json:"doris.cluster_name" mapstructure:"doris.cluster_name"`
	// EnableWrite is not read anywhere in this file.
	// NOTE(review): presumably gates write statements upstream - confirm.
	EnableWrite bool `json:"doris.enable_write" mapstructure:"doris.enable_write"`
}
|
||||
|
||||
// NewDorisWithSettings initializes a new Doris instance with the given settings
|
||||
func NewDorisWithSettings(ctx context.Context, settings interface{}) (*Doris, error) {
|
||||
newest := new(Doris)
|
||||
settingsMap := map[string]interface{}{}
|
||||
if reflect.TypeOf(settings).Kind() == reflect.String {
|
||||
if err := json.Unmarshal([]byte(settings.(string)), &settingsMap); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
var assert bool
|
||||
settingsMap, assert = settings.(map[string]interface{})
|
||||
if !assert {
|
||||
return nil, errors.New("settings type invalid")
|
||||
}
|
||||
}
|
||||
if err := mapstructure.Decode(settingsMap, newest); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newest, nil
|
||||
}
|
||||
|
||||
// NewConn establishes a new connection to Doris
|
||||
func (d *Doris) NewConn(ctx context.Context, database string) (*sql.DB, error) {
|
||||
if len(d.Addr) == 0 {
|
||||
return nil, errors.New("empty fe-node addr")
|
||||
}
|
||||
|
||||
// Set default values similar to postgres implementation
|
||||
if d.Timeout == 0 {
|
||||
d.Timeout = 60
|
||||
}
|
||||
if d.MaxIdleConns == 0 {
|
||||
d.MaxIdleConns = 10
|
||||
}
|
||||
if d.MaxOpenConns == 0 {
|
||||
d.MaxOpenConns = 100
|
||||
}
|
||||
if d.ConnMaxLifetime == 0 {
|
||||
d.ConnMaxLifetime = 14400
|
||||
}
|
||||
if d.MaxQueryRows == 0 {
|
||||
d.MaxQueryRows = 500
|
||||
}
|
||||
|
||||
var keys []string
|
||||
keys = append(keys, d.Addr)
|
||||
keys = append(keys, d.Password, d.User)
|
||||
if len(database) > 0 {
|
||||
keys = append(keys, database)
|
||||
}
|
||||
cachedkey := strings.Join(keys, ":")
|
||||
// cache conn with database
|
||||
conn, ok := pool.PoolClient.Load(cachedkey)
|
||||
if ok {
|
||||
return conn.(*sql.DB), nil
|
||||
}
|
||||
var db *sql.DB
|
||||
var err error
|
||||
defer func() {
|
||||
if db != nil && err == nil {
|
||||
pool.PoolClient.Store(cachedkey, db)
|
||||
}
|
||||
}()
|
||||
|
||||
// Simplified connection logic for Doris using MySQL driver
|
||||
dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8", d.User, d.Password, d.Addr, database)
|
||||
db, err = sql.Open("mysql", dsn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Set connection pool configuration
|
||||
db.SetMaxIdleConns(d.MaxIdleConns)
|
||||
db.SetMaxOpenConns(d.MaxOpenConns)
|
||||
db.SetConnMaxLifetime(time.Duration(d.ConnMaxLifetime) * time.Second)
|
||||
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// createTimeoutContext creates a context with timeout based on Doris configuration
|
||||
func (d *Doris) createTimeoutContext(ctx context.Context) (context.Context, context.CancelFunc) {
|
||||
timeout := d.Timeout
|
||||
if timeout == 0 {
|
||||
timeout = 60
|
||||
}
|
||||
return context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
|
||||
}
|
||||
|
||||
// ShowDatabases lists all databases in Doris
|
||||
func (d *Doris) ShowDatabases(ctx context.Context) ([]string, error) {
|
||||
timeoutCtx, cancel := d.createTimeoutContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
db, err := d.NewConn(timeoutCtx, "")
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
|
||||
rows, err := db.QueryContext(timeoutCtx, "SHOW DATABASES")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
databases := make([]string, 0)
|
||||
for rows.Next() {
|
||||
var dbName string
|
||||
if err := rows.Scan(&dbName); err != nil {
|
||||
continue
|
||||
}
|
||||
databases = append(databases, dbName)
|
||||
}
|
||||
return databases, nil
|
||||
}
|
||||
|
||||
// ShowResources lists all resources with type resourceType in Doris
|
||||
func (d *Doris) ShowResources(ctx context.Context, resourceType string) ([]string, error) {
|
||||
timeoutCtx, cancel := d.createTimeoutContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
db, err := d.NewConn(timeoutCtx, "")
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
|
||||
// 使用 SHOW RESOURCES 命令
|
||||
query := fmt.Sprintf("SHOW RESOURCES WHERE RESOURCETYPE = '%s'", resourceType)
|
||||
rows, err := db.QueryContext(timeoutCtx, query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute query: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
distinctName := make(map[string]struct{})
|
||||
|
||||
// 获取列信息
|
||||
columns, err := rows.Columns()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get columns: %w", err)
|
||||
}
|
||||
|
||||
// 准备接收数据的变量
|
||||
values := make([]interface{}, len(columns))
|
||||
valuePtrs := make([]interface{}, len(columns))
|
||||
for i := range values {
|
||||
valuePtrs[i] = &values[i]
|
||||
}
|
||||
|
||||
// 遍历结果集
|
||||
for rows.Next() {
|
||||
err := rows.Scan(valuePtrs...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error scanning row: %w", err)
|
||||
}
|
||||
// 提取资源名称并添加到 map 中(自动去重)
|
||||
if name, ok := values[0].([]byte); ok {
|
||||
distinctName[string(name)] = struct{}{}
|
||||
} else if nameStr, ok := values[0].(string); ok {
|
||||
distinctName[nameStr] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error iterating rows: %w", err)
|
||||
}
|
||||
|
||||
// 将 map 转换为切片
|
||||
resources := make([]string, 0)
|
||||
for name := range distinctName {
|
||||
resources = append(resources, name)
|
||||
}
|
||||
|
||||
return resources, nil
|
||||
}
|
||||
|
||||
// ShowTables lists all tables in a given database
|
||||
func (d *Doris) ShowTables(ctx context.Context, database string) ([]string, error) {
|
||||
timeoutCtx, cancel := d.createTimeoutContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
db, err := d.NewConn(timeoutCtx, database)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
query := fmt.Sprintf("SHOW TABLES IN %s", database)
|
||||
rows, err := db.QueryContext(timeoutCtx, query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
tables := make([]string, 0)
|
||||
for rows.Next() {
|
||||
var tableName string
|
||||
if err := rows.Scan(&tableName); err != nil {
|
||||
continue
|
||||
}
|
||||
tables = append(tables, tableName)
|
||||
}
|
||||
return tables, nil
|
||||
}
|
||||
|
||||
// DescTable describes the schema of a specified table in Doris
|
||||
func (d *Doris) DescTable(ctx context.Context, database, table string) ([]*types.ColumnProperty, error) {
|
||||
timeoutCtx, cancel := d.createTimeoutContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
db, err := d.NewConn(timeoutCtx, database)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
query := fmt.Sprintf("DESCRIBE %s.%s", database, table)
|
||||
rows, err := db.QueryContext(timeoutCtx, query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
// 日志报表中需要把 .type 转化成内部类型
|
||||
// TODO: 是否有复合类型, Array/JSON/Tuple/Nested, 是否有更多的类型
|
||||
convertDorisType := func(origin string) (string, bool) {
|
||||
lower := strings.ToLower(origin)
|
||||
switch lower {
|
||||
case "double":
|
||||
return types.LogExtractValueTypeFloat, true
|
||||
|
||||
case "datetime", "date":
|
||||
return types.LogExtractValueTypeDate, false
|
||||
|
||||
case "text":
|
||||
return types.LogExtractValueTypeText, true
|
||||
|
||||
default:
|
||||
if strings.Contains(lower, "int") {
|
||||
return types.LogExtractValueTypeLong, true
|
||||
}
|
||||
// 日期类型统一按照.date处理
|
||||
if strings.HasPrefix(lower, "date") {
|
||||
return types.LogExtractValueTypeDate, false
|
||||
}
|
||||
if strings.HasPrefix(lower, "varchar") || strings.HasPrefix(lower, "char") {
|
||||
return types.LogExtractValueTypeText, true
|
||||
}
|
||||
if strings.HasPrefix(lower, "decimal") {
|
||||
return types.LogExtractValueTypeFloat, true
|
||||
}
|
||||
}
|
||||
|
||||
return origin, false
|
||||
}
|
||||
|
||||
var columns []*types.ColumnProperty
|
||||
for rows.Next() {
|
||||
var (
|
||||
field string
|
||||
typ string
|
||||
null string
|
||||
key string
|
||||
defaultValue sql.NullString
|
||||
extra string
|
||||
)
|
||||
if err := rows.Scan(&field, &typ, &null, &key, &defaultValue, &extra); err != nil {
|
||||
continue
|
||||
}
|
||||
type2, indexable := convertDorisType(typ)
|
||||
columns = append(columns, &types.ColumnProperty{
|
||||
Field: field,
|
||||
Type: typ, // You might want to convert MySQL types to your custom types
|
||||
|
||||
Type2: type2,
|
||||
Indexable: indexable,
|
||||
})
|
||||
}
|
||||
return columns, nil
|
||||
}
|
||||
|
||||
// SelectRows selects rows from a specified table in Doris based on a given query with MaxQueryRows check
|
||||
func (d *Doris) SelectRows(ctx context.Context, database, table, query string) ([]map[string]interface{}, error) {
|
||||
sql := fmt.Sprintf("SELECT * FROM %s.%s", database, table)
|
||||
if query != "" {
|
||||
sql += " " + query
|
||||
}
|
||||
|
||||
// 检查查询结果行数
|
||||
err := d.CheckMaxQueryRows(ctx, database, sql)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return d.ExecQuery(ctx, database, sql)
|
||||
}
|
||||
|
||||
// ExecQuery executes a given SQL query in Doris and returns the results
|
||||
func (d *Doris) ExecQuery(ctx context.Context, database string, sql string) ([]map[string]interface{}, error) {
|
||||
timeoutCtx, cancel := d.createTimeoutContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
db, err := d.NewConn(timeoutCtx, database)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rows, err := db.QueryContext(timeoutCtx, sql)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
columns, err := rows.Columns()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []map[string]interface{}
|
||||
|
||||
for rows.Next() {
|
||||
columnValues := make([]interface{}, len(columns))
|
||||
columnPointers := make([]interface{}, len(columns))
|
||||
for i := range columnValues {
|
||||
columnPointers[i] = &columnValues[i]
|
||||
}
|
||||
|
||||
if err := rows.Scan(columnPointers...); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
rowMap := make(map[string]interface{})
|
||||
for i, colName := range columns {
|
||||
val := columnValues[i]
|
||||
bytes, ok := val.([]byte)
|
||||
if ok {
|
||||
rowMap[colName] = string(bytes)
|
||||
} else {
|
||||
rowMap[colName] = val
|
||||
}
|
||||
}
|
||||
results = append(results, rowMap)
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// ExecContext executes a given SQL query in Doris and returns the results
|
||||
func (d *Doris) ExecContext(ctx context.Context, database string, sql string) error {
|
||||
timeoutCtx, cancel := d.createTimeoutContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
db, err := d.NewConn(timeoutCtx, database)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = db.ExecContext(timeoutCtx, sql)
|
||||
return err
|
||||
}
|
||||
|
||||
// ExecBatchSQL 执行多条 SQL 语句
|
||||
func (d *Doris) ExecBatchSQL(ctx context.Context, database string, sqlBatch string) error {
|
||||
// 分割 SQL 语句
|
||||
sqlStatements := SplitSQLStatements(sqlBatch)
|
||||
|
||||
// 逐条执行 SQL 语句
|
||||
for _, ql := range sqlStatements {
|
||||
// 跳过空语句
|
||||
ql = strings.TrimSpace(ql)
|
||||
if ql == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// 检查是否是 CREATE DATABASE 语句
|
||||
isCreateDB := strings.HasPrefix(strings.ToUpper(ql), "CREATE DATABASE")
|
||||
// strings.HasPrefix(strings.ToUpper(sql), "CREATE SCHEMA") // 暂时不支持CREATE SCHEMA
|
||||
|
||||
// 对于 CREATE DATABASE 语句,使用空数据库名连接
|
||||
currentDB := database
|
||||
if isCreateDB {
|
||||
currentDB = ""
|
||||
}
|
||||
|
||||
// 执行单条 SQL,ExecContext 内部已经包含超时处理
|
||||
err := d.ExecContext(ctx, currentDB, ql)
|
||||
if err != nil {
|
||||
return fmt.Errorf("exec sql failed, sql:%s, err:%w", sqlBatch, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SplitSQLStatements splits a batch of SQL text into individual statements.
//
// A statement ends at a ';' that is outside of any quoted region ('...',
// "..." or `...`), single-line comment (-- ...) or multi-line comment
// (/* ... */). Each returned statement is whitespace-trimmed and keeps its
// terminating ';'. Whitespace after the ';' and a "--" comment that directly
// follows it are attached to the statement just ended. Trailing text without
// a ';' is returned as the last statement. The result is nil when the batch
// contains no statements.
func SplitSQLStatements(sqlBatch string) []string {
	var statements []string
	var currentStatement strings.Builder

	// Scanner state.
	var (
		quote              byte // active quote character (', " or `); 0 when outside a quoted region
		inComment          bool // inside a single-line comment
		inMultilineComment bool // inside a multi-line comment
		escaped            bool // previous character was an unconsumed backslash
	)

	for i := 0; i < len(sqlBatch); i++ {
		char := sqlBatch[i]
		currentStatement.WriteByte(char)
		inString := quote != 0

		// Backslash escapes apply inside string literals, not backtick identifiers.
		if inString && quote != '`' && char == '\\' {
			escaped = !escaped
			continue
		}

		// Quote handling: only the character that opened a region can close
		// it, so a ' inside "..." (or vice versa) is ordinary text.
		if (char == '\'' || char == '"' || char == '`') && !inComment && !inMultilineComment {
			if !escaped {
				switch quote {
				case 0:
					quote = char
				case char:
					quote = 0
				}
			}
			escaped = false
			continue
		}

		// Start of a single-line comment.
		if !inString && !inMultilineComment && !inComment && char == '-' && i+1 < len(sqlBatch) && sqlBatch[i+1] == '-' {
			inComment = true
			currentStatement.WriteByte(sqlBatch[i+1]) // the second '-'
			i++
			continue
		}

		// Start of a multi-line comment.
		if !inString && !inComment && char == '/' && i+1 < len(sqlBatch) && sqlBatch[i+1] == '*' {
			inMultilineComment = true
			currentStatement.WriteByte(sqlBatch[i+1]) // the '*'
			i++
			continue
		}

		// End of a multi-line comment.
		if inMultilineComment && char == '*' && i+1 < len(sqlBatch) && sqlBatch[i+1] == '/' {
			inMultilineComment = false
			currentStatement.WriteByte(sqlBatch[i+1]) // the '/'
			i++
			continue
		}

		// A line break ends a single-line comment.
		if inComment && (char == '\n' || char == '\r') {
			inComment = false
		}

		// Statement terminator.
		if char == ';' && !inString && !inMultilineComment && !inComment {
			// Pull in trailing whitespace and, if present, a "--" comment
			// that follows the semicolon.
			for j := i + 1; j < len(sqlBatch); j++ {
				nextChar := sqlBatch[j]

				if nextChar == '-' && j+1 < len(sqlBatch) && sqlBatch[j+1] == '-' {
					currentStatement.WriteByte(nextChar)
					currentStatement.WriteByte(sqlBatch[j+1])
					j++

					// Consume the comment up to and including the line break.
					for k := j + 1; k < len(sqlBatch); k++ {
						commentChar := sqlBatch[k]
						currentStatement.WriteByte(commentChar)
						j = k

						if commentChar == '\n' || commentChar == '\r' {
							break
						}
					}
					i = j
					break
				} else if !isWhitespace(nextChar) {
					// Start of the next statement; stop collecting.
					break
				} else {
					currentStatement.WriteByte(nextChar)
					i = j
				}
			}

			statements = append(statements, strings.TrimSpace(currentStatement.String()))
			currentStatement.Reset()
			continue
		}

		escaped = false
	}

	// The last statement may lack a terminating semicolon.
	lastStatement := strings.TrimSpace(currentStatement.String())
	if lastStatement != "" {
		statements = append(statements, lastStatement)
	}

	return statements
}

// isWhitespace reports whether the byte c is a whitespace character
// according to unicode.IsSpace.
func isWhitespace(c byte) bool {
	return unicode.IsSpace(rune(c))
}
|
||||
36
dskit/doris/logs.go
Normal file
36
dskit/doris/logs.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package doris
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// 日志相关的操作
|
||||
const (
|
||||
TimeseriesAggregationTimestamp = "__ts__"
|
||||
)
|
||||
|
||||
// TODO: 待测试, MAP/ARRAY/STRUCT/JSON 等类型能否处理
|
||||
func (d *Doris) QueryLogs(ctx context.Context, query *QueryParam) ([]map[string]interface{}, error) {
|
||||
// 等同于 Query()
|
||||
return d.Query(ctx, query)
|
||||
}
|
||||
|
||||
// 本质是查询时序数据, 取第一组, SQL由上层封装, 不再做复杂的解析和截断
|
||||
func (d *Doris) QueryHistogram(ctx context.Context, query *QueryParam) ([][]float64, error) {
|
||||
values, err := d.QueryTimeseries(ctx, query)
|
||||
if err != nil {
|
||||
return [][]float64{}, nil
|
||||
}
|
||||
if len(values) > 0 && len(values[0].Values) > 0 {
|
||||
items := values[0].Values
|
||||
sort.Slice(items, func(i, j int) bool {
|
||||
if len(items[i]) > 0 && len(items[j]) > 0 {
|
||||
return items[i][0] < items[j][0]
|
||||
}
|
||||
return false
|
||||
})
|
||||
return items, nil
|
||||
}
|
||||
return [][]float64{}, nil
|
||||
}
|
||||
126
dskit/doris/template.md
Normal file
126
dskit/doris/template.md
Normal file
@@ -0,0 +1,126 @@
|
||||
## SQL变量
|
||||
|
||||
| 字段名 | 含义 | 使用场景 |
|
||||
| ---- | ---- | ---- |
|
||||
|database|数据库|无|
|
||||
|table|表名||
|
||||
|time_field|时间戳的字段||
|
||||
|query|查询条件|日志原文|
|
||||
|from|开始时间||
|
||||
|to|结束时间||
|
||||
|aggregation|聚合算法|时序图|
|
||||
|field|聚合的字段|时序图|
|
||||
|limit|分页参数|日志原文|
|
||||
|offset|分页参数|日志原文|
|
||||
|interval|直方图的时间粒度|直方图|
|
||||
|
||||
## 日志原文
|
||||
### 直方图
|
||||
|
||||
```
|
||||
# 如何计算interval的值
|
||||
max := 60 // 最多60个柱子
|
||||
interval := ($to-$from) / max
|
||||
interval = interval - interval%10
|
||||
if interval <= 0 {
|
||||
interval = 60
|
||||
}
|
||||
```
|
||||
|
||||
```
|
||||
SELECT count() as cnt,
|
||||
FLOOR(UNIX_TIMESTAMP($time_field) / $interval) * $interval AS __ts__
|
||||
FROM $table
|
||||
WHERE $time_field BETWEEN FROM_UNIXTIME($from) AND FROM_UNIXTIME($to)
|
||||
GROUP BY __ts__;
|
||||
```
|
||||
|
||||
```
|
||||
{
|
||||
"database":"$database",
|
||||
"sql":"$sql",
|
||||
"keys": {
|
||||
"valueKey":"cnt",
|
||||
"timeKey":"__ts__"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 日志原文
|
||||
|
||||
```
|
||||
SELECT * from $table
|
||||
WHERE $time_field BETWEEN FROM_UNIXTIME($from) AND FROM_UNIXTIME($to)
|
||||
ORDER by $time_field
|
||||
LIMIT $limit OFFSET $offset;
|
||||
```
|
||||
|
||||
```
|
||||
{
|
||||
"database":"$database",
|
||||
"sql":"$sql"
|
||||
}
|
||||
```
|
||||
|
||||
## 时序图
|
||||
|
||||
### 日志行数
|
||||
|
||||
```
|
||||
SELECT COUNT() AS cnt, DATE_FORMAT(date, '%Y-%m-%d %H:%i:00') AS __ts__
|
||||
FROM nginx_access_log
|
||||
WHERE $time_field BETWEEN FROM_UNIXTIME($from) AND FROM_UNIXTIME($to)
|
||||
GROUP BY __ts__
|
||||
```
|
||||
|
||||
```
|
||||
{
|
||||
"database":"$database",
|
||||
"sql":"$sql",
|
||||
"keys": {
|
||||
"valueKey":"cnt",
|
||||
"timeKey":"__ts__"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### max/min/avg/sum
|
||||
|
||||
```
|
||||
SELECT $aggregation($field) AS series, DATE_FORMAT(date, '%Y-%m-%d %H:%i:00') AS __ts__
|
||||
FROM nginx_access_log
|
||||
WHERE $time_field BETWEEN FROM_UNIXTIME($from) AND FROM_UNIXTIME($to)
|
||||
GROUP BY __ts__
|
||||
```
|
||||
|
||||
```
|
||||
{
|
||||
"database":"$database",
|
||||
"sql":"$sql",
|
||||
"keys": {
|
||||
"valueKey":"series",
|
||||
"timeKey":"__ts__"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### 分位值
|
||||
|
||||
```
|
||||
SELECT percentile($field, 0.95) AS series, DATE_FORMAT(date, '%Y-%m-%d %H:%i:00') AS __ts__
|
||||
FROM nginx_access_log
|
||||
WHERE $time_field BETWEEN FROM_UNIXTIME($from) AND FROM_UNIXTIME($to)
|
||||
GROUP BY __ts__
|
||||
```
|
||||
|
||||
```
|
||||
{
|
||||
"database":"$database",
|
||||
"sql":"$sql",
|
||||
"keys": {
|
||||
"valueKey":"series",
|
||||
"timeKey":"__ts__"
|
||||
}
|
||||
}
|
||||
```
|
||||
108
dskit/doris/timeseries.go
Normal file
108
dskit/doris/timeseries.go
Normal file
@@ -0,0 +1,108 @@
|
||||
package doris
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/dskit/sqlbase"
|
||||
"github.com/ccfos/nightingale/v6/dskit/types"
|
||||
)
|
||||
|
||||
const (
|
||||
TimeFieldFormatEpochMilli = "epoch_millis"
|
||||
TimeFieldFormatEpochSecond = "epoch_second"
|
||||
TimeFieldFormatDateTime = "datetime"
|
||||
)
|
||||
|
||||
// QueryParam carries a ready-made query. SQL is no longer assembled here; the
// statement supplied by the user is used as given.
type QueryParam struct {
	// Database is the database the statement is executed against.
	Database string `json:"database"`
	// Sql is the complete SQL statement to run.
	Sql string `json:"sql"`
	// Keys is handed to sqlbase.FormatMetricValues when rows are converted to time series.
	Keys types.Keys `json:"keys" mapstructure:"keys"`
}
|
||||
|
||||
var (
	// DorisBannedOp is the set of upper-case SQL keywords that Query refuses
	// to execute: Query upper-cases the statement, tokenizes it and rejects it
	// when any token is a key of this map.
	DorisBannedOp = map[string]struct{}{
		"CREATE":   {},
		"INSERT":   {},
		"ALTER":    {},
		"REVOKE":   {},
		"DROP":     {},
		"RENAME":   {},
		"ATTACH":   {},
		"DETACH":   {},
		"OPTIMIZE": {},
		"TRUNCATE": {},
		"SET":      {},
	}
)
|
||||
|
||||
// Query executes a given SQL query in Doris and returns the results with MaxQueryRows check
|
||||
func (d *Doris) Query(ctx context.Context, query *QueryParam) ([]map[string]interface{}, error) {
|
||||
// 校验SQL的合法性, 过滤掉 write请求
|
||||
sqlItem := strings.Split(strings.ToUpper(query.Sql), " ")
|
||||
for _, item := range sqlItem {
|
||||
if _, ok := DorisBannedOp[item]; ok {
|
||||
return nil, fmt.Errorf("operation %s is forbid, only read db, please check your sql", item)
|
||||
}
|
||||
}
|
||||
|
||||
// 检查查询结果行数
|
||||
err := d.CheckMaxQueryRows(ctx, query.Database, query.Sql)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rows, err := d.ExecQuery(ctx, query.Database, query.Sql)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rows, nil
|
||||
}
|
||||
|
||||
// QueryTimeseries executes a time series data query using the given parameters with MaxQueryRows check
|
||||
func (d *Doris) QueryTimeseries(ctx context.Context, query *QueryParam) ([]types.MetricValues, error) {
|
||||
// 使用 Query 方法执行查询,Query方法内部已包含MaxQueryRows检查
|
||||
rows, err := d.Query(ctx, query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return sqlbase.FormatMetricValues(query.Keys, rows), nil
|
||||
}
|
||||
|
||||
// CheckMaxQueryRows checks if the query result exceeds the maximum allowed rows
|
||||
func (d *Doris) CheckMaxQueryRows(ctx context.Context, database, sql string) error {
|
||||
timeoutCtx, cancel := d.createTimeoutContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
cleanedSQL := strings.ReplaceAll(sql, ";", "")
|
||||
checkQuery := fmt.Sprintf("SELECT COUNT(*) as count FROM (%s) AS subquery;", cleanedSQL)
|
||||
|
||||
// 执行计数查询
|
||||
results, err := d.ExecQuery(timeoutCtx, database, checkQuery)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(results) > 0 {
|
||||
if count, exists := results[0]["count"]; exists {
|
||||
v, err := sqlbase.ParseFloat64Value(count)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
maxQueryRows := d.MaxQueryRows
|
||||
if maxQueryRows == 0 {
|
||||
maxQueryRows = 500
|
||||
}
|
||||
|
||||
if v > float64(maxQueryRows) {
|
||||
return fmt.Errorf("query result rows count %d exceeds the maximum limit %d", int(v), maxQueryRows)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -115,14 +115,14 @@ func (m *MySQL) NewConn(ctx context.Context, database string) (*gorm.DB, error)
|
||||
}()
|
||||
|
||||
dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8&parseTime=True", shard.User, shard.Password, shard.Addr, database)
|
||||
|
||||
return sqlbase.NewDB(
|
||||
db, err = sqlbase.NewDB(
|
||||
ctx,
|
||||
mysql.Open(dsn),
|
||||
shard.MaxIdleConns,
|
||||
shard.MaxOpenConns,
|
||||
time.Duration(shard.ConnMaxLifetime)*time.Second,
|
||||
)
|
||||
return db, err
|
||||
}
|
||||
|
||||
func (m *MySQL) ShowDatabases(ctx context.Context) ([]string, error) {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -117,7 +118,8 @@ func (p *PostgreSQL) NewConn(ctx context.Context, database string) (*gorm.DB, er
|
||||
}()
|
||||
|
||||
// Simplified connection logic for PostgreSQL
|
||||
dsn := fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=disable&TimeZone=Asia/Shanghai", p.Shard.User, p.Shard.Password, p.Shard.Addr, database)
|
||||
dsn := fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=disable&TimeZone=Asia/Shanghai", url.QueryEscape(p.Shard.User), url.QueryEscape(p.Shard.Password), p.Shard.Addr, database)
|
||||
|
||||
db, err = sqlbase.NewDB(
|
||||
ctx,
|
||||
postgres.Open(dsn),
|
||||
|
||||
@@ -48,7 +48,7 @@ func CloseDB(db *gorm.DB) error {
|
||||
|
||||
// ShowTables retrieves a list of all tables in the specified database
|
||||
func ShowTables(ctx context.Context, db *gorm.DB, query string) ([]string, error) {
|
||||
var tables []string
|
||||
tables := make([]string, 0)
|
||||
|
||||
rows, err := db.WithContext(ctx).Raw(query).Rows()
|
||||
if err != nil {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"crypto/md5"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
@@ -78,10 +79,11 @@ func FormatMetricValues(keys types.Keys, rows []map[string]interface{}, ignoreDe
|
||||
}
|
||||
|
||||
if keys.TimeKey == "" {
|
||||
keys.TimeKey = "time"
|
||||
}
|
||||
|
||||
if len(keys.TimeKey) > 0 {
|
||||
// 默认支持 __time__ 和 time 作为时间字段
|
||||
// 用户可以使用 as __time__ 来避免与表中已有的 time 字段冲突
|
||||
keyMap["__time__"] = "time"
|
||||
keyMap["time"] = "time"
|
||||
} else {
|
||||
keyMap[keys.TimeKey] = "time"
|
||||
}
|
||||
|
||||
@@ -121,6 +123,11 @@ func FormatMetricValues(keys types.Keys, rows []map[string]interface{}, ignoreDe
|
||||
|
||||
// Compile and store the metric values
|
||||
for metricName, value := range metricValue {
|
||||
// NaN 无法执行json.Marshal(), 接口会报错
|
||||
if math.IsNaN(value) {
|
||||
continue
|
||||
}
|
||||
|
||||
metrics := make(model.Metric)
|
||||
var labelsStr []string
|
||||
|
||||
@@ -136,7 +143,20 @@ func FormatMetricValues(keys types.Keys, rows []map[string]interface{}, ignoreDe
|
||||
labelsStrHash := fmt.Sprintf("%x", md5.Sum([]byte(strings.Join(labelsStr, ","))))
|
||||
|
||||
// Append new values to the existing metric, if present
|
||||
ts, exists := metricTs[keys.TimeKey]
|
||||
var ts float64
|
||||
var exists bool
|
||||
|
||||
if keys.TimeKey == "" {
|
||||
// 没有配置 timeKey,按优先级查找:__time__ > time
|
||||
ts, exists = metricTs["__time__"]
|
||||
if !exists {
|
||||
ts, exists = metricTs["time"]
|
||||
}
|
||||
} else {
|
||||
// 用户配置了 timeKey,使用用户配置的
|
||||
ts, exists = metricTs[keys.TimeKey]
|
||||
}
|
||||
|
||||
if !exists {
|
||||
ts = float64(time.Now().Unix()) // Default to current time if not specified
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user