mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-13 11:28:56 +00:00
Compare commits
324 Commits
refactor-c
...
aiagent
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f5fb52024b | ||
|
|
04e9cd08da | ||
|
|
1310b8a522 | ||
|
|
0d105e1f9d | ||
|
|
77bca17970 | ||
|
|
3fb5f446be | ||
|
|
f2384cc12b | ||
|
|
72e16b25f3 | ||
|
|
59c85a8efb | ||
|
|
f50f05ae01 | ||
|
|
ef6676d3d6 | ||
|
|
eacf1b650a | ||
|
|
7566b9b690 | ||
|
|
5e01e8e021 | ||
|
|
61c7bbd0d8 | ||
|
|
303ef3476e | ||
|
|
b49ab44818 | ||
|
|
4d37594a0a | ||
|
|
5b941d2ce5 | ||
|
|
932199fde1 | ||
|
|
5d1636d1a5 | ||
|
|
6167eb3b13 | ||
|
|
5beee98cde | ||
|
|
c34c008080 | ||
|
|
75218f9d5a | ||
|
|
341f82ecde | ||
|
|
a6056a5fab | ||
|
|
01e8370882 | ||
|
|
8b11e18754 | ||
|
|
aa749065da | ||
|
|
f5811bc5f7 | ||
|
|
5de63d7307 | ||
|
|
6a44da4dda | ||
|
|
0a65616fbb | ||
|
|
a0e8c5f764 | ||
|
|
d64dbb6909 | ||
|
|
656b91e976 | ||
|
|
fe6dce403f | ||
|
|
faa348a086 | ||
|
|
635b781ae1 | ||
|
|
f60771ad9c | ||
|
|
6bd2f9a89f | ||
|
|
a76049822c | ||
|
|
97746f7469 | ||
|
|
903d75e4b8 | ||
|
|
42637e546d | ||
|
|
7bf000932d | ||
|
|
3202cd1410 | ||
|
|
e28dd079f9 | ||
|
|
72cb35a4ed | ||
|
|
80d0193ac0 | ||
|
|
54a8e2590e | ||
|
|
b296d5bcc3 | ||
|
|
996c9812bd | ||
|
|
0f8bb8b2af | ||
|
|
8c54a97292 | ||
|
|
47cab69088 | ||
|
|
c432636d8d | ||
|
|
959b0389c6 | ||
|
|
3d8f1b3ef5 | ||
|
|
ce838036ad | ||
|
|
578ac096e5 | ||
|
|
48ee6117e9 | ||
|
|
5afd6a60e9 | ||
|
|
37372ae9ea | ||
|
|
48e7c34ebf | ||
|
|
acd0ec4bef | ||
|
|
c1ad946bc5 | ||
|
|
4c2affc7da | ||
|
|
273d282beb | ||
|
|
3e86656381 | ||
|
|
f942772d2b | ||
|
|
fbc0c22d7a | ||
|
|
abd452a6df | ||
|
|
47f05627d9 | ||
|
|
edd8e2a3db | ||
|
|
c4ca2920ef | ||
|
|
afc8d7d21c | ||
|
|
c0e13e2870 | ||
|
|
4f186a71ba | ||
|
|
104c275f2d | ||
|
|
2ba7a970e8 | ||
|
|
c98241b3fd | ||
|
|
b30caf625b | ||
|
|
32e8b961c2 | ||
|
|
2ff0a8fdbb | ||
|
|
7ff74d0948 | ||
|
|
da58d825c0 | ||
|
|
0014b77c4d | ||
|
|
fc7fdde2d5 | ||
|
|
61b63fc75c | ||
|
|
80f564ec63 | ||
|
|
203c2a885b | ||
|
|
9bee3e1379 | ||
|
|
c214580e87 | ||
|
|
f6faed0659 | ||
|
|
990819d6c1 | ||
|
|
5fff517cce | ||
|
|
db1bb34277 | ||
|
|
81e37c9ed4 | ||
|
|
27ec6a2d04 | ||
|
|
372a8cff2f | ||
|
|
68850800ed | ||
|
|
717f7f1c4b | ||
|
|
82e1e715ad | ||
|
|
d1058639fc | ||
|
|
709eda93a8 | ||
|
|
48e69449c5 | ||
|
|
e5218bdba0 | ||
|
|
543b334e64 | ||
|
|
3644200488 | ||
|
|
ceddf1f552 | ||
|
|
faa4c4f438 | ||
|
|
4f8b6157a3 | ||
|
|
7fd7040c7f | ||
|
|
7fa1a41437 | ||
|
|
f7b406078f | ||
|
|
f6b10403d9 | ||
|
|
f4ce0bccfc | ||
|
|
f26ce4487d | ||
|
|
9f31f3b57d | ||
|
|
c7a97a9767 | ||
|
|
f94068e611 | ||
|
|
2cd5edf691 | ||
|
|
0ffc67f35f | ||
|
|
6dc5ac47b7 | ||
|
|
2526440efa | ||
|
|
2f8b8fad62 | ||
|
|
9c19201c13 | ||
|
|
4758c14a46 | ||
|
|
2e54ab8c2f | ||
|
|
67f79c2f88 | ||
|
|
749ae70bd7 | ||
|
|
e2dba9b3d3 | ||
|
|
2228842b2f | ||
|
|
38fe37a286 | ||
|
|
7daf1e8c43 | ||
|
|
8706ded776 | ||
|
|
f637078dd9 | ||
|
|
8aa7b1060d | ||
|
|
18634a33b2 | ||
|
|
7ed1b80759 | ||
|
|
3d240704f6 | ||
|
|
ce0322bbd7 | ||
|
|
66f62ca8c5 | ||
|
|
d11d73f6bc | ||
|
|
dee1fe2d61 | ||
|
|
b3da24f18a | ||
|
|
29ea4f6ed2 | ||
|
|
5272b11efc | ||
|
|
c322601138 | ||
|
|
f1357d6f33 | ||
|
|
728d70c707 | ||
|
|
bf93932b22 | ||
|
|
57581be350 | ||
|
|
5793f089f6 | ||
|
|
fa49449588 | ||
|
|
876f1d1084 | ||
|
|
678830be37 | ||
|
|
5e30f3a00d | ||
|
|
7f1eefd033 | ||
|
|
c8dd26ca4c | ||
|
|
37c57e66ea | ||
|
|
878e940325 | ||
|
|
cbc715305d | ||
|
|
5011766c70 | ||
|
|
b3ed8a1e8c | ||
|
|
814ded90b6 | ||
|
|
43e89040eb | ||
|
|
3d339fe03c | ||
|
|
7618858912 | ||
|
|
15b4ef8611 | ||
|
|
5083a5cc96 | ||
|
|
d51e83d7d4 | ||
|
|
601d4f0c95 | ||
|
|
90fac12953 | ||
|
|
19d76824d9 | ||
|
|
1341554bbc | ||
|
|
fd3ce338cb | ||
|
|
b8f36ce3cb | ||
|
|
037112a9e6 | ||
|
|
c6e75d31a1 | ||
|
|
bd24f5b056 | ||
|
|
89551c8edb | ||
|
|
042b44940d | ||
|
|
8cd8674848 | ||
|
|
7bb6ac8a03 | ||
|
|
76b35276af | ||
|
|
439a21b784 | ||
|
|
47e70a2dba | ||
|
|
16b3cb1abc | ||
|
|
32995c1b2d | ||
|
|
b4fa36fa0e | ||
|
|
f412f82eb8 | ||
|
|
9da1cd506b | ||
|
|
99ea838863 | ||
|
|
7feb003b72 | ||
|
|
b0a053361f | ||
|
|
959f75394b | ||
|
|
03e95973b2 | ||
|
|
e890705167 | ||
|
|
6716f1bdf1 | ||
|
|
739b9406a4 | ||
|
|
77f280d1cc | ||
|
|
04fe1b9dd6 | ||
|
|
552758e0e1 | ||
|
|
68bc474c1b | ||
|
|
f692035deb | ||
|
|
eb441353c3 | ||
|
|
b606b22ae6 | ||
|
|
1de0428860 | ||
|
|
3d0c288c9f | ||
|
|
343814a802 | ||
|
|
12e2761467 | ||
|
|
0edd5ee772 | ||
|
|
5e430cedc7 | ||
|
|
a791a9901e | ||
|
|
222cdd76f0 | ||
|
|
ed4e3937e0 | ||
|
|
60f9e1c48e | ||
|
|
276dfe7372 | ||
|
|
4a6dacbe30 | ||
|
|
48eebba11a | ||
|
|
eca82e5ec2 | ||
|
|
21478fcf3d | ||
|
|
a87c856299 | ||
|
|
ba035a446d | ||
|
|
bf840e6bb2 | ||
|
|
cd01092aed | ||
|
|
e202fd50c8 | ||
|
|
f0e5062485 | ||
|
|
861fe96de5 | ||
|
|
5b66ada96d | ||
|
|
d5a98debff | ||
|
|
4977052a67 | ||
|
|
dcc461e587 | ||
|
|
f5ce1733bb | ||
|
|
436cf25409 | ||
|
|
038f68b0b7 | ||
|
|
96ef1895b7 | ||
|
|
eeaa7b46f1 | ||
|
|
dc525352f1 | ||
|
|
98a3fe9375 | ||
|
|
74b0f802ec | ||
|
|
85bd3148d5 | ||
|
|
0931fa9603 | ||
|
|
65cdb2da9e | ||
|
|
9ad6514af6 | ||
|
|
302c6549e4 | ||
|
|
a3122270e6 | ||
|
|
1245c453bb | ||
|
|
9c5ccf0c8f | ||
|
|
cd468af250 | ||
|
|
2d3449c0ec | ||
|
|
e15bdbce92 | ||
|
|
3890243d42 | ||
|
|
37fb4ee867 | ||
|
|
6db63eafc1 | ||
|
|
1e9cbfc316 | ||
|
|
4f95554fe3 | ||
|
|
8eba9aa92f | ||
|
|
6ba74b8e21 | ||
|
|
8ea4632681 | ||
|
|
f958f27de1 | ||
|
|
1bdfa3e032 | ||
|
|
143880cd46 | ||
|
|
38f0b4f1bb | ||
|
|
2bccd5be99 | ||
|
|
7b328b3eaa | ||
|
|
8bd5b90e94 | ||
|
|
96629e284f | ||
|
|
67d2875690 | ||
|
|
238895a1f8 | ||
|
|
fb341b645d | ||
|
|
2d84fd8cf3 | ||
|
|
2611f87c41 | ||
|
|
a5b7aa7a26 | ||
|
|
0714a0f8f1 | ||
|
|
063cc750e1 | ||
|
|
b2a912d72f | ||
|
|
4ba745f442 | ||
|
|
fa7d46ecad | ||
|
|
a5a43df44f | ||
|
|
fbf1d68b84 | ||
|
|
ca712f62a4 | ||
|
|
84ee14d21e | ||
|
|
c9cf1cfdd2 | ||
|
|
9d1c01107f | ||
|
|
7ea31b5c6d | ||
|
|
e8e1c67cc8 | ||
|
|
8079bcd288 | ||
|
|
33b178ce82 | ||
|
|
28c9cd7b43 | ||
|
|
b771e8a3e8 | ||
|
|
4945e98200 | ||
|
|
a938ea3e56 | ||
|
|
25c339025b | ||
|
|
bb0ee35275 | ||
|
|
0fc54ad173 | ||
|
|
1f95e2df94 | ||
|
|
d2969f34ef | ||
|
|
d9a34959dc | ||
|
|
bc6ff7f4ba | ||
|
|
514913a97a | ||
|
|
affc610b7b | ||
|
|
a098d5d39c | ||
|
|
05c3f1e0e4 | ||
|
|
d5740164f2 | ||
|
|
8c2383c410 | ||
|
|
9af024fb99 | ||
|
|
12f3cc21e1 | ||
|
|
0b3bb54eb4 | ||
|
|
da813e2b0c | ||
|
|
50fa2499b7 | ||
|
|
2c5ae5b3a9 | ||
|
|
522932aeb4 | ||
|
|
35ac0ddea5 | ||
|
|
26fa750309 | ||
|
|
1eba607aeb | ||
|
|
6aadd159af | ||
|
|
b6ad87523e | ||
|
|
ea5b6845de | ||
|
|
5ba5096da2 | ||
|
|
85786d985d |
22
.github/workflows/issue-translator.yml
vendored
Normal file
22
.github/workflows/issue-translator.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: 'Issue Translator'
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
|
||||
jobs:
|
||||
translate:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Translate Issues
|
||||
uses: usthe/issues-translate-action@v2.7
|
||||
with:
|
||||
# 是否翻译 issue 标题
|
||||
IS_MODIFY_TITLE: true
|
||||
# GitHub Token
|
||||
BOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# 自定义翻译标注(可选)
|
||||
# CUSTOM_BOT_NOTE: "Translation by bot"
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -58,6 +58,10 @@ _test
|
||||
.idea
|
||||
.index
|
||||
.vscode
|
||||
.issue
|
||||
.issue/*
|
||||
.cursor
|
||||
.claude
|
||||
.DS_Store
|
||||
.cache-loader
|
||||
.payload
|
||||
|
||||
41
.typos.toml
Normal file
41
.typos.toml
Normal file
@@ -0,0 +1,41 @@
|
||||
# Configuration for typos tool
|
||||
[files]
|
||||
extend-exclude = [
|
||||
# Ignore auto-generated easyjson files
|
||||
"*_easyjson.go",
|
||||
# Ignore binary files
|
||||
"*.gz",
|
||||
"*.tar",
|
||||
"n9e",
|
||||
"n9e-*"
|
||||
]
|
||||
|
||||
[default.extend-identifiers]
|
||||
# Didi is a company name (DiDi), not a typo
|
||||
Didi = "Didi"
|
||||
# datas is intentionally used as plural of data (slice variable)
|
||||
datas = "datas"
|
||||
# pendings is intentionally used as plural
|
||||
pendings = "pendings"
|
||||
pendingsUseByRecover = "pendingsUseByRecover"
|
||||
pendingsUseByRecoverMap = "pendingsUseByRecoverMap"
|
||||
# typs is intentionally used as shorthand for types (parameter name)
|
||||
typs = "typs"
|
||||
|
||||
[default.extend-words]
|
||||
# Some false positives
|
||||
ba = "ba"
|
||||
# Specific corrections for ambiguous typos
|
||||
contigious = "contiguous"
|
||||
onw = "own"
|
||||
componet = "component"
|
||||
Patten = "Pattern"
|
||||
Requets = "Requests"
|
||||
Mis = "Miss"
|
||||
exporer = "exporter"
|
||||
soruce = "source"
|
||||
verison = "version"
|
||||
Configations = "Configurations"
|
||||
emmited = "emitted"
|
||||
Utlization = "Utilization"
|
||||
serie = "series"
|
||||
109
README.md
109
README.md
@@ -3,7 +3,7 @@
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>开源告警管理专家</b>
|
||||
<b>Open-Source Alerting Expert</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
@@ -25,94 +25,93 @@
|
||||
|
||||
|
||||
|
||||
[English](./README_en.md) | [中文](./README.md)
|
||||
[English](./README.md) | [中文](./README_zh.md)
|
||||
|
||||
## 夜莺是什么
|
||||
## 🎯 What is Nightingale
|
||||
|
||||
夜莺监控(Nightingale)是一款侧重告警的监控类开源项目。类似 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重在可视化,夜莺是侧重在告警引擎、告警事件的处理和分发。
|
||||
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
|
||||
|
||||
夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展委员会(CCF ODC),为 CCF ODC 成立后接受捐赠的第一个开源项目。
|
||||
> 💡 Nightingale has now officially launched the [MCP-Server](https://github.com/n9e/n9e-mcp-server/). This MCP Server enables AI assistants to interact with the Nightingale API using natural language, facilitating alert management, monitoring, and observability tasks.
|
||||
>
|
||||
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODTC).
|
||||
|
||||
## 夜莺的工作逻辑
|
||||

|
||||
|
||||
很多用户已经自行采集了指标、日志数据,此时就把存储库(VictoriaMetrics、ElasticSearch等)作为数据源接入夜莺,即可在夜莺里配置告警规则、通知规则,完成告警事件的生成和派发。
|
||||
## 💡 How Nightingale Works
|
||||
|
||||

|
||||
Many users have already collected metrics and log data. In this case, you can connect your storage repositories (such as VictoriaMetrics, ElasticSearch, etc.) as data sources in Nightingale. This allows you to configure alerting rules and notification rules within Nightingale, enabling the generation and distribution of alarms.
|
||||
|
||||
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接。
|
||||

|
||||
|
||||
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
|
||||
Nightingale itself does not provide monitoring data collection capabilities. We recommend using [Categraf](https://github.com/flashcatcloud/categraf) as the collector, which integrates seamlessly with Nightingale.
|
||||
|
||||
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
|
||||
[Categraf](https://github.com/flashcatcloud/categraf) can collect monitoring data from operating systems, network devices, various middleware, and databases. It pushes this data to Nightingale via the `Prometheus Remote Write` protocol. Nightingale then stores the monitoring data in a time-series database (such as Prometheus, VictoriaMetrics, etc.) and provides alerting and visualization capabilities.
|
||||
|
||||

|
||||
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alerting engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
|
||||
|
||||
> 上图中,机房A和中心机房的网络链路很好,所以直接由中心端的夜莺进程做告警引擎,机房B和中心机房的网络链路不好,所以在机房B部署了 `n9e-edge` 做告警引擎,对机房B的数据源做告警判定。
|
||||

|
||||
|
||||
## 告警降噪、升级、协同
|
||||
> In the above diagram, Data Center A has a good network with the central data center, so it uses the Nightingale process in the central data center as the alerting engine. Data Center B has a poor network with the central data center, so it deploys `n9e-edge` as the alerting engine to handle alerting for its own data sources.
|
||||
|
||||
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介(电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
|
||||
## 🔕 Alert Noise Reduction, Escalation, and Collaboration
|
||||
|
||||
如果您有更高级的需求,比如:
|
||||
Nightingale focuses on being an alerting engine, responsible for generating alarms and flexibly distributing them based on rules. It supports 20 built-in notification medias (such as phone calls, SMS, email, DingTalk, Slack, etc.).
|
||||
|
||||
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
|
||||
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
|
||||
If you have more advanced requirements, such as:
|
||||
- Want to consolidate events from multiple monitoring systems into one platform for unified noise reduction, response handling, and data analysis.
|
||||
- Want to support personnel scheduling, practice on-call culture, and support alert escalation (to avoid missing alerts) and collaborative handling.
|
||||
|
||||
那夜莺是不合适的,推荐您选用 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) 这样的 On-call 产品,产品简单易用,也有免费套餐。
|
||||
Then Nightingale is not suitable. It is recommended that you choose on-call products such as PagerDuty and FlashDuty. These products are simple and easy to use.
|
||||
|
||||
## 🗨️ Communication Channels
|
||||
|
||||
## 相关资料 & 交流渠道
|
||||
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助(PPT链接在文末)
|
||||
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
|
||||
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
|
||||
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
|
||||
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
|
||||
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
|
||||
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
|
||||
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://n9e.github.io/).
|
||||
|
||||
## 🔑 Key Features
|
||||
|
||||
## 关键特性简介
|
||||

|
||||
|
||||

|
||||
- Nightingale supports alerting rules, mute rules, subscription rules, and notification rules. It natively supports 20 types of notification media and allows customization of message templates.
|
||||
- It supports event pipelines for Pipeline processing of alarms, facilitating automated integration with in-house systems. For example, it can append metadata to alarms or perform relabeling on events.
|
||||
- It introduces the concept of business groups and a permission system to manage various rules in a categorized manner.
|
||||
- Many databases and middleware come with built-in alert rules that can be directly imported and used. It also supports direct import of Prometheus alerting rules.
|
||||
- It supports alerting self-healing, which automatically triggers a script to execute predefined logic after an alarm is generated—such as cleaning up disk space or capturing the current system state.
|
||||
|
||||
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
|
||||
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
|
||||
- 支持业务组概念,引入权限体系,分门别类管理各类规则
|
||||
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
|
||||
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
|
||||

|
||||
|
||||

|
||||
- Nightingale archives historical alarms and supports multi-dimensional query and statistics.
|
||||
- It supports flexible aggregation grouping, allowing a clear view of the distribution of alarms across the company.
|
||||
|
||||
- 夜莺存档了历史告警事件,支持多维度的查询和统计
|
||||
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
|
||||

|
||||
|
||||

|
||||
- Nightingale has built-in metric descriptions, dashboards, and alerting rules for common operating systems, middleware, and databases, which are contributed by the community with varying quality.
|
||||
- It directly receives data via multiple protocols such as Remote Write, OpenTSDB, Datadog, and Falcon, integrates with various Agents.
|
||||
- It supports data sources like Prometheus, ElasticSearch, Loki, ClickHouse, MySQL, Postgres, allowing alerting based on data from these sources.
|
||||
- Nightingale can be easily embedded into internal enterprise systems (e.g. Grafana, CMDB), and even supports configuring menu visibility for these embedded systems.
|
||||
|
||||
- 夜莺内置常用操作系统、中间件、数据库的的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
|
||||
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
|
||||
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
|
||||
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
|
||||

|
||||
|
||||
- Nightingale supports dashboard functionality, including common chart types, and comes with pre-built dashboards. The image above is a screenshot of one of these dashboards.
|
||||
- If you are already accustomed to Grafana, it is recommended to continue using Grafana for visualization, as Grafana has deeper expertise in this area.
|
||||
- For machine-related monitoring data collected by Categraf, it is advisable to use Nightingale's built-in dashboards for viewing. This is because Categraf's metric naming follows Telegraf's convention, which differs from that of Node Exporter.
|
||||
- Due to Nightingale's concept of business groups (where machines can belong to different groups), there may be scenarios where you only want to view machines within the current business group on the dashboard. Thus, Nightingale's dashboards can be linked with business groups for interactive filtering.
|
||||
|
||||

|
||||
## 🌟 Stargazers over time
|
||||
|
||||
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
|
||||
- 如果你已经习惯了 Grafana,建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
|
||||
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
|
||||
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
|
||||
|
||||
## 广受关注
|
||||
[](https://star-history.com/#ccfos/nightingale&Date)
|
||||
|
||||
## 感谢众多企业的信赖
|
||||
## 🔥 Users
|
||||
|
||||

|
||||

|
||||
|
||||
## 社区共建
|
||||
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践,共建专业、活跃的夜莺开源社区。
|
||||
- ❤️ 夜莺贡献者
|
||||
## 🤝 Community Co-Building
|
||||
|
||||
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
|
||||
- ❤️ Nightingale Contributors
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
## 📜 License
|
||||
- [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)
|
||||
|
||||
113
README_en.md
113
README_en.md
@@ -1,113 +0,0 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>Open-source Alert Management Expert, an Integrated Observability Platform</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://flashcat.cloud/docs/">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
[English](./README_en.md) | [中文](./README.md)
|
||||
|
||||
## What is Nightingale
|
||||
|
||||
Nightingale is an open-source project focused on alerting. Similar to Grafana's data source integration approach, Nightingale also connects with various existing data sources. However, while Grafana focuses on visualization, Nightingale focuses on alerting engines.
|
||||
|
||||
Originally developed and open-sourced by Didi, Nightingale was donated to the China Computer Federation Open Source Development Committee (CCF ODC) on May 11, 2022, becoming the first open-source project accepted by the CCF ODC after its establishment.
|
||||
|
||||
|
||||
## Quick Start
|
||||
|
||||
- 👉 [Documentation](https://flashcat.cloud/docs/) | [Download](https://flashcat.cloud/download/nightingale/)
|
||||
- ❤️ [Report a Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
|
||||
- ℹ️ For faster access, the above documentation and download sites are hosted on [FlashcatCloud](https://flashcat.cloud).
|
||||
|
||||
## Features
|
||||
|
||||
- **Integration with Multiple Time-Series Databases:** Supports integration with various time-series databases such as Prometheus, VictoriaMetrics, Thanos, Mimir, M3DB, and TDengine, enabling unified alert management.
|
||||
- **Advanced Alerting Capabilities:** Comes with built-in support for multiple alerting rules, extensible to common notification channels. It also supports alert suppression, silencing, subscription, self-healing, and alert event management.
|
||||
- **High-Performance Visualization Engine:** Offers various chart styles with numerous built-in dashboard templates and the ability to import Grafana templates. Ready to use with a business-friendly open-source license.
|
||||
- **Support for Common Collectors:** Compatible with [Categraf](https://flashcat.cloud/product/categraf), Telegraf, Grafana-agent, Datadog-agent, and various exporters as collectors—there's no data that can't be monitored.
|
||||
- **Seamless Integration with [Flashduty](https://flashcat.cloud/product/flashcat-duty/):** Enables alert aggregation, acknowledgment, escalation, scheduling, and IM integration, ensuring no alerts are missed, reducing unnecessary interruptions, and enhancing efficient collaboration.
|
||||
|
||||
|
||||
## Screenshots
|
||||
|
||||
You can switch languages and themes in the top right corner. We now support English, Simplified Chinese, and Traditional Chinese.
|
||||
|
||||

|
||||
|
||||
### Instant Query
|
||||
|
||||
Similar to the built-in query analysis page in Prometheus, Nightingale offers an ad-hoc query feature with UI enhancements. It also provides built-in PromQL metrics, allowing users unfamiliar with PromQL to quickly perform queries.
|
||||
|
||||

|
||||
|
||||
### Metric View
|
||||
|
||||
Alternatively, you can use the Metric View to access data. With this feature, Instant Query becomes less necessary, as it caters more to advanced users. Regular users can easily perform queries using the Metric View.
|
||||
|
||||

|
||||
|
||||
### Built-in Dashboards
|
||||
|
||||
Nightingale includes commonly used dashboards that can be imported and used directly. You can also import Grafana dashboards, although compatibility is limited to basic Grafana charts. If you’re accustomed to Grafana, it’s recommended to continue using it for visualization, with Nightingale serving as an alerting engine.
|
||||
|
||||

|
||||
|
||||
### Built-in Alert Rules
|
||||
|
||||
In addition to the built-in dashboards, Nightingale also comes with numerous alert rules that are ready to use out of the box.
|
||||
|
||||

|
||||
|
||||
|
||||
|
||||
## Architecture
|
||||
|
||||
In most community scenarios, Nightingale is primarily used as an alert engine, integrating with multiple time-series databases to unify alert rule management. Grafana remains the preferred tool for visualization. As an alert engine, the product architecture of Nightingale is as follows:
|
||||
|
||||

|
||||
|
||||
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alert engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
|
||||
|
||||

|
||||
|
||||
|
||||
## Communication Channels
|
||||
|
||||
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
|
||||
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/).
|
||||
|
||||
## Stargazers over time
|
||||
|
||||
[](https://star-history.com/#ccfos/nightingale&Date)
|
||||
|
||||
## Community Co-Building
|
||||
|
||||
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
|
||||
- ❤️ Nightingale Contributors
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
123
README_zh.md
Normal file
123
README_zh.md
Normal file
@@ -0,0 +1,123 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>开源监控告警管理专家</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://flashcat.cloud/docs/">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
[English](./README.md) | [中文](./README_zh.md)
|
||||
|
||||
## 夜莺是什么
|
||||
|
||||
夜莺 Nightingale 是一款开源云原生监控告警工具,是中国计算机学会接受捐赠并托管的第一个开源项目,在 GitHub 上有超过 12000 颗星,广受关注和使用。夜莺的统一告警引擎,可以对接 Prometheus、Elasticsearch、ClickHouse、Loki、MySQL 等多种数据源,提供全面的告警判定、丰富的事件处理和灵活的告警分发及通知能力。
|
||||
|
||||
夜莺侧重于监控告警,类似于 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重于可视化,夜莺则是侧重于告警引擎、告警事件的处理和分发。
|
||||
|
||||
> - 💡夜莺正式推出了 [MCP-Server](https://github.com/n9e/n9e-mcp-server/),此 MCP Server 允许 AI 助手通过自然语言与夜莺 API 交互,实现告警管理、监控和可观测性任务。
|
||||
> - 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展技术委员会(CCF ODTC),为 CCF ODTC 成立后接受捐赠的第一个开源项目。
|
||||
|
||||

|
||||
|
||||
## 夜莺的工作逻辑
|
||||
|
||||
很多用户已经自行采集了指标、日志数据,此时就把存储库(VictoriaMetrics、ElasticSearch等)作为数据源接入夜莺,即可在夜莺里配置告警规则、通知规则,完成告警事件的生成和派发。
|
||||
|
||||

|
||||
|
||||
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接。
|
||||
|
||||
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
|
||||
|
||||
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
|
||||
|
||||

|
||||
|
||||
> 上图中,机房A和中心机房的网络链路很好,所以直接由中心端的夜莺进程做告警引擎,机房B和中心机房的网络链路不好,所以在机房B部署了 `n9e-edge` 做告警引擎,对机房B的数据源做告警判定。
|
||||
|
||||
## 告警降噪、升级、协同
|
||||
|
||||
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介(电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
|
||||
|
||||
如果您有更高级的需求,比如:
|
||||
|
||||
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
|
||||
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
|
||||
|
||||
那夜莺是不合适的,推荐您选用 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) 这样的 On-call 产品,产品简单易用,也有免费套餐。
|
||||
|
||||
|
||||
## 相关资料 & 交流渠道
|
||||
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助(PPT链接在文末)
|
||||
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
|
||||
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
|
||||
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
|
||||
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
|
||||
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
|
||||
|
||||
|
||||
## 关键特性简介
|
||||
|
||||

|
||||
|
||||
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
|
||||
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
|
||||
- 支持业务组概念,引入权限体系,分门别类管理各类规则
|
||||
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
|
||||
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
|
||||
|
||||

|
||||
|
||||
- 夜莺存档了历史告警事件,支持多维度的查询和统计
|
||||
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
|
||||
|
||||

|
||||
|
||||
- 夜莺内置常用操作系统、中间件、数据库的的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
|
||||
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
|
||||
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
|
||||
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
|
||||
|
||||
|
||||

|
||||
|
||||
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
|
||||
- 如果你已经习惯了 Grafana,建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
|
||||
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
|
||||
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
|
||||
|
||||
## 广受关注
|
||||
[](https://star-history.com/#ccfos/nightingale&Date)
|
||||
|
||||
## 感谢众多企业的信赖
|
||||
|
||||

|
||||
|
||||
## 社区共建
|
||||
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践,共建专业、活跃的夜莺开源社区。
|
||||
- ❤️ 夜莺贡献者
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
- [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)
|
||||
3338
aiagent/ai_agent.go
Normal file
3338
aiagent/ai_agent.go
Normal file
File diff suppressed because it is too large
Load Diff
546
aiagent/builtin_tools.go
Normal file
546
aiagent/builtin_tools.go
Normal file
@@ -0,0 +1,546 @@
|
||||
package aiagent
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/ccfos/nightingale/v6/datasource"
	"github.com/ccfos/nightingale/v6/dscache"
	"github.com/ccfos/nightingale/v6/models"
	"github.com/ccfos/nightingale/v6/pkg/prom"
	"github.com/toolkits/pkg/logger"
)
|
||||
|
||||
const (
|
||||
// ToolTypeBuiltin 内置工具类型
|
||||
ToolTypeBuiltin = "builtin"
|
||||
)
|
||||
|
||||
// =============================================================================
|
||||
// 数据源获取函数(支持注入,便于测试)
|
||||
// =============================================================================
|
||||
|
||||
// PromClientGetter Prometheus 客户端获取函数类型
|
||||
type PromClientGetter func(dsId int64) prom.API
|
||||
|
||||
// SQLDatasourceGetter SQL 数据源获取函数类型
|
||||
type SQLDatasourceGetter func(dsType string, dsId int64) (datasource.Datasource, bool)
|
||||
|
||||
// 默认使用 GlobalCache,可通过 SetPromClientGetter/SetSQLDatasourceGetter 替换
|
||||
var (
|
||||
getPromClientFunc PromClientGetter = defaultGetPromClient
|
||||
getSQLDatasourceFunc SQLDatasourceGetter = defaultGetSQLDatasource
|
||||
)
|
||||
|
||||
// SetPromClientGetter 设置 Prometheus 客户端获取函数(用于测试)
|
||||
func SetPromClientGetter(getter PromClientGetter) {
|
||||
getPromClientFunc = getter
|
||||
}
|
||||
|
||||
// SetSQLDatasourceGetter 设置 SQL 数据源获取函数(用于测试)
|
||||
func SetSQLDatasourceGetter(getter SQLDatasourceGetter) {
|
||||
getSQLDatasourceFunc = getter
|
||||
}
|
||||
|
||||
// ResetDatasourceGetters 重置为默认的数据源获取函数
|
||||
func ResetDatasourceGetters() {
|
||||
getPromClientFunc = defaultGetPromClient
|
||||
getSQLDatasourceFunc = defaultGetSQLDatasource
|
||||
}
|
||||
|
||||
func defaultGetPromClient(dsId int64) prom.API {
|
||||
// Default: no PromClient available. Use SetPromClientGetter to inject.
|
||||
return nil
|
||||
}
|
||||
|
||||
func defaultGetSQLDatasource(dsType string, dsId int64) (datasource.Datasource, bool) {
|
||||
return dscache.DsCache.Get(dsType, dsId)
|
||||
}
|
||||
|
||||
// BuiltinToolHandler 内置工具处理函数
|
||||
type BuiltinToolHandler func(ctx context.Context, wfCtx *models.WorkflowContext, args map[string]interface{}) (string, error)
|
||||
|
||||
// BuiltinTool 内置工具定义
|
||||
type BuiltinTool struct {
|
||||
Definition AgentTool
|
||||
Handler BuiltinToolHandler
|
||||
}
|
||||
|
||||
// builtinTools 内置工具注册表
|
||||
var builtinTools = map[string]*BuiltinTool{
|
||||
// Prometheus 相关工具
|
||||
"list_metrics": {
|
||||
Definition: AgentTool{
|
||||
Name: "list_metrics",
|
||||
Description: "搜索 Prometheus 数据源的指标名称,支持关键词模糊匹配",
|
||||
Type: ToolTypeBuiltin,
|
||||
Parameters: []ToolParameter{
|
||||
{Name: "keyword", Type: "string", Description: "搜索关键词,模糊匹配指标名", Required: false},
|
||||
{Name: "limit", Type: "integer", Description: "返回数量限制,默认30", Required: false},
|
||||
},
|
||||
},
|
||||
Handler: listMetrics,
|
||||
},
|
||||
"get_metric_labels": {
|
||||
Definition: AgentTool{
|
||||
Name: "get_metric_labels",
|
||||
Description: "获取 Prometheus 指标的所有标签键及其可选值",
|
||||
Type: ToolTypeBuiltin,
|
||||
Parameters: []ToolParameter{
|
||||
{Name: "metric", Type: "string", Description: "指标名称", Required: true},
|
||||
},
|
||||
},
|
||||
Handler: getMetricLabels,
|
||||
},
|
||||
|
||||
// SQL 类数据源相关工具
|
||||
"list_databases": {
|
||||
Definition: AgentTool{
|
||||
Name: "list_databases",
|
||||
Description: "列出 SQL 数据源(MySQL/Doris/ClickHouse/PostgreSQL)中的所有数据库",
|
||||
Type: ToolTypeBuiltin,
|
||||
Parameters: []ToolParameter{},
|
||||
},
|
||||
Handler: listDatabases,
|
||||
},
|
||||
"list_tables": {
|
||||
Definition: AgentTool{
|
||||
Name: "list_tables",
|
||||
Description: "列出指定数据库中的所有表",
|
||||
Type: ToolTypeBuiltin,
|
||||
Parameters: []ToolParameter{
|
||||
{Name: "database", Type: "string", Description: "数据库名", Required: true},
|
||||
},
|
||||
},
|
||||
Handler: listTables,
|
||||
},
|
||||
"describe_table": {
|
||||
Definition: AgentTool{
|
||||
Name: "describe_table",
|
||||
Description: "获取表的字段结构(字段名、类型、注释)",
|
||||
Type: ToolTypeBuiltin,
|
||||
Parameters: []ToolParameter{
|
||||
{Name: "database", Type: "string", Description: "数据库名", Required: true},
|
||||
{Name: "table", Type: "string", Description: "表名", Required: true},
|
||||
},
|
||||
},
|
||||
Handler: describeTable,
|
||||
},
|
||||
}
|
||||
|
||||
// GetBuiltinToolDef 获取内置工具定义
|
||||
func GetBuiltinToolDef(name string) (AgentTool, bool) {
|
||||
if tool, ok := builtinTools[name]; ok {
|
||||
return tool.Definition, true
|
||||
}
|
||||
return AgentTool{}, false
|
||||
}
|
||||
|
||||
// GetBuiltinToolDefs 获取指定的内置工具定义列表
|
||||
func GetBuiltinToolDefs(names []string) []AgentTool {
|
||||
var defs []AgentTool
|
||||
for _, name := range names {
|
||||
if def, ok := GetBuiltinToolDef(name); ok {
|
||||
defs = append(defs, def)
|
||||
}
|
||||
}
|
||||
return defs
|
||||
}
|
||||
|
||||
// GetAllBuiltinToolDefs 获取所有内置工具定义
|
||||
func GetAllBuiltinToolDefs() []AgentTool {
|
||||
defs := make([]AgentTool, 0, len(builtinTools))
|
||||
for _, tool := range builtinTools {
|
||||
defs = append(defs, tool.Definition)
|
||||
}
|
||||
return defs
|
||||
}
|
||||
|
||||
// ExecuteBuiltinTool 执行内置工具
|
||||
// 返回值:result, handled, error
|
||||
// handled 表示是否是内置工具(true 表示已处理,false 表示不是内置工具需要继续查找)
|
||||
func ExecuteBuiltinTool(ctx context.Context, name string, wfCtx *models.WorkflowContext, argsJSON string) (string, bool, error) {
|
||||
tool, exists := builtinTools[name]
|
||||
if !exists {
|
||||
return "", false, nil
|
||||
}
|
||||
|
||||
// 解析参数
|
||||
var args map[string]interface{}
|
||||
if argsJSON != "" {
|
||||
if err := json.Unmarshal([]byte(argsJSON), &args); err != nil {
|
||||
// 如果不是 JSON,尝试作为简单字符串参数
|
||||
args = map[string]interface{}{"input": argsJSON}
|
||||
}
|
||||
}
|
||||
if args == nil {
|
||||
args = make(map[string]interface{})
|
||||
}
|
||||
|
||||
result, err := tool.Handler(ctx, wfCtx, args)
|
||||
return result, true, err
|
||||
}
|
||||
|
||||
// getDatasourceId 从 wfCtx.Inputs 中获取 datasource_id
|
||||
func getDatasourceId(wfCtx *models.WorkflowContext) int64 {
|
||||
if wfCtx == nil || wfCtx.Inputs == nil {
|
||||
return 0
|
||||
}
|
||||
var dsId int64
|
||||
if dsIdStr, ok := wfCtx.Inputs["datasource_id"]; ok {
|
||||
fmt.Sscanf(dsIdStr, "%d", &dsId)
|
||||
}
|
||||
return dsId
|
||||
}
|
||||
|
||||
// getDatasourceType 从 wfCtx.Inputs 中获取 datasource_type
|
||||
func getDatasourceType(wfCtx *models.WorkflowContext) string {
|
||||
if wfCtx == nil || wfCtx.Inputs == nil {
|
||||
return ""
|
||||
}
|
||||
return wfCtx.Inputs["datasource_type"]
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Prometheus 工具实现
|
||||
// =============================================================================
|
||||
|
||||
// listMetrics 列出 Prometheus 指标
|
||||
func listMetrics(ctx context.Context, wfCtx *models.WorkflowContext, args map[string]interface{}) (string, error) {
|
||||
dsId := getDatasourceId(wfCtx)
|
||||
if dsId == 0 {
|
||||
return "", fmt.Errorf("datasource_id not found in inputs")
|
||||
}
|
||||
|
||||
keyword, _ := args["keyword"].(string)
|
||||
limit := 30
|
||||
if l, ok := args["limit"].(float64); ok && l > 0 {
|
||||
limit = int(l)
|
||||
}
|
||||
|
||||
// 获取 Prometheus 客户端
|
||||
client := getPromClientFunc(dsId)
|
||||
if client == nil {
|
||||
return "", fmt.Errorf("prometheus datasource not found: %d", dsId)
|
||||
}
|
||||
|
||||
// 调用 LabelValues 获取 __name__ 的所有值(即所有指标名)
|
||||
values, _, err := client.LabelValues(ctx, "__name__", nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get metrics: %v", err)
|
||||
}
|
||||
|
||||
// 过滤和限制
|
||||
result := make([]string, 0)
|
||||
keyword = strings.ToLower(keyword)
|
||||
for _, v := range values {
|
||||
m := string(v)
|
||||
if keyword == "" || strings.Contains(strings.ToLower(m), keyword) {
|
||||
result = append(result, m)
|
||||
if len(result) >= limit {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("list_metrics: found %d metrics (keyword=%s, limit=%d)", len(result), keyword, limit)
|
||||
|
||||
bytes, _ := json.Marshal(result)
|
||||
return string(bytes), nil
|
||||
}
|
||||
|
||||
// getMetricLabels 获取指标的标签
|
||||
func getMetricLabels(ctx context.Context, wfCtx *models.WorkflowContext, args map[string]interface{}) (string, error) {
|
||||
dsId := getDatasourceId(wfCtx)
|
||||
if dsId == 0 {
|
||||
return "", fmt.Errorf("datasource_id not found in inputs")
|
||||
}
|
||||
|
||||
metric, ok := args["metric"].(string)
|
||||
if !ok || metric == "" {
|
||||
return "", fmt.Errorf("metric parameter is required")
|
||||
}
|
||||
|
||||
client := getPromClientFunc(dsId)
|
||||
if client == nil {
|
||||
return "", fmt.Errorf("prometheus datasource not found: %d", dsId)
|
||||
}
|
||||
|
||||
// 使用 Series 接口获取指标的所有 series
|
||||
endTime := time.Now()
|
||||
startTime := endTime.Add(-1 * time.Hour)
|
||||
series, _, err := client.Series(ctx, []string{metric}, startTime, endTime)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get metric series: %v", err)
|
||||
}
|
||||
|
||||
// 聚合标签键值
|
||||
labels := make(map[string][]string)
|
||||
seen := make(map[string]map[string]bool)
|
||||
|
||||
for _, s := range series {
|
||||
for k, v := range s {
|
||||
key := string(k)
|
||||
val := string(v)
|
||||
if key == "__name__" {
|
||||
continue
|
||||
}
|
||||
if seen[key] == nil {
|
||||
seen[key] = make(map[string]bool)
|
||||
}
|
||||
if !seen[key][val] {
|
||||
seen[key][val] = true
|
||||
labels[key] = append(labels[key], val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("get_metric_labels: metric=%s, found %d labels", metric, len(labels))
|
||||
|
||||
bytes, _ := json.Marshal(labels)
|
||||
return string(bytes), nil
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// SQL 数据源工具实现
|
||||
// =============================================================================
|
||||
|
||||
// SQLMetadataQuerier SQL 元数据查询接口
|
||||
type SQLMetadataQuerier interface {
|
||||
ListDatabases(ctx context.Context) ([]string, error)
|
||||
ListTables(ctx context.Context, database string) ([]string, error)
|
||||
DescribeTable(ctx context.Context, database, table string) ([]map[string]interface{}, error)
|
||||
}
|
||||
|
||||
// listDatabases 列出数据库
|
||||
func listDatabases(ctx context.Context, wfCtx *models.WorkflowContext, args map[string]interface{}) (string, error) {
|
||||
dsId := getDatasourceId(wfCtx)
|
||||
dsType := getDatasourceType(wfCtx)
|
||||
if dsId == 0 {
|
||||
return "", fmt.Errorf("datasource_id not found in inputs")
|
||||
}
|
||||
if dsType == "" {
|
||||
return "", fmt.Errorf("datasource_type not found in inputs")
|
||||
}
|
||||
|
||||
plug, exists := getSQLDatasourceFunc(dsType, dsId)
|
||||
if !exists {
|
||||
return "", fmt.Errorf("datasource not found: %s/%d", dsType, dsId)
|
||||
}
|
||||
|
||||
// 构建查询 SQL
|
||||
var sql string
|
||||
switch dsType {
|
||||
case "mysql", "doris":
|
||||
sql = "SHOW DATABASES"
|
||||
case "ck", "clickhouse":
|
||||
sql = "SHOW DATABASES"
|
||||
case "pgsql", "postgresql":
|
||||
sql = "SELECT datname FROM pg_database WHERE datistemplate = false"
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported datasource type for list_databases: %s", dsType)
|
||||
}
|
||||
|
||||
// 执行查询
|
||||
query := map[string]interface{}{"sql": sql}
|
||||
data, _, err := plug.QueryLog(ctx, query)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to list databases: %v", err)
|
||||
}
|
||||
|
||||
// 提取数据库名
|
||||
databases := extractColumnValues(data, dsType, "database")
|
||||
|
||||
logger.Debugf("list_databases: dsType=%s, found %d databases", dsType, len(databases))
|
||||
|
||||
bytes, _ := json.Marshal(databases)
|
||||
return string(bytes), nil
|
||||
}
|
||||
|
||||
// listTables 列出表
|
||||
func listTables(ctx context.Context, wfCtx *models.WorkflowContext, args map[string]interface{}) (string, error) {
|
||||
dsId := getDatasourceId(wfCtx)
|
||||
dsType := getDatasourceType(wfCtx)
|
||||
if dsId == 0 {
|
||||
return "", fmt.Errorf("datasource_id not found in inputs")
|
||||
}
|
||||
|
||||
database, ok := args["database"].(string)
|
||||
if !ok || database == "" {
|
||||
return "", fmt.Errorf("database parameter is required")
|
||||
}
|
||||
|
||||
plug, exists := getSQLDatasourceFunc(dsType, dsId)
|
||||
if !exists {
|
||||
return "", fmt.Errorf("datasource not found: %s/%d", dsType, dsId)
|
||||
}
|
||||
|
||||
// 构建查询 SQL
|
||||
var sql string
|
||||
switch dsType {
|
||||
case "mysql", "doris":
|
||||
sql = fmt.Sprintf("SHOW TABLES FROM `%s`", database)
|
||||
case "ck", "clickhouse":
|
||||
sql = fmt.Sprintf("SHOW TABLES FROM `%s`", database)
|
||||
case "pgsql", "postgresql":
|
||||
sql = fmt.Sprintf("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported datasource type for list_tables: %s", dsType)
|
||||
}
|
||||
|
||||
// 执行查询
|
||||
query := map[string]interface{}{"sql": sql, "database": database}
|
||||
data, _, err := plug.QueryLog(ctx, query)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to list tables: %v", err)
|
||||
}
|
||||
|
||||
// 提取表名
|
||||
tables := extractColumnValues(data, dsType, "table")
|
||||
|
||||
logger.Debugf("list_tables: dsType=%s, database=%s, found %d tables", dsType, database, len(tables))
|
||||
|
||||
bytes, _ := json.Marshal(tables)
|
||||
return string(bytes), nil
|
||||
}
|
||||
|
||||
// describeTable 获取表结构
|
||||
func describeTable(ctx context.Context, wfCtx *models.WorkflowContext, args map[string]interface{}) (string, error) {
|
||||
dsId := getDatasourceId(wfCtx)
|
||||
dsType := getDatasourceType(wfCtx)
|
||||
if dsId == 0 {
|
||||
return "", fmt.Errorf("datasource_id not found in inputs")
|
||||
}
|
||||
|
||||
database, ok := args["database"].(string)
|
||||
if !ok || database == "" {
|
||||
return "", fmt.Errorf("database parameter is required")
|
||||
}
|
||||
table, ok := args["table"].(string)
|
||||
if !ok || table == "" {
|
||||
return "", fmt.Errorf("table parameter is required")
|
||||
}
|
||||
|
||||
plug, exists := getSQLDatasourceFunc(dsType, dsId)
|
||||
if !exists {
|
||||
return "", fmt.Errorf("datasource not found: %s/%d", dsType, dsId)
|
||||
}
|
||||
|
||||
// 构建查询 SQL
|
||||
var sql string
|
||||
switch dsType {
|
||||
case "mysql", "doris":
|
||||
sql = fmt.Sprintf("DESCRIBE `%s`.`%s`", database, table)
|
||||
case "ck", "clickhouse":
|
||||
sql = fmt.Sprintf("DESCRIBE TABLE `%s`.`%s`", database, table)
|
||||
case "pgsql", "postgresql":
|
||||
sql = fmt.Sprintf(`SELECT column_name as "Field", data_type as "Type", is_nullable as "Null", column_default as "Default" FROM information_schema.columns WHERE table_schema = 'public' AND table_name = '%s'`, table)
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported datasource type for describe_table: %s", dsType)
|
||||
}
|
||||
|
||||
// 执行查询
|
||||
query := map[string]interface{}{"sql": sql, "database": database}
|
||||
data, _, err := plug.QueryLog(ctx, query)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to describe table: %v", err)
|
||||
}
|
||||
|
||||
// 转换为统一的列结构
|
||||
columns := convertToColumnInfo(data, dsType)
|
||||
|
||||
logger.Debugf("describe_table: dsType=%s, table=%s.%s, found %d columns", dsType, database, table, len(columns))
|
||||
|
||||
bytes, _ := json.Marshal(columns)
|
||||
return string(bytes), nil
|
||||
}
|
||||
|
||||
// ColumnInfo 列信息
|
||||
type ColumnInfo struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Comment string `json:"comment,omitempty"`
|
||||
}
|
||||
|
||||
// extractColumnValues 从查询结果中提取列值
|
||||
func extractColumnValues(data []interface{}, dsType string, columnType string) []string {
|
||||
result := make([]string, 0)
|
||||
for _, row := range data {
|
||||
if rowMap, ok := row.(map[string]interface{}); ok {
|
||||
// 尝试多种可能的列名
|
||||
var value string
|
||||
for _, key := range getPossibleColumnNames(dsType, columnType) {
|
||||
if v, ok := rowMap[key]; ok {
|
||||
if s, ok := v.(string); ok {
|
||||
value = s
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if value != "" {
|
||||
result = append(result, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// getPossibleColumnNames 获取可能的列名
|
||||
// getPossibleColumnNames returns the candidate result-column names for a
// logical column kind ("database" or "table"); datasources label their
// metadata columns differently, so callers probe these keys in order.
// NOTE(review): "Tables_in_" mirrors MySQL's "Tables_in_<db>" column and
// only matches when the caller treats it as a prefix.
func getPossibleColumnNames(dsType string, columnType string) []string {
	if columnType == "database" {
		return []string{"Database", "database", "datname", "name"}
	}
	if columnType == "table" {
		return []string{"Tables_in_", "table", "tablename", "name", "Name"}
	}
	return []string{}
}
|
||||
|
||||
// convertToColumnInfo 将查询结果转换为统一的列信息格式
|
||||
func convertToColumnInfo(data []interface{}, dsType string) []ColumnInfo {
|
||||
result := make([]ColumnInfo, 0)
|
||||
for _, row := range data {
|
||||
if rowMap, ok := row.(map[string]interface{}); ok {
|
||||
col := ColumnInfo{}
|
||||
|
||||
// 提取列名
|
||||
for _, key := range []string{"Field", "field", "column_name", "name"} {
|
||||
if v, ok := rowMap[key]; ok {
|
||||
if s, ok := v.(string); ok {
|
||||
col.Name = s
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 提取类型
|
||||
for _, key := range []string{"Type", "type", "data_type"} {
|
||||
if v, ok := rowMap[key]; ok {
|
||||
if s, ok := v.(string); ok {
|
||||
col.Type = s
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 提取注释(可选)
|
||||
for _, key := range []string{"Comment", "comment", "column_comment"} {
|
||||
if v, ok := rowMap[key]; ok {
|
||||
if s, ok := v.(string); ok {
|
||||
col.Comment = s
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if col.Name != "" {
|
||||
result = append(result, col)
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
376
aiagent/llm/claude.go
Normal file
376
aiagent/llm/claude.go
Normal file
@@ -0,0 +1,376 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
DefaultClaudeURL = "https://api.anthropic.com/v1/messages"
|
||||
ClaudeAPIVersion = "2023-06-01"
|
||||
DefaultClaudeMaxTokens = 4096
|
||||
)
|
||||
|
||||
// Claude implements the LLM interface for Anthropic Claude API
|
||||
type Claude struct {
|
||||
config *Config
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// NewClaude creates a new Claude provider
|
||||
func NewClaude(cfg *Config, client *http.Client) (*Claude, error) {
|
||||
if cfg.BaseURL == "" {
|
||||
cfg.BaseURL = DefaultClaudeURL
|
||||
}
|
||||
return &Claude{
|
||||
config: cfg,
|
||||
client: client,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Claude) Name() string {
|
||||
return ProviderClaude
|
||||
}
|
||||
|
||||
// Claude API request/response structures
|
||||
type claudeRequest struct {
|
||||
Model string `json:"model"`
|
||||
Messages []claudeMessage `json:"messages"`
|
||||
System string `json:"system,omitempty"`
|
||||
MaxTokens int `json:"max_tokens"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
TopP float64 `json:"top_p,omitempty"`
|
||||
Stop []string `json:"stop_sequences,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Tools []claudeTool `json:"tools,omitempty"`
|
||||
}
|
||||
|
||||
type claudeMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content []claudeContentBlock `json:"content"`
|
||||
}
|
||||
|
||||
type claudeContentBlock struct {
|
||||
Type string `json:"type"`
|
||||
Text string `json:"text,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Input any `json:"input,omitempty"`
|
||||
ToolUseID string `json:"tool_use_id,omitempty"`
|
||||
Content string `json:"content,omitempty"`
|
||||
}
|
||||
|
||||
type claudeTool struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
InputSchema map[string]interface{} `json:"input_schema"`
|
||||
}
|
||||
|
||||
type claudeResponse struct {
|
||||
ID string `json:"id"`
|
||||
Type string `json:"type"`
|
||||
Role string `json:"role"`
|
||||
Content []claudeContentBlock `json:"content"`
|
||||
Model string `json:"model"`
|
||||
StopReason string `json:"stop_reason"`
|
||||
StopSequence string `json:"stop_sequence,omitempty"`
|
||||
Usage *struct {
|
||||
InputTokens int `json:"input_tokens"`
|
||||
OutputTokens int `json:"output_tokens"`
|
||||
} `json:"usage,omitempty"`
|
||||
Error *struct {
|
||||
Type string `json:"type"`
|
||||
Message string `json:"message"`
|
||||
} `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// Claude streaming event types
|
||||
type claudeStreamEvent struct {
|
||||
Type string `json:"type"`
|
||||
Index int `json:"index,omitempty"`
|
||||
ContentBlock *claudeContentBlock `json:"content_block,omitempty"`
|
||||
Delta *claudeStreamDelta `json:"delta,omitempty"`
|
||||
Message *claudeResponse `json:"message,omitempty"`
|
||||
Usage *claudeStreamUsage `json:"usage,omitempty"`
|
||||
}
|
||||
|
||||
type claudeStreamDelta struct {
|
||||
Type string `json:"type"`
|
||||
Text string `json:"text,omitempty"`
|
||||
PartialJSON string `json:"partial_json,omitempty"`
|
||||
StopReason string `json:"stop_reason,omitempty"`
|
||||
}
|
||||
|
||||
type claudeStreamUsage struct {
|
||||
OutputTokens int `json:"output_tokens"`
|
||||
}
|
||||
|
||||
func (c *Claude) Generate(ctx context.Context, req *GenerateRequest) (*GenerateResponse, error) {
|
||||
claudeReq := c.convertRequest(req)
|
||||
claudeReq.Stream = false
|
||||
|
||||
respBody, err := c.doRequest(ctx, claudeReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var claudeResp claudeResponse
|
||||
if err := json.Unmarshal(respBody, &claudeResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
if claudeResp.Error != nil {
|
||||
return nil, fmt.Errorf("Claude API error: %s", claudeResp.Error.Message)
|
||||
}
|
||||
|
||||
return c.convertResponse(&claudeResp), nil
|
||||
}
|
||||
|
||||
func (c *Claude) GenerateStream(ctx context.Context, req *GenerateRequest) (<-chan StreamChunk, error) {
|
||||
claudeReq := c.convertRequest(req)
|
||||
claudeReq.Stream = true
|
||||
|
||||
jsonData, err := json.Marshal(claudeReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, "POST", c.config.BaseURL, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
c.setHeaders(httpReq)
|
||||
|
||||
resp, err := c.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
return nil, fmt.Errorf("Claude API error (status %d): %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
ch := make(chan StreamChunk, 100)
|
||||
go c.streamResponse(ctx, resp, ch)
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
func (c *Claude) streamResponse(ctx context.Context, resp *http.Response, ch chan<- StreamChunk) {
|
||||
defer close(ch)
|
||||
defer resp.Body.Close()
|
||||
|
||||
reader := bufio.NewReader(resp.Body)
|
||||
var currentToolCall *ToolCall
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
ch <- StreamChunk{Done: true, Error: ctx.Err()}
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
line, err := reader.ReadString('\n')
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
ch <- StreamChunk{Done: true, Error: err}
|
||||
} else {
|
||||
ch <- StreamChunk{Done: true}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" || !strings.HasPrefix(line, "data: ") {
|
||||
continue
|
||||
}
|
||||
|
||||
data := strings.TrimPrefix(line, "data: ")
|
||||
|
||||
var event claudeStreamEvent
|
||||
if err := json.Unmarshal([]byte(data), &event); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
switch event.Type {
|
||||
case "content_block_start":
|
||||
if event.ContentBlock != nil && event.ContentBlock.Type == "tool_use" {
|
||||
currentToolCall = &ToolCall{
|
||||
ID: event.ContentBlock.ID,
|
||||
Name: event.ContentBlock.Name,
|
||||
}
|
||||
}
|
||||
|
||||
case "content_block_delta":
|
||||
if event.Delta != nil {
|
||||
chunk := StreamChunk{}
|
||||
|
||||
switch event.Delta.Type {
|
||||
case "text_delta":
|
||||
chunk.Content = event.Delta.Text
|
||||
case "input_json_delta":
|
||||
if currentToolCall != nil {
|
||||
currentToolCall.Arguments += event.Delta.PartialJSON
|
||||
}
|
||||
}
|
||||
|
||||
if chunk.Content != "" {
|
||||
ch <- chunk
|
||||
}
|
||||
}
|
||||
|
||||
case "content_block_stop":
|
||||
if currentToolCall != nil {
|
||||
ch <- StreamChunk{
|
||||
ToolCalls: []ToolCall{*currentToolCall},
|
||||
}
|
||||
currentToolCall = nil
|
||||
}
|
||||
|
||||
case "message_delta":
|
||||
if event.Delta != nil && event.Delta.StopReason != "" {
|
||||
ch <- StreamChunk{
|
||||
FinishReason: event.Delta.StopReason,
|
||||
}
|
||||
}
|
||||
|
||||
case "message_stop":
|
||||
ch <- StreamChunk{Done: true}
|
||||
return
|
||||
|
||||
case "error":
|
||||
ch <- StreamChunk{Done: true, Error: fmt.Errorf("stream error")}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Claude) convertRequest(req *GenerateRequest) *claudeRequest {
|
||||
claudeReq := &claudeRequest{
|
||||
Model: c.config.Model,
|
||||
MaxTokens: req.MaxTokens,
|
||||
Temperature: req.Temperature,
|
||||
TopP: req.TopP,
|
||||
Stop: req.Stop,
|
||||
}
|
||||
|
||||
if claudeReq.MaxTokens <= 0 {
|
||||
claudeReq.MaxTokens = DefaultClaudeMaxTokens
|
||||
}
|
||||
|
||||
// Extract system message and convert other messages
|
||||
for _, msg := range req.Messages {
|
||||
if msg.Role == RoleSystem {
|
||||
claudeReq.System = msg.Content
|
||||
continue
|
||||
}
|
||||
|
||||
// Claude uses content blocks instead of plain strings
|
||||
claudeMsg := claudeMessage{
|
||||
Role: msg.Role,
|
||||
Content: []claudeContentBlock{
|
||||
{Type: "text", Text: msg.Content},
|
||||
},
|
||||
}
|
||||
claudeReq.Messages = append(claudeReq.Messages, claudeMsg)
|
||||
}
|
||||
|
||||
// Convert tools
|
||||
for _, tool := range req.Tools {
|
||||
claudeReq.Tools = append(claudeReq.Tools, claudeTool{
|
||||
Name: tool.Name,
|
||||
Description: tool.Description,
|
||||
InputSchema: tool.Parameters,
|
||||
})
|
||||
}
|
||||
|
||||
return claudeReq
|
||||
}
|
||||
|
||||
func (c *Claude) convertResponse(resp *claudeResponse) *GenerateResponse {
|
||||
result := &GenerateResponse{
|
||||
FinishReason: resp.StopReason,
|
||||
}
|
||||
|
||||
// Extract text content and tool calls
|
||||
var textParts []string
|
||||
for _, block := range resp.Content {
|
||||
switch block.Type {
|
||||
case "text":
|
||||
textParts = append(textParts, block.Text)
|
||||
case "tool_use":
|
||||
inputJSON, _ := json.Marshal(block.Input)
|
||||
result.ToolCalls = append(result.ToolCalls, ToolCall{
|
||||
ID: block.ID,
|
||||
Name: block.Name,
|
||||
Arguments: string(inputJSON),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
result.Content = strings.Join(textParts, "")
|
||||
|
||||
if resp.Usage != nil {
|
||||
result.Usage = &Usage{
|
||||
PromptTokens: resp.Usage.InputTokens,
|
||||
CompletionTokens: resp.Usage.OutputTokens,
|
||||
TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens,
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (c *Claude) doRequest(ctx context.Context, req *claudeRequest) ([]byte, error) {
|
||||
jsonData, err := json.Marshal(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, "POST", c.config.BaseURL, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
c.setHeaders(httpReq)
|
||||
|
||||
resp, err := c.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
return nil, fmt.Errorf("Claude API error (status %d): %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
return body, nil
|
||||
}
|
||||
|
||||
func (c *Claude) setHeaders(req *http.Request) {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("anthropic-version", ClaudeAPIVersion)
|
||||
|
||||
if c.config.APIKey != "" {
|
||||
req.Header.Set("x-api-key", c.config.APIKey)
|
||||
}
|
||||
|
||||
for k, v := range c.config.Headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
}
|
||||
376
aiagent/llm/gemini.go
Normal file
376
aiagent/llm/gemini.go
Normal file
@@ -0,0 +1,376 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
DefaultGeminiURL = "https://generativelanguage.googleapis.com/v1beta/models"
|
||||
)
|
||||
|
||||
// Gemini implements the LLM interface for Google Gemini API
|
||||
type Gemini struct {
|
||||
config *Config
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// NewGemini creates a new Gemini provider
|
||||
func NewGemini(cfg *Config, client *http.Client) (*Gemini, error) {
|
||||
if cfg.BaseURL == "" {
|
||||
cfg.BaseURL = DefaultGeminiURL
|
||||
}
|
||||
return &Gemini{
|
||||
config: cfg,
|
||||
client: client,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (g *Gemini) Name() string {
|
||||
return ProviderGemini
|
||||
}
|
||||
|
||||
// Gemini API request/response structures. These mirror the Gemini REST wire
// format; field names follow the API's camelCase JSON convention.

// geminiRequest is the top-level generateContent request body.
type geminiRequest struct {
	Contents          []geminiContent         `json:"contents"`
	SystemInstruction *geminiContent          `json:"systemInstruction,omitempty"`
	Tools             []geminiTool            `json:"tools,omitempty"`
	GenerationConfig  *geminiGenerationConfig `json:"generationConfig,omitempty"`
}

// geminiContent is one conversation turn: a role ("user" or "model") plus
// one or more parts.
type geminiContent struct {
	Role  string       `json:"role,omitempty"`
	Parts []geminiPart `json:"parts"`
}

// geminiPart is a single piece of content: plain text, a function call
// emitted by the model, or a function response sent back to it.
type geminiPart struct {
	Text             string                  `json:"text,omitempty"`
	FunctionCall     *geminiFunctionCall     `json:"functionCall,omitempty"`
	FunctionResponse *geminiFunctionResponse `json:"functionResponse,omitempty"`
}

// geminiFunctionCall carries a tool invocation requested by the model.
// Args arrive as an already-decoded JSON object.
type geminiFunctionCall struct {
	Name string                 `json:"name"`
	Args map[string]interface{} `json:"args"`
}

// geminiFunctionResponse carries a tool result returned to the model.
type geminiFunctionResponse struct {
	Name     string                 `json:"name"`
	Response map[string]interface{} `json:"response"`
}

// geminiTool groups the function declarations exposed to the model.
type geminiTool struct {
	FunctionDeclarations []geminiFunctionDeclaration `json:"functionDeclarations,omitempty"`
}

// geminiFunctionDeclaration describes one callable tool; Parameters is a
// JSON-schema object.
type geminiFunctionDeclaration struct {
	Name        string                 `json:"name"`
	Description string                 `json:"description"`
	Parameters  map[string]interface{} `json:"parameters,omitempty"`
}

// geminiGenerationConfig holds sampling and output-length controls.
type geminiGenerationConfig struct {
	Temperature     float64  `json:"temperature,omitempty"`
	TopP            float64  `json:"topP,omitempty"`
	MaxOutputTokens int      `json:"maxOutputTokens,omitempty"`
	StopSequences   []string `json:"stopSequences,omitempty"`
}

// geminiResponse is the generateContent response body. Error is populated
// instead of Candidates when the API rejects the request.
type geminiResponse struct {
	Candidates []struct {
		Content      geminiContent `json:"content"`
		FinishReason string        `json:"finishReason"`
		SafetyRatings []struct {
			Category    string `json:"category"`
			Probability string `json:"probability"`
		} `json:"safetyRatings,omitempty"`
	} `json:"candidates"`
	UsageMetadata *struct {
		PromptTokenCount     int `json:"promptTokenCount"`
		CandidatesTokenCount int `json:"candidatesTokenCount"`
		TotalTokenCount      int `json:"totalTokenCount"`
	} `json:"usageMetadata,omitempty"`
	Error *struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
		Status  string `json:"status"`
	} `json:"error,omitempty"`
}
|
||||
|
||||
func (g *Gemini) Generate(ctx context.Context, req *GenerateRequest) (*GenerateResponse, error) {
|
||||
geminiReq := g.convertRequest(req)
|
||||
|
||||
url := g.buildURL(false)
|
||||
respBody, err := g.doRequest(ctx, url, geminiReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var geminiResp geminiResponse
|
||||
if err := json.Unmarshal(respBody, &geminiResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
if geminiResp.Error != nil {
|
||||
return nil, fmt.Errorf("Gemini API error: %s", geminiResp.Error.Message)
|
||||
}
|
||||
|
||||
return g.convertResponse(&geminiResp), nil
|
||||
}
|
||||
|
||||
func (g *Gemini) GenerateStream(ctx context.Context, req *GenerateRequest) (<-chan StreamChunk, error) {
|
||||
geminiReq := g.convertRequest(req)
|
||||
|
||||
jsonData, err := json.Marshal(geminiReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
url := g.buildURL(true)
|
||||
httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
g.setHeaders(httpReq)
|
||||
|
||||
resp, err := g.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
return nil, fmt.Errorf("Gemini API error (status %d): %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
ch := make(chan StreamChunk, 100)
|
||||
go g.streamResponse(ctx, resp, ch)
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// streamResponse reads the streaming HTTP body line by line, accumulating
// text until a complete JSON object parses, and forwards each parsed
// candidate as a StreamChunk. It owns resp.Body and ch: both are closed
// before returning.
//
// NOTE(review): the Gemini REST streaming endpoint can return a JSON *array*
// of responses; if the payload starts with "[", the accumulated buffer never
// parses as a single geminiResponse and nothing is emitted until EOF —
// confirm the endpoint is used with SSE/newline-delimited output (e.g.
// ?alt=sse). Also note the buffer grows unboundedly while parsing fails.
func (g *Gemini) streamResponse(ctx context.Context, resp *http.Response, ch chan<- StreamChunk) {
	defer close(ch)
	defer resp.Body.Close()

	reader := bufio.NewReader(resp.Body)
	var buffer strings.Builder

	for {
		// Bail out promptly if the caller cancelled; non-blocking check.
		select {
		case <-ctx.Done():
			ch <- StreamChunk{Done: true, Error: ctx.Err()}
			return
		default:
		}

		line, err := reader.ReadString('\n')
		if err != nil {
			// EOF is the normal end of stream; anything else is reported.
			if err != io.EOF {
				ch <- StreamChunk{Done: true, Error: err}
			} else {
				ch <- StreamChunk{Done: true}
			}
			return
		}

		line = strings.TrimSpace(line)

		// Gemini streams JSON objects, accumulate until we have a complete one
		if line == "" {
			continue
		}

		// Handle SSE format if present
		if strings.HasPrefix(line, "data: ") {
			line = strings.TrimPrefix(line, "data: ")
		}

		buffer.WriteString(line)

		// Try to parse accumulated JSON
		var geminiResp geminiResponse
		if err := json.Unmarshal([]byte(buffer.String()), &geminiResp); err != nil {
			// Not complete yet, continue accumulating
			continue
		}

		// Reset buffer for next response
		buffer.Reset()

		if len(geminiResp.Candidates) > 0 {
			// Only the first candidate is forwarded.
			candidate := geminiResp.Candidates[0]
			chunk := StreamChunk{
				FinishReason: candidate.FinishReason,
			}

			// Concatenate text parts and collect any function calls into the
			// unified ToolCall shape (args re-encoded as a JSON string).
			for _, part := range candidate.Content.Parts {
				if part.Text != "" {
					chunk.Content += part.Text
				}
				if part.FunctionCall != nil {
					argsJSON, _ := json.Marshal(part.FunctionCall.Args)
					chunk.ToolCalls = append(chunk.ToolCalls, ToolCall{
						Name:      part.FunctionCall.Name,
						Arguments: string(argsJSON),
					})
				}
			}

			ch <- chunk

			// A non-STOP finish reason (safety block, length, etc.) ends the
			// stream early; a plain STOP keeps reading until EOF.
			if candidate.FinishReason != "" && candidate.FinishReason != "STOP" {
				ch <- StreamChunk{Done: true}
				return
			}
		}
	}
}
|
||||
|
||||
func (g *Gemini) convertRequest(req *GenerateRequest) *geminiRequest {
|
||||
geminiReq := &geminiRequest{
|
||||
GenerationConfig: &geminiGenerationConfig{
|
||||
Temperature: req.Temperature,
|
||||
TopP: req.TopP,
|
||||
MaxOutputTokens: req.MaxTokens,
|
||||
StopSequences: req.Stop,
|
||||
},
|
||||
}
|
||||
|
||||
// Convert messages
|
||||
for _, msg := range req.Messages {
|
||||
if msg.Role == RoleSystem {
|
||||
geminiReq.SystemInstruction = &geminiContent{
|
||||
Parts: []geminiPart{{Text: msg.Content}},
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Map roles
|
||||
role := msg.Role
|
||||
if role == RoleAssistant {
|
||||
role = "model"
|
||||
}
|
||||
|
||||
geminiReq.Contents = append(geminiReq.Contents, geminiContent{
|
||||
Role: role,
|
||||
Parts: []geminiPart{{Text: msg.Content}},
|
||||
})
|
||||
}
|
||||
|
||||
// Convert tools
|
||||
if len(req.Tools) > 0 {
|
||||
var declarations []geminiFunctionDeclaration
|
||||
for _, tool := range req.Tools {
|
||||
declarations = append(declarations, geminiFunctionDeclaration{
|
||||
Name: tool.Name,
|
||||
Description: tool.Description,
|
||||
Parameters: tool.Parameters,
|
||||
})
|
||||
}
|
||||
geminiReq.Tools = []geminiTool{{FunctionDeclarations: declarations}}
|
||||
}
|
||||
|
||||
return geminiReq
|
||||
}
|
||||
|
||||
func (g *Gemini) convertResponse(resp *geminiResponse) *GenerateResponse {
|
||||
result := &GenerateResponse{}
|
||||
|
||||
if len(resp.Candidates) > 0 {
|
||||
candidate := resp.Candidates[0]
|
||||
result.FinishReason = candidate.FinishReason
|
||||
|
||||
var textParts []string
|
||||
for _, part := range candidate.Content.Parts {
|
||||
if part.Text != "" {
|
||||
textParts = append(textParts, part.Text)
|
||||
}
|
||||
if part.FunctionCall != nil {
|
||||
argsJSON, _ := json.Marshal(part.FunctionCall.Args)
|
||||
result.ToolCalls = append(result.ToolCalls, ToolCall{
|
||||
Name: part.FunctionCall.Name,
|
||||
Arguments: string(argsJSON),
|
||||
})
|
||||
}
|
||||
}
|
||||
result.Content = strings.Join(textParts, "")
|
||||
}
|
||||
|
||||
if resp.UsageMetadata != nil {
|
||||
result.Usage = &Usage{
|
||||
PromptTokens: resp.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: resp.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: resp.UsageMetadata.TotalTokenCount,
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (g *Gemini) buildURL(stream bool) string {
|
||||
action := "generateContent"
|
||||
if stream {
|
||||
action = "streamGenerateContent"
|
||||
}
|
||||
|
||||
// Check if baseURL already contains the full path
|
||||
if strings.Contains(g.config.BaseURL, ":generateContent") ||
|
||||
strings.Contains(g.config.BaseURL, ":streamGenerateContent") {
|
||||
return fmt.Sprintf("%s?key=%s", g.config.BaseURL, g.config.APIKey)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s/%s:%s?key=%s",
|
||||
g.config.BaseURL,
|
||||
g.config.Model,
|
||||
action,
|
||||
g.config.APIKey,
|
||||
)
|
||||
}
|
||||
|
||||
func (g *Gemini) doRequest(ctx context.Context, url string, req *geminiRequest) ([]byte, error) {
|
||||
jsonData, err := json.Marshal(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
g.setHeaders(httpReq)
|
||||
|
||||
resp, err := g.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
return nil, fmt.Errorf("Gemini API error (status %d): %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
return body, nil
|
||||
}
|
||||
|
||||
func (g *Gemini) setHeaders(req *http.Request) {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
for k, v := range g.config.Headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
}
|
||||
135
aiagent/llm/helper.go
Normal file
135
aiagent/llm/helper.go
Normal file
@@ -0,0 +1,135 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Chat is a convenience function for simple chat completions
|
||||
func Chat(ctx context.Context, llm LLM, messages []Message) (string, error) {
|
||||
resp, err := llm.Generate(ctx, &GenerateRequest{
|
||||
Messages: messages,
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return resp.Content, nil
|
||||
}
|
||||
|
||||
// ChatWithSystem is a convenience function for chat with a system prompt
|
||||
func ChatWithSystem(ctx context.Context, llm LLM, systemPrompt string, userMessage string) (string, error) {
|
||||
messages := []Message{
|
||||
{Role: RoleSystem, Content: systemPrompt},
|
||||
{Role: RoleUser, Content: userMessage},
|
||||
}
|
||||
return Chat(ctx, llm, messages)
|
||||
}
|
||||
|
||||
// NewMessage creates a new message
|
||||
func NewMessage(role, content string) Message {
|
||||
return Message{Role: role, Content: content}
|
||||
}
|
||||
|
||||
// SystemMessage creates a system message
|
||||
func SystemMessage(content string) Message {
|
||||
return Message{Role: RoleSystem, Content: content}
|
||||
}
|
||||
|
||||
// UserMessage creates a user message
|
||||
func UserMessage(content string) Message {
|
||||
return Message{Role: RoleUser, Content: content}
|
||||
}
|
||||
|
||||
// AssistantMessage creates an assistant message
|
||||
func AssistantMessage(content string) Message {
|
||||
return Message{Role: RoleAssistant, Content: content}
|
||||
}
|
||||
|
||||
// DetectProvider attempts to detect the provider from the base URL
|
||||
func DetectProvider(baseURL string) string {
|
||||
baseURL = strings.ToLower(baseURL)
|
||||
|
||||
switch {
|
||||
case strings.Contains(baseURL, "anthropic.com"):
|
||||
return ProviderClaude
|
||||
case strings.Contains(baseURL, "generativelanguage.googleapis.com"):
|
||||
return ProviderGemini
|
||||
case strings.Contains(baseURL, "aiplatform.googleapis.com"):
|
||||
return ProviderVertex
|
||||
case strings.Contains(baseURL, "bedrock"):
|
||||
return ProviderBedrock
|
||||
case strings.Contains(baseURL, "localhost:11434"):
|
||||
return ProviderOllama
|
||||
default:
|
||||
// Default to OpenAI-compatible
|
||||
return ProviderOpenAI
|
||||
}
|
||||
}
|
||||
|
||||
// DetectProviderFromModel attempts to detect the provider from the model name
|
||||
func DetectProviderFromModel(model string) string {
|
||||
model = strings.ToLower(model)
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(model, "claude"):
|
||||
return ProviderClaude
|
||||
case strings.HasPrefix(model, "gemini"):
|
||||
return ProviderGemini
|
||||
case strings.HasPrefix(model, "gpt") || strings.HasPrefix(model, "o1") || strings.HasPrefix(model, "o3"):
|
||||
return ProviderOpenAI
|
||||
case strings.HasPrefix(model, "llama") || strings.HasPrefix(model, "mistral") || strings.HasPrefix(model, "qwen"):
|
||||
return ProviderOllama
|
||||
default:
|
||||
return ProviderOpenAI
|
||||
}
|
||||
}
|
||||
|
||||
// BuildToolDefinition creates a tool definition with JSON schema parameters
|
||||
func BuildToolDefinition(name, description string, properties map[string]interface{}, required []string) ToolDefinition {
|
||||
params := map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": properties,
|
||||
}
|
||||
if len(required) > 0 {
|
||||
params["required"] = required
|
||||
}
|
||||
|
||||
return ToolDefinition{
|
||||
Name: name,
|
||||
Description: description,
|
||||
Parameters: params,
|
||||
}
|
||||
}
|
||||
|
||||
// CollectStream collects all chunks from a stream into a single response
|
||||
func CollectStream(ch <-chan StreamChunk) (*GenerateResponse, error) {
|
||||
var content strings.Builder
|
||||
var toolCalls []ToolCall
|
||||
var finishReason string
|
||||
var lastErr error
|
||||
|
||||
for chunk := range ch {
|
||||
if chunk.Error != nil {
|
||||
lastErr = chunk.Error
|
||||
}
|
||||
if chunk.Content != "" {
|
||||
content.WriteString(chunk.Content)
|
||||
}
|
||||
if len(chunk.ToolCalls) > 0 {
|
||||
toolCalls = append(toolCalls, chunk.ToolCalls...)
|
||||
}
|
||||
if chunk.FinishReason != "" {
|
||||
finishReason = chunk.FinishReason
|
||||
}
|
||||
}
|
||||
|
||||
if lastErr != nil {
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
return &GenerateResponse{
|
||||
Content: content.String(),
|
||||
ToolCalls: toolCalls,
|
||||
FinishReason: finishReason,
|
||||
}, nil
|
||||
}
|
||||
193
aiagent/llm/llm.go
Normal file
193
aiagent/llm/llm.go
Normal file
@@ -0,0 +1,193 @@
|
||||
// Package llm provides a unified interface for multiple LLM providers.
|
||||
// Supports OpenAI-compatible APIs, Claude/Anthropic, and Gemini.
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Provider types identify which backend implementation New constructs.
const (
	ProviderOpenAI  = "openai"  // OpenAI and compatible APIs (Azure, vLLM, etc.)
	ProviderClaude  = "claude"  // Anthropic Claude
	ProviderGemini  = "gemini"  // Google Gemini
	ProviderOllama  = "ollama"  // Ollama local models
	ProviderBedrock = "bedrock" // AWS Bedrock
	ProviderVertex  = "vertex"  // Google Vertex AI
)

// Role constants for Message.Role in the unified chat format.
const (
	RoleSystem    = "system"
	RoleUser      = "user"
	RoleAssistant = "assistant"
)
|
||||
|
||||
// Message represents a chat message
|
||||
type Message struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
// ToolCall represents a tool/function call from the LLM
|
||||
type ToolCall struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Arguments string `json:"arguments"`
|
||||
}
|
||||
|
||||
// ToolDefinition defines a tool that the LLM can call
|
||||
type ToolDefinition struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
Parameters map[string]interface{} `json:"parameters,omitempty"`
|
||||
}
|
||||
|
||||
// GenerateRequest is the unified request for LLM generation
|
||||
type GenerateRequest struct {
|
||||
Messages []Message `json:"messages"`
|
||||
Tools []ToolDefinition `json:"tools,omitempty"`
|
||||
MaxTokens int `json:"max_tokens,omitempty"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
TopP float64 `json:"top_p,omitempty"`
|
||||
Stop []string `json:"stop,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
}
|
||||
|
||||
// GenerateResponse is the unified response from LLM generation
|
||||
type GenerateResponse struct {
|
||||
Content string `json:"content"`
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
||||
FinishReason string `json:"finish_reason"`
|
||||
Usage *Usage `json:"usage,omitempty"`
|
||||
}
|
||||
|
||||
// Usage represents token usage statistics
|
||||
type Usage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
}
|
||||
|
||||
// StreamChunk represents a chunk in streaming response
|
||||
type StreamChunk struct {
|
||||
Content string `json:"content,omitempty"`
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
||||
FinishReason string `json:"finish_reason,omitempty"`
|
||||
Done bool `json:"done"`
|
||||
Error error `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// LLM is the unified interface for all LLM providers
|
||||
type LLM interface {
|
||||
// Name returns the provider name
|
||||
Name() string
|
||||
|
||||
// Generate sends a request to the LLM and returns the response
|
||||
Generate(ctx context.Context, req *GenerateRequest) (*GenerateResponse, error)
|
||||
|
||||
// GenerateStream sends a request and returns a channel for streaming responses
|
||||
GenerateStream(ctx context.Context, req *GenerateRequest) (<-chan StreamChunk, error)
|
||||
}
|
||||
|
||||
// Config is the configuration for creating an LLM provider
|
||||
type Config struct {
|
||||
// Provider type: openai, claude, gemini, ollama, bedrock, vertex
|
||||
Provider string `json:"provider"`
|
||||
|
||||
// API endpoint URL
|
||||
BaseURL string `json:"base_url,omitempty"`
|
||||
|
||||
// API key or token
|
||||
APIKey string `json:"api_key,omitempty"`
|
||||
|
||||
// Model name (e.g., "gpt-4", "claude-3-opus", "gemini-pro")
|
||||
Model string `json:"model"`
|
||||
|
||||
// Additional headers for API requests
|
||||
Headers map[string]string `json:"headers,omitempty"`
|
||||
|
||||
// HTTP timeout in milliseconds
|
||||
Timeout int `json:"timeout,omitempty"`
|
||||
|
||||
// Skip SSL verification (for self-signed certs)
|
||||
SkipSSLVerify bool `json:"skip_ssl_verify,omitempty"`
|
||||
|
||||
// HTTP proxy URL
|
||||
Proxy string `json:"proxy,omitempty"`
|
||||
|
||||
// Provider-specific options
|
||||
Options map[string]interface{} `json:"options,omitempty"`
|
||||
}
|
||||
|
||||
// DefaultConfig returns a config with default values
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
Provider: ProviderOpenAI,
|
||||
Timeout: 60000,
|
||||
}
|
||||
}
|
||||
|
||||
// New creates an LLM instance based on the config
|
||||
func New(cfg *Config) (LLM, error) {
|
||||
if cfg == nil {
|
||||
cfg = DefaultConfig()
|
||||
}
|
||||
|
||||
// Create HTTP client
|
||||
client := createHTTPClient(cfg)
|
||||
|
||||
switch cfg.Provider {
|
||||
case ProviderOpenAI, "":
|
||||
return NewOpenAI(cfg, client)
|
||||
case ProviderClaude:
|
||||
return NewClaude(cfg, client)
|
||||
case ProviderGemini:
|
||||
return NewGemini(cfg, client)
|
||||
case ProviderOllama:
|
||||
// Ollama uses OpenAI-compatible API
|
||||
if cfg.BaseURL == "" {
|
||||
cfg.BaseURL = "http://localhost:11434/v1"
|
||||
}
|
||||
return NewOpenAI(cfg, client)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported LLM provider: %s", cfg.Provider)
|
||||
}
|
||||
}
|
||||
|
||||
// createHTTPClient creates an HTTP client with the given config
|
||||
func createHTTPClient(cfg *Config) *http.Client {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: cfg.SkipSSLVerify,
|
||||
},
|
||||
}
|
||||
|
||||
if cfg.Proxy != "" {
|
||||
if proxyURL, err := url.Parse(cfg.Proxy); err == nil {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
}
|
||||
|
||||
timeout := cfg.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 60000
|
||||
}
|
||||
|
||||
return &http.Client{
|
||||
Timeout: time.Duration(timeout) * time.Millisecond,
|
||||
Transport: transport,
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to convert internal messages to provider-specific format
|
||||
func ConvertMessages(messages []Message) []Message {
|
||||
result := make([]Message, len(messages))
|
||||
copy(result, messages)
|
||||
return result
|
||||
}
|
||||
416
aiagent/llm/openai.go
Normal file
416
aiagent/llm/openai.go
Normal file
@@ -0,0 +1,416 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
	// DefaultOpenAIURL is the chat-completions endpoint used when no base
	// URL is configured.
	DefaultOpenAIURL = "https://api.openai.com/v1/chat/completions"

	// Retry configuration shared by Generate and GenerateStream.
	maxRetries       = 3
	initialRetryWait = 5 * time.Second  // initial wait after a rate limit / retryable error
	maxRetryWait     = 60 * time.Second // cap on the exponential backoff
)
|
||||
|
||||
// OpenAI implements the LLM interface for OpenAI and compatible APIs
// (Azure, vLLM, Ollama's /v1 endpoint, etc.).
type OpenAI struct {
	config *Config      // provider configuration (endpoint, key, model, headers)
	client *http.Client // shared HTTP client supplied by New
}
|
||||
|
||||
// NewOpenAI creates a new OpenAI provider
|
||||
func NewOpenAI(cfg *Config, client *http.Client) (*OpenAI, error) {
|
||||
if cfg.BaseURL == "" {
|
||||
cfg.BaseURL = DefaultOpenAIURL
|
||||
}
|
||||
return &OpenAI{
|
||||
config: cfg,
|
||||
client: client,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (o *OpenAI) Name() string {
|
||||
return ProviderOpenAI
|
||||
}
|
||||
|
||||
// OpenAI API request/response structures, mirroring the chat-completions
// wire format.

// openAIRequest is the chat-completions request body.
type openAIRequest struct {
	Model       string          `json:"model"`
	Messages    []openAIMessage `json:"messages"`
	Tools       []openAITool    `json:"tools,omitempty"`
	MaxTokens   int             `json:"max_tokens,omitempty"`
	Temperature float64         `json:"temperature,omitempty"`
	TopP        float64         `json:"top_p,omitempty"`
	Stop        []string        `json:"stop,omitempty"`
	Stream      bool            `json:"stream,omitempty"`
}

// openAIMessage is one chat turn; ToolCalls/ToolCallID carry tool-use
// round-trips.
type openAIMessage struct {
	Role       string           `json:"role"`
	Content    string           `json:"content,omitempty"`
	ToolCalls  []openAIToolCall `json:"tool_calls,omitempty"`
	ToolCallID string           `json:"tool_call_id,omitempty"`
}

// openAITool wraps a function declaration (Type is always "function").
type openAITool struct {
	Type     string         `json:"type"`
	Function openAIFunction `json:"function"`
}

// openAIFunction describes one callable tool; Parameters is a JSON-schema
// object.
type openAIFunction struct {
	Name        string                 `json:"name"`
	Description string                 `json:"description"`
	Parameters  map[string]interface{} `json:"parameters,omitempty"`
}

// openAIToolCall is a tool invocation emitted by the model; Arguments is a
// raw JSON string.
type openAIToolCall struct {
	ID       string `json:"id"`
	Type     string `json:"type"`
	Function struct {
		Name      string `json:"name"`
		Arguments string `json:"arguments"`
	} `json:"function"`
}

// openAIResponse covers both the full response (Message) and streaming
// deltas (Delta); Error is populated when the API rejects the request.
type openAIResponse struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Index        int           `json:"index"`
		Message      openAIMessage `json:"message"`
		Delta        openAIMessage `json:"delta"`
		FinishReason string        `json:"finish_reason"`
	} `json:"choices"`
	Usage *struct {
		PromptTokens     int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage,omitempty"`
	Error *struct {
		Message string `json:"message"`
		Type    string `json:"type"`
		Code    string `json:"code"`
	} `json:"error,omitempty"`
}
|
||||
|
||||
func (o *OpenAI) Generate(ctx context.Context, req *GenerateRequest) (*GenerateResponse, error) {
|
||||
// Convert to OpenAI format
|
||||
openAIReq := o.convertRequest(req)
|
||||
openAIReq.Stream = false
|
||||
|
||||
// Make request
|
||||
respBody, err := o.doRequest(ctx, openAIReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse response
|
||||
var openAIResp openAIResponse
|
||||
if err := json.Unmarshal(respBody, &openAIResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
if openAIResp.Error != nil {
|
||||
return nil, fmt.Errorf("OpenAI API error: %s", openAIResp.Error.Message)
|
||||
}
|
||||
|
||||
if len(openAIResp.Choices) == 0 {
|
||||
return nil, fmt.Errorf("no response from OpenAI")
|
||||
}
|
||||
|
||||
// Convert to unified response
|
||||
return o.convertResponse(&openAIResp), nil
|
||||
}
|
||||
|
||||
// isRetryableStatus reports whether an HTTP status code indicates a
// transient failure worth retrying: rate limiting (429) or server-side
// errors (500, 502, 503, 504).
func isRetryableStatus(statusCode int) bool {
	return statusCode == 429 ||
		statusCode == 500 ||
		statusCode == 502 ||
		statusCode == 503 ||
		statusCode == 504
}
|
||||
|
||||
// GenerateStream starts a streaming chat completion with retry. Transient
// failures (network errors and retryable statuses, see isRetryableStatus)
// are retried up to maxRetries times with exponential backoff; on success a
// chunk channel is returned, fed and closed by a background goroutine.
func (o *OpenAI) GenerateStream(ctx context.Context, req *GenerateRequest) (<-chan StreamChunk, error) {
	// Convert to OpenAI format
	openAIReq := o.convertRequest(req)
	openAIReq.Stream = true

	// Create request body
	jsonData, err := json.Marshal(openAIReq)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	var resp *http.Response
	var lastErr error
	retryWait := initialRetryWait

	// Retry loop
	for attempt := 0; attempt <= maxRetries; attempt++ {
		if attempt > 0 {
			// Wait before retrying, aborting early on cancellation.
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-time.After(retryWait):
			}
			// Exponential backoff, capped at maxRetryWait.
			retryWait *= 2
			if retryWait > maxRetryWait {
				retryWait = maxRetryWait
			}
		}

		httpReq, err := http.NewRequestWithContext(ctx, "POST", o.config.BaseURL, bytes.NewBuffer(jsonData))
		if err != nil {
			return nil, fmt.Errorf("failed to create request: %w", err)
		}

		o.setHeaders(httpReq)

		// Make request
		resp, err = o.client.Do(httpReq)
		if err != nil {
			lastErr = fmt.Errorf("failed to send request: %w", err)
			continue // network error: retry
		}

		if resp.StatusCode >= 400 {
			// Consume and close the error body before deciding to retry.
			body, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			lastErr = fmt.Errorf("OpenAI API error (status %d): %s", resp.StatusCode, string(body))

			// Retry only statuses classified as transient.
			if isRetryableStatus(resp.StatusCode) && attempt < maxRetries {
				continue
			}
			return nil, lastErr
		}

		// Success: leave the retry loop with resp open.
		break
	}

	// All attempts failed with network errors (resp was never assigned).
	if resp == nil {
		return nil, lastErr
	}

	// Create channel and start streaming; streamResponse closes both the
	// channel and resp.Body.
	ch := make(chan StreamChunk, 100)
	go o.streamResponse(ctx, resp, ch)

	return ch, nil
}
|
||||
|
||||
// streamResponse reads the SSE stream line by line, decodes each "data: "
// payload, and forwards content/tool-call deltas as StreamChunks. It owns
// resp.Body and ch: both are closed before returning.
func (o *OpenAI) streamResponse(ctx context.Context, resp *http.Response, ch chan<- StreamChunk) {
	defer close(ch)
	defer resp.Body.Close()

	reader := bufio.NewReader(resp.Body)

	for {
		// Bail out promptly on cancellation; non-blocking check.
		select {
		case <-ctx.Done():
			ch <- StreamChunk{Done: true, Error: ctx.Err()}
			return
		default:
		}

		line, err := reader.ReadString('\n')
		if err != nil {
			// EOF is a normal end of stream; anything else is reported.
			if err != io.EOF {
				ch <- StreamChunk{Done: true, Error: err}
			} else {
				ch <- StreamChunk{Done: true}
			}
			return
		}

		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}

		// Only SSE data lines are meaningful; skip comments/other fields.
		if !strings.HasPrefix(line, "data: ") {
			continue
		}

		data := strings.TrimPrefix(line, "data: ")
		// "[DONE]" is the OpenAI stream terminator.
		if data == "[DONE]" {
			ch <- StreamChunk{Done: true}
			return
		}

		var streamResp openAIResponse
		if err := json.Unmarshal([]byte(data), &streamResp); err != nil {
			// Malformed/partial event; skip rather than abort the stream.
			continue
		}

		if len(streamResp.Choices) > 0 {
			delta := streamResp.Choices[0].Delta
			chunk := StreamChunk{
				Content:      delta.Content,
				FinishReason: streamResp.Choices[0].FinishReason,
			}

			// Handle tool calls in stream
			if len(delta.ToolCalls) > 0 {
				for _, tc := range delta.ToolCalls {
					chunk.ToolCalls = append(chunk.ToolCalls, ToolCall{
						ID:        tc.ID,
						Name:      tc.Function.Name,
						Arguments: tc.Function.Arguments,
					})
				}
			}

			ch <- chunk
		}
	}
}
|
||||
|
||||
func (o *OpenAI) convertRequest(req *GenerateRequest) *openAIRequest {
|
||||
openAIReq := &openAIRequest{
|
||||
Model: o.config.Model,
|
||||
MaxTokens: req.MaxTokens,
|
||||
Temperature: req.Temperature,
|
||||
TopP: req.TopP,
|
||||
Stop: req.Stop,
|
||||
}
|
||||
|
||||
// Convert messages
|
||||
for _, msg := range req.Messages {
|
||||
openAIReq.Messages = append(openAIReq.Messages, openAIMessage{
|
||||
Role: msg.Role,
|
||||
Content: msg.Content,
|
||||
})
|
||||
}
|
||||
|
||||
// Convert tools
|
||||
for _, tool := range req.Tools {
|
||||
openAIReq.Tools = append(openAIReq.Tools, openAITool{
|
||||
Type: "function",
|
||||
Function: openAIFunction{
|
||||
Name: tool.Name,
|
||||
Description: tool.Description,
|
||||
Parameters: tool.Parameters,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return openAIReq
|
||||
}
|
||||
|
||||
func (o *OpenAI) convertResponse(resp *openAIResponse) *GenerateResponse {
|
||||
result := &GenerateResponse{}
|
||||
|
||||
if len(resp.Choices) > 0 {
|
||||
choice := resp.Choices[0]
|
||||
result.Content = choice.Message.Content
|
||||
result.FinishReason = choice.FinishReason
|
||||
|
||||
// Convert tool calls
|
||||
for _, tc := range choice.Message.ToolCalls {
|
||||
result.ToolCalls = append(result.ToolCalls, ToolCall{
|
||||
ID: tc.ID,
|
||||
Name: tc.Function.Name,
|
||||
Arguments: tc.Function.Arguments,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if resp.Usage != nil {
|
||||
result.Usage = &Usage{
|
||||
PromptTokens: resp.Usage.PromptTokens,
|
||||
CompletionTokens: resp.Usage.CompletionTokens,
|
||||
TotalTokens: resp.Usage.TotalTokens,
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// doRequest POSTs the request with retry and returns the raw response body.
// Network errors, read errors, and retryable statuses (see
// isRetryableStatus) are retried up to maxRetries times with exponential
// backoff capped at maxRetryWait; the last error is returned when all
// attempts fail.
func (o *OpenAI) doRequest(ctx context.Context, req *openAIRequest) ([]byte, error) {
	jsonData, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	var lastErr error
	retryWait := initialRetryWait

	// Retry loop
	for attempt := 0; attempt <= maxRetries; attempt++ {
		if attempt > 0 {
			// Wait before retrying, aborting early on cancellation.
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-time.After(retryWait):
			}
			// Exponential backoff, capped at maxRetryWait.
			retryWait *= 2
			if retryWait > maxRetryWait {
				retryWait = maxRetryWait
			}
		}

		httpReq, err := http.NewRequestWithContext(ctx, "POST", o.config.BaseURL, bytes.NewBuffer(jsonData))
		if err != nil {
			return nil, fmt.Errorf("failed to create request: %w", err)
		}

		o.setHeaders(httpReq)

		resp, err := o.client.Do(httpReq)
		if err != nil {
			lastErr = fmt.Errorf("failed to send request: %w", err)
			continue // network error: retry
		}

		// Read and close the body on every path so the connection can be
		// reused.
		body, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			lastErr = fmt.Errorf("failed to read response: %w", err)
			continue
		}

		if resp.StatusCode >= 400 {
			lastErr = fmt.Errorf("OpenAI API error (status %d): %s", resp.StatusCode, string(body))
			// Retry only statuses classified as transient.
			if isRetryableStatus(resp.StatusCode) && attempt < maxRetries {
				continue
			}
			return nil, lastErr
		}

		return body, nil
	}

	// All attempts exhausted without a successful response.
	return nil, lastErr
}
|
||||
|
||||
func (o *OpenAI) setHeaders(req *http.Request) {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
if o.config.APIKey != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+o.config.APIKey)
|
||||
}
|
||||
|
||||
for k, v := range o.config.Headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
}
|
||||
133
aiagent/llm/prompt.go
Normal file
133
aiagent/llm/prompt.go
Normal file
@@ -0,0 +1,133 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ToolInfo describes a tool for prompt construction.
type ToolInfo struct {
	Name        string
	Description string
	Parameters  []ToolParamInfo
}

// ToolParamInfo describes one tool parameter for prompt construction.
type ToolParamInfo struct {
	Name        string
	Type        string
	Description string
	Required    bool
}

// PromptData holds template data for prompt rendering.
type PromptData struct {
	Platform string // operating system
	Date     string // current date
}
|
||||
|
||||
// BuildToolsSection 构建工具描述段落
|
||||
func BuildToolsSection(tools []ToolInfo) string {
|
||||
if len(tools) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString("## Available Tools\n\n")
|
||||
|
||||
for _, tool := range tools {
|
||||
sb.WriteString(fmt.Sprintf("### %s\n", tool.Name))
|
||||
sb.WriteString(fmt.Sprintf("%s\n", tool.Description))
|
||||
|
||||
if len(tool.Parameters) > 0 {
|
||||
sb.WriteString("Parameters:\n")
|
||||
for _, param := range tool.Parameters {
|
||||
required := ""
|
||||
if param.Required {
|
||||
required = " (required)"
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("- %s (%s)%s: %s\n", param.Name, param.Type, required, param.Description))
|
||||
}
|
||||
}
|
||||
sb.WriteString("\n")
|
||||
}
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// BuildToolsListBrief 构建简洁的工具列表(用于 Plan 模式)
|
||||
func BuildToolsListBrief(tools []ToolInfo) string {
|
||||
if len(tools) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString("## Available Tools\n\n")
|
||||
|
||||
for _, tool := range tools {
|
||||
sb.WriteString(fmt.Sprintf("- **%s**: %s\n", tool.Name, tool.Description))
|
||||
}
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// BuildEnvSection renders an environment section containing the
// current platform (runtime.GOOS) and today's date as YYYY-MM-DD.
func BuildEnvSection() string {
	var b strings.Builder
	b.WriteString("## Environment\n\n")
	fmt.Fprintf(&b, "- Platform: %s\n", runtime.GOOS)
	fmt.Fprintf(&b, "- Date: %s\n", time.Now().Format("2006-01-02"))
	return b.String()
}
|
||||
|
||||
// BuildSkillsSection renders the skill-guidance section from the
// loaded skill contents. Returns the empty string when none are
// loaded. A single skill gets a short preamble; multiple skills get a
// planning preamble and numbered per-skill headers.
func BuildSkillsSection(skillContents []string) string {
	if len(skillContents) == 0 {
		return ""
	}

	var b strings.Builder
	b.WriteString("## 专项技能指导\n\n")

	if len(skillContents) == 1 {
		b.WriteString("你已被加载以下专项技能,请参考技能中的流程:\n\n")
		b.WriteString(skillContents[0])
		b.WriteString("\n\n")
		return b.String()
	}

	b.WriteString("你已被加载以下专项技能,请参考技能中的流程来制定执行计划:\n\n")
	for i, content := range skillContents {
		fmt.Fprintf(&b, "### 技能 %d\n\n", i+1)
		b.WriteString(content)
		b.WriteString("\n\n")
	}

	return b.String()
}
|
||||
|
||||
// BuildPreviousFindingsSection renders a bullet list of earlier
// findings. Returns the empty string when there are none.
func BuildPreviousFindingsSection(findings []string) string {
	if len(findings) == 0 {
		return ""
	}

	var b strings.Builder
	b.WriteString("## Previous Findings\n\n")
	for _, f := range findings {
		fmt.Fprintf(&b, "- %s\n", f)
	}
	b.WriteString("\n")

	return b.String()
}
|
||||
|
||||
// BuildCurrentStepSection renders the current plan step's goal and
// approach as a markdown section.
func BuildCurrentStepSection(goal, approach string) string {
	var b strings.Builder
	b.WriteString("## Current Step\n\n")
	fmt.Fprintf(&b, "**Goal**: %s\n", goal)
	fmt.Fprintf(&b, "**Approach**: %s\n\n", approach)
	return b.String()
}
|
||||
571
aiagent/mcp.go
Normal file
571
aiagent/mcp.go
Normal file
@@ -0,0 +1,571 @@
|
||||
package aiagent
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/modelcontextprotocol/go-sdk/mcp"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
const (
	// MCP transport kinds.
	MCPTransportStdio = "stdio" // standard input/output transport
	MCPTransportSSE   = "sse"   // HTTP Server-Sent Events transport

	// Default timeouts, in milliseconds.
	DefaultMCPTimeout        = 30000 // 30 seconds
	DefaultMCPConnectTimeout = 10000 // 10 seconds
)

// MCPConfig holds the MCP server configuration (used inside AIAgentConfig).
type MCPConfig struct {
	// List of MCP servers.
	Servers []MCPServerConfig `json:"servers"`
}

// MCPServerConfig configures a single MCP server.
type MCPServerConfig struct {
	// Unique server name.
	Name string `json:"name"`

	// Transport kind: stdio or sse.
	Transport string `json:"transport"`

	// === stdio transport settings ===
	Command string            `json:"command,omitempty"` // command to launch
	Args    []string          `json:"args,omitempty"`    // command arguments
	Env     map[string]string `json:"env,omitempty"`     // extra environment (values may reference ${VAR} from the process env)

	// === SSE transport settings ===
	URL           string            `json:"url,omitempty"`             // SSE server URL
	Headers       map[string]string `json:"headers,omitempty"`         // request headers (values may reference ${VAR} from the process env)
	SkipSSLVerify bool              `json:"skip_ssl_verify,omitempty"` // skip TLS certificate verification

	// === Authentication settings (SSE transport) ===
	// Convenience auth configuration; the matching header is set automatically.
	AuthType string `json:"auth_type,omitempty"` // auth kind: bearer, api_key, basic
	APIKey   string `json:"api_key,omitempty"`   // API key (may reference ${VAR} from the process env)
	Username string `json:"username,omitempty"`  // Basic Auth username
	Password string `json:"password,omitempty"`  // Basic Auth password (may reference ${VAR})

	// Common settings.
	Timeout        int `json:"timeout,omitempty"`         // tool-call timeout (milliseconds)
	ConnectTimeout int `json:"connect_timeout,omitempty"` // connect timeout (milliseconds)
}

// MCPToolConfig references an MCP tool (used inside AgentTool).
type MCPToolConfig struct {
	// MCP server name (refers to an entry in MCPConfig.Servers).
	ServerName string `json:"server_name"`

	// Tool name as reported by the MCP server.
	ToolName string `json:"tool_name"`
}

// MCPTool is the internal representation of an MCP tool definition.
type MCPTool struct {
	Name        string                 `json:"name"`
	Description string                 `json:"description,omitempty"`
	InputSchema map[string]interface{} `json:"inputSchema,omitempty"`
}

// MCPToolsCallResult is the result of a tool invocation.
type MCPToolsCallResult struct {
	Content []MCPContent `json:"content"`
	IsError bool         `json:"isError,omitempty"`
}

// MCPContent is one piece of content returned by a tool.
type MCPContent struct {
	Type     string `json:"type"`
	Text     string `json:"text,omitempty"`
	Data     string `json:"data,omitempty"`
	MimeType string `json:"mimeType,omitempty"`
}

// MCPClient is an MCP client built on the official go-sdk.
type MCPClient struct {
	config *MCPServerConfig

	// SDK client and session (stdio transport).
	client  *mcp.Client
	session *mcp.ClientSession

	// SSE transport (the SDK has no SSE client yet, so a custom
	// HTTP implementation is kept for it).
	httpClient *http.Client
	sseURL     string

	// Shared state, guarded by mu.
	mu          sync.Mutex
	initialized bool
	tools       []MCPTool // cached tool list
}
|
||||
|
||||
// expandEnvVars expands ${VAR} (and $VAR) references in v from the
// process environment via os.ExpandEnv; unset variables become "".
func expandEnvVars(v string) string {
	return os.ExpandEnv(v)
}
|
||||
|
||||
// NewMCPClient 创建 MCP 客户端
|
||||
func NewMCPClient(config *MCPServerConfig) (*MCPClient, error) {
|
||||
client := &MCPClient{
|
||||
config: config,
|
||||
}
|
||||
|
||||
return client, nil
|
||||
}
|
||||
|
||||
// Connect 连接到 MCP 服务器
|
||||
func (c *MCPClient) Connect(ctx context.Context) error {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
if c.initialized {
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
switch c.config.Transport {
|
||||
case MCPTransportStdio:
|
||||
err = c.connectStdio(ctx)
|
||||
case MCPTransportSSE:
|
||||
err = c.connectSSE(ctx)
|
||||
default:
|
||||
return fmt.Errorf("unsupported MCP transport: %s", c.config.Transport)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.initialized = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// connectStdio launches the configured MCP server process and connects
// to it over stdin/stdout using the official SDK's CommandTransport.
// The SDK's Connect call performs the MCP initialize handshake.
func (c *MCPClient) connectStdio(ctx context.Context) error {
	if c.config.Command == "" {
		return fmt.Errorf("stdio transport requires command")
	}

	// Build the child environment: the process env plus configured
	// extras (values may reference ${VAR} from the process env).
	env := os.Environ()
	for k, v := range c.config.Env {
		expandedValue := expandEnvVars(v)
		env = append(env, fmt.Sprintf("%s=%s", k, expandedValue))
	}

	// Create the server process command; ctx cancellation kills it.
	cmd := exec.CommandContext(ctx, c.config.Command, c.config.Args...)
	cmd.Env = env

	// Official SDK transport that speaks MCP over the command's pipes.
	transport := &mcp.CommandTransport{
		Command: cmd,
	}

	// Create the MCP client.
	c.client = mcp.NewClient(
		&mcp.Implementation{
			Name:    "nightingale-aiagent",
			Version: "1.0.0",
		},
		nil,
	)

	// Connect and initialize (Connect performs the initialize handshake).
	session, err := c.client.Connect(ctx, transport, nil)
	if err != nil {
		return fmt.Errorf("failed to connect MCP client: %v", err)
	}
	c.session = session

	logger.Infof("MCP stdio server started: %s", c.config.Name)
	return nil
}
|
||||
|
||||
// connectSSE 通过 SSE 连接(保留自定义实现,SDK 暂不支持 SSE 客户端)
|
||||
func (c *MCPClient) connectSSE(ctx context.Context) error {
|
||||
if c.config.URL == "" {
|
||||
return fmt.Errorf("SSE transport requires URL")
|
||||
}
|
||||
|
||||
// 创建 HTTP 客户端
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.config.SkipSSLVerify},
|
||||
}
|
||||
|
||||
timeout := c.config.ConnectTimeout
|
||||
if timeout <= 0 {
|
||||
timeout = DefaultMCPConnectTimeout
|
||||
}
|
||||
|
||||
c.httpClient = &http.Client{
|
||||
Timeout: time.Duration(timeout) * time.Millisecond,
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
c.sseURL = c.config.URL
|
||||
|
||||
logger.Infof("MCP SSE client configured: %s", c.config.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ListTools 获取工具列表
|
||||
func (c *MCPClient) ListTools(ctx context.Context) ([]MCPTool, error) {
|
||||
c.mu.Lock()
|
||||
if len(c.tools) > 0 {
|
||||
tools := c.tools
|
||||
c.mu.Unlock()
|
||||
return tools, nil
|
||||
}
|
||||
c.mu.Unlock()
|
||||
|
||||
var tools []MCPTool
|
||||
|
||||
switch c.config.Transport {
|
||||
case MCPTransportStdio:
|
||||
// 使用官方 SDK
|
||||
if c.session == nil {
|
||||
return nil, fmt.Errorf("MCP session not initialized")
|
||||
}
|
||||
|
||||
result, err := c.session.ListTools(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list tools: %v", err)
|
||||
}
|
||||
|
||||
// 转换为内部格式
|
||||
for _, tool := range result.Tools {
|
||||
inputSchema := make(map[string]interface{})
|
||||
if tool.InputSchema != nil {
|
||||
// 将 SDK 的 InputSchema 转换为 map
|
||||
schemaBytes, err := json.Marshal(tool.InputSchema)
|
||||
if err == nil {
|
||||
json.Unmarshal(schemaBytes, &inputSchema)
|
||||
}
|
||||
}
|
||||
|
||||
tools = append(tools, MCPTool{
|
||||
Name: tool.Name,
|
||||
Description: tool.Description,
|
||||
InputSchema: inputSchema,
|
||||
})
|
||||
}
|
||||
|
||||
case MCPTransportSSE:
|
||||
// 使用自定义 HTTP 实现
|
||||
var err error
|
||||
tools, err = c.listToolsSSE(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
c.tools = tools
|
||||
c.mu.Unlock()
|
||||
|
||||
return tools, nil
|
||||
}
|
||||
|
||||
// listToolsSSE 通过 SSE 获取工具列表
|
||||
func (c *MCPClient) listToolsSSE(ctx context.Context) ([]MCPTool, error) {
|
||||
req := map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 1,
|
||||
"method": "tools/list",
|
||||
}
|
||||
|
||||
resp, err := c.sendSSERequest(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resultBytes, _ := json.Marshal(resp["result"])
|
||||
var result struct {
|
||||
Tools []MCPTool `json:"tools"`
|
||||
}
|
||||
if err := json.Unmarshal(resultBytes, &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse tools list: %v", err)
|
||||
}
|
||||
|
||||
return result.Tools, nil
|
||||
}
|
||||
|
||||
// CallTool 调用工具
|
||||
func (c *MCPClient) CallTool(ctx context.Context, name string, arguments map[string]interface{}) (*MCPToolsCallResult, error) {
|
||||
switch c.config.Transport {
|
||||
case MCPTransportStdio:
|
||||
return c.callToolStdio(ctx, name, arguments)
|
||||
case MCPTransportSSE:
|
||||
return c.callToolSSE(ctx, name, arguments)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported transport: %s", c.config.Transport)
|
||||
}
|
||||
}
|
||||
|
||||
// callToolStdio 通过 stdio 调用工具(使用官方 SDK)
|
||||
func (c *MCPClient) callToolStdio(ctx context.Context, name string, arguments map[string]interface{}) (*MCPToolsCallResult, error) {
|
||||
if c.session == nil {
|
||||
return nil, fmt.Errorf("MCP session not initialized")
|
||||
}
|
||||
|
||||
// 调用工具
|
||||
result, err := c.session.CallTool(ctx, &mcp.CallToolParams{
|
||||
Name: name,
|
||||
Arguments: arguments,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("tool call failed: %v", err)
|
||||
}
|
||||
|
||||
// 转换结果
|
||||
mcpResult := &MCPToolsCallResult{
|
||||
IsError: result.IsError,
|
||||
}
|
||||
|
||||
for _, content := range result.Content {
|
||||
mc := MCPContent{}
|
||||
|
||||
// 根据具体类型提取内容
|
||||
switch c := content.(type) {
|
||||
case *mcp.TextContent:
|
||||
mc.Type = "text"
|
||||
mc.Text = c.Text
|
||||
case *mcp.ImageContent:
|
||||
mc.Type = "image"
|
||||
mc.Data = string(c.Data)
|
||||
mc.MimeType = c.MIMEType
|
||||
case *mcp.AudioContent:
|
||||
mc.Type = "audio"
|
||||
mc.Data = string(c.Data)
|
||||
mc.MimeType = c.MIMEType
|
||||
case *mcp.EmbeddedResource:
|
||||
mc.Type = "resource"
|
||||
if c.Resource != nil {
|
||||
if c.Resource.Text != "" {
|
||||
mc.Text = c.Resource.Text
|
||||
} else if c.Resource.Blob != nil {
|
||||
mc.Data = string(c.Resource.Blob)
|
||||
}
|
||||
mc.MimeType = c.Resource.MIMEType
|
||||
}
|
||||
case *mcp.ResourceLink:
|
||||
mc.Type = "resource_link"
|
||||
mc.Text = c.URI
|
||||
default:
|
||||
// 尝试通过 JSON 序列化获取内容
|
||||
if data, err := json.Marshal(content); err == nil {
|
||||
mc.Type = "unknown"
|
||||
mc.Text = string(data)
|
||||
}
|
||||
}
|
||||
|
||||
mcpResult.Content = append(mcpResult.Content, mc)
|
||||
}
|
||||
|
||||
return mcpResult, nil
|
||||
}
|
||||
|
||||
// callToolSSE 通过 SSE 调用工具
|
||||
func (c *MCPClient) callToolSSE(ctx context.Context, name string, arguments map[string]interface{}) (*MCPToolsCallResult, error) {
|
||||
req := map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 1,
|
||||
"method": "tools/call",
|
||||
"params": map[string]interface{}{
|
||||
"name": name,
|
||||
"arguments": arguments,
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := c.sendSSERequest(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if errObj, ok := resp["error"].(map[string]interface{}); ok {
|
||||
return nil, fmt.Errorf("MCP error: %v", errObj["message"])
|
||||
}
|
||||
|
||||
resultBytes, _ := json.Marshal(resp["result"])
|
||||
var result MCPToolsCallResult
|
||||
if err := json.Unmarshal(resultBytes, &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse tool call result: %v", err)
|
||||
}
|
||||
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
// setAuthHeaders 设置鉴权请求头
|
||||
func (c *MCPClient) setAuthHeaders(req *http.Request) {
|
||||
cfg := c.config
|
||||
|
||||
if cfg.AuthType == "" && cfg.APIKey == "" {
|
||||
return
|
||||
}
|
||||
|
||||
apiKey := expandEnvVars(cfg.APIKey)
|
||||
username := expandEnvVars(cfg.Username)
|
||||
password := expandEnvVars(cfg.Password)
|
||||
|
||||
switch strings.ToLower(cfg.AuthType) {
|
||||
case "bearer":
|
||||
if apiKey != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+apiKey)
|
||||
}
|
||||
case "api_key", "apikey":
|
||||
if apiKey != "" {
|
||||
req.Header.Set("X-API-Key", apiKey)
|
||||
}
|
||||
case "basic":
|
||||
if username != "" {
|
||||
req.SetBasicAuth(username, password)
|
||||
}
|
||||
default:
|
||||
if apiKey != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+apiKey)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sendSSERequest posts a JSON-RPC request to the server's "message"
// endpoint and decodes the JSON response into a generic map. Auth
// headers and configured extra headers are applied; header values may
// reference ${VAR} from the process environment.
func (c *MCPClient) sendSSERequest(ctx context.Context, req map[string]interface{}) (map[string]interface{}, error) {
	baseURL := c.sseURL
	if !strings.HasSuffix(baseURL, "/") {
		baseURL += "/"
	}

	// Sanity-check the final URL before building the request.
	postURL := baseURL + "message"
	if _, err := url.Parse(postURL); err != nil {
		return nil, fmt.Errorf("invalid URL: %v", err)
	}

	data, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %v", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, "POST", postURL, bytes.NewBuffer(data))
	if err != nil {
		return nil, fmt.Errorf("failed to create HTTP request: %v", err)
	}

	httpReq.Header.Set("Content-Type", "application/json")
	c.setAuthHeaders(httpReq)

	// Extra configured headers are applied last and may override the
	// defaults set above.
	for k, v := range c.config.Headers {
		httpReq.Header.Set(k, expandEnvVars(v))
	}

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("HTTP request failed: %v", err)
	}
	defer resp.Body.Close()

	// Surface HTTP-level failures with the response body for context.
	if resp.StatusCode >= 400 {
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("HTTP error %d: %s", resp.StatusCode, string(body))
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %v", err)
	}

	var result map[string]interface{}
	if err := json.Unmarshal(body, &result); err != nil {
		return nil, fmt.Errorf("failed to parse response: %v", err)
	}

	return result, nil
}
|
||||
|
||||
// Close 关闭连接
|
||||
func (c *MCPClient) Close() error {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
if c.session != nil {
|
||||
c.session.Close()
|
||||
c.session = nil
|
||||
}
|
||||
|
||||
c.client = nil
|
||||
c.initialized = false
|
||||
logger.Infof("MCP client closed: %s", c.config.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
// MCPClientManager owns a set of named MCP clients and hands out one
// shared, connected client per server configuration.
type MCPClientManager struct {
	clients map[string]*MCPClient
	mu      sync.RWMutex
}

// NewMCPClientManager creates an empty MCP client manager.
func NewMCPClientManager() *MCPClientManager {
	return &MCPClientManager{
		clients: make(map[string]*MCPClient),
	}
}
|
||||
|
||||
// GetOrCreateClient returns the cached client for config.Name, or
// creates, connects, and caches a new one. Double-checked locking keeps
// the common cache-hit path on the cheap read lock.
func (m *MCPClientManager) GetOrCreateClient(ctx context.Context, config *MCPServerConfig) (*MCPClient, error) {
	m.mu.RLock()
	client, ok := m.clients[config.Name]
	m.mu.RUnlock()

	if ok {
		return client, nil
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	// Re-check under the write lock (double-checked locking): another
	// goroutine may have created the client between the two locks.
	if client, ok := m.clients[config.Name]; ok {
		return client, nil
	}

	// Create a new client.
	client, err := NewMCPClient(config)
	if err != nil {
		return nil, err
	}

	// Connect before publishing; a client that fails to connect is
	// never cached.
	if err := client.Connect(ctx); err != nil {
		return nil, err
	}

	m.clients[config.Name] = client
	return client, nil
}
|
||||
|
||||
// CloseAll 关闭所有客户端
|
||||
func (m *MCPClientManager) CloseAll() {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
for name, client := range m.clients {
|
||||
if err := client.Close(); err != nil {
|
||||
logger.Warningf("Failed to close MCP client %s: %v", name, err)
|
||||
}
|
||||
}
|
||||
m.clients = make(map[string]*MCPClient)
|
||||
}
|
||||
30
aiagent/prompts/embed.go
Normal file
30
aiagent/prompts/embed.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package prompts

import (
	_ "embed"
)

// System prompt for ReAct mode.
//
//go:embed react_system.md
var ReactSystemPrompt string

// System prompt for the planning phase of Plan+ReAct mode.
//
//go:embed plan_system.md
var PlanSystemPrompt string

// Prompt used while executing a single plan step.
//
//go:embed step_execution.md
var StepExecutionPrompt string

// Prompt used to synthesize step results into a final answer.
//
//go:embed synthesis.md
var SynthesisPrompt string

// Default user prompt template.
//
//go:embed user_default.md
var UserDefaultTemplate string
|
||||
65
aiagent/prompts/plan_system.md
Normal file
65
aiagent/prompts/plan_system.md
Normal file
@@ -0,0 +1,65 @@
|
||||
You are an intelligent AI Agent capable of analyzing tasks, creating execution plans, and solving complex problems.
|
||||
|
||||
Your role is to understand user requests and create structured, actionable execution plans.
|
||||
|
||||
## Core Capabilities
|
||||
|
||||
- **Alert Analysis**: Analyze alerts, investigate root causes, correlate events
|
||||
- **Data Analysis**: Analyze batch data, identify patterns, generate insights
|
||||
- **SQL Generation**: Convert natural language to SQL queries
|
||||
- **General Problem Solving**: Break down complex tasks into actionable steps
|
||||
|
||||
## Planning Principles
|
||||
|
||||
1. **Understand First**: Carefully analyze what the user is asking for
|
||||
2. **Identify Key Areas**: Determine which domains, systems, or aspects are involved
|
||||
3. **Create Logical Steps**: Order steps by priority or logical sequence
|
||||
4. **Be Specific**: Each step should have a clear goal and concrete approach
|
||||
5. **Reference Tools**: Consider available tools when designing your approach
|
||||
|
||||
## Response Format
|
||||
|
||||
You must respond in the following JSON format:
|
||||
|
||||
```json
|
||||
{
|
||||
"task_summary": "Brief summary of the input/request",
|
||||
"goal": "The overall goal of this task",
|
||||
"focus_areas": ["area1", "area2", "area3"],
|
||||
"steps": [
|
||||
{
|
||||
"step_number": 1,
|
||||
"goal": "What to accomplish in this step",
|
||||
"approach": "How to accomplish it (which tools/methods to use)"
|
||||
},
|
||||
{
|
||||
"step_number": 2,
|
||||
"goal": "...",
|
||||
"approach": "..."
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Focus Areas by Task Type
|
||||
|
||||
**Alert/Incident Analysis:**
|
||||
- Network: latency, packet loss, DNS resolution
|
||||
- Database: query performance, connections, locks, replication
|
||||
- Application: error rates, response times, resource usage
|
||||
- Infrastructure: CPU, memory, disk I/O, network throughput
|
||||
|
||||
**Batch Alert Analysis:**
|
||||
- Pattern recognition: common labels, time correlation
|
||||
- Aggregation: group by severity, source, category
|
||||
- Trend analysis: frequency, escalation patterns
|
||||
|
||||
**SQL Generation:**
|
||||
- Schema understanding: tables, columns, relationships
|
||||
- Query optimization: indexes, join strategies
|
||||
- Data validation: constraints, data types
|
||||
|
||||
**General Analysis:**
|
||||
- Data collection: gather relevant information
|
||||
- Processing: transform, filter, aggregate
|
||||
- Output: format results appropriately
|
||||
42
aiagent/prompts/react_system.md
Normal file
42
aiagent/prompts/react_system.md
Normal file
@@ -0,0 +1,42 @@
|
||||
You are an intelligent AI Agent capable of analyzing tasks, creating execution plans, and solving complex problems.
|
||||
|
||||
Your capabilities include but are not limited to:
|
||||
- **Root Cause Analysis**: Analyze alerts, investigate incidents, identify root causes
|
||||
- **Data Analysis**: Query and analyze metrics, logs, traces, and other data sources
|
||||
- **SQL Generation**: Convert natural language queries to SQL statements
|
||||
- **Information Synthesis**: Summarize and extract insights from complex data
|
||||
- **Content Generation**: Generate titles, summaries, and structured reports
|
||||
|
||||
## Core Principles
|
||||
|
||||
1. **Systematic Analysis**: Gather sufficient information before making conclusions
|
||||
2. **Evidence-Based**: Support conclusions with specific data from tool outputs
|
||||
3. **Tool Efficiency**: Use tools wisely, avoid redundant calls
|
||||
4. **Clear Communication**: Keep responses focused and actionable
|
||||
5. **Adaptability**: Adjust your approach based on the task type
|
||||
|
||||
## Response Format
|
||||
|
||||
You must respond in the following format:
|
||||
|
||||
```
|
||||
Thought: [Your reasoning about the current situation and what to do next]
|
||||
Action: [The tool name to use, or 'Final Answer' if you have enough information]
|
||||
Action Input: [The input to the action - for tools, provide JSON parameters; for Final Answer, provide your result]
|
||||
```
|
||||
|
||||
## Task Guidelines
|
||||
|
||||
1. **Understand the request**: Carefully analyze what the user is asking for
|
||||
2. **Choose appropriate tools**: Select tools that best fit the task requirements
|
||||
3. **Iterate as needed**: Gather additional information if initial results are insufficient
|
||||
4. **Validate results**: Verify your conclusions before providing the final answer
|
||||
5. **Be concise**: Provide clear, well-structured responses
|
||||
|
||||
## Final Answer Requirements
|
||||
|
||||
Your Final Answer should:
|
||||
- Directly address the user's request
|
||||
- Be well-structured and easy to understand
|
||||
- Include supporting evidence or reasoning when applicable
|
||||
- Provide actionable recommendations if relevant
|
||||
35
aiagent/prompts/step_execution.md
Normal file
35
aiagent/prompts/step_execution.md
Normal file
@@ -0,0 +1,35 @@
|
||||
You are an intelligent AI Agent executing a specific step as part of a larger execution plan.
|
||||
|
||||
## Your Task
|
||||
|
||||
Focus on completing the current step efficiently and thoroughly. Use the available tools to gather information, process data, or generate results as needed to achieve the step's goal.
|
||||
|
||||
## Response Format
|
||||
|
||||
Respond in this format:
|
||||
|
||||
```
|
||||
Thought: [Your reasoning about what to do for this step]
|
||||
Action: [Tool name or 'Step Complete' when done]
|
||||
Action Input: [Tool parameters as JSON, or step summary for 'Step Complete']
|
||||
```
|
||||
|
||||
## Step Execution Guidelines
|
||||
|
||||
1. **Stay Focused**: Only work on the current step's goal
|
||||
2. **Be Thorough**: Gather enough information to achieve the goal
|
||||
3. **Document Progress**: Note important findings in your thoughts
|
||||
4. **Know When to Stop**: Complete the step when you have sufficient results
|
||||
5. **Handle Failures**: If a tool fails, try alternatives or note the limitation
|
||||
|
||||
## When to Mark Step Complete
|
||||
|
||||
Mark the step as complete when:
|
||||
- You have achieved the step's goal
|
||||
- You have gathered sufficient information or generated the required output
|
||||
- Further work would be outside the step's scope
|
||||
|
||||
Your step summary should include:
|
||||
- Key results or findings relevant to the step's goal
|
||||
- Tools used and their outputs
|
||||
- Any limitations or issues encountered
|
||||
37
aiagent/prompts/synthesis.md
Normal file
37
aiagent/prompts/synthesis.md
Normal file
@@ -0,0 +1,37 @@
|
||||
You are an intelligent AI Agent synthesizing results from multiple execution steps into a comprehensive final output.
|
||||
|
||||
## Your Task
|
||||
|
||||
Review all the results from the completed steps and provide a unified, well-structured response that addresses the original request.
|
||||
|
||||
## Response Guidelines
|
||||
|
||||
Based on the task type, structure your response appropriately:
|
||||
|
||||
**For Root Cause Analysis:**
|
||||
- Summary of the root cause
|
||||
- Supporting evidence from investigation
|
||||
- Impact assessment
|
||||
- Recommended actions
|
||||
|
||||
**For Data Analysis / SQL Generation:**
|
||||
- Query results or generated SQL
|
||||
- Key insights from the data
|
||||
- Any caveats or limitations
|
||||
|
||||
**For Information Synthesis:**
|
||||
- Structured summary of findings
|
||||
- Key insights and patterns
|
||||
- Relevant conclusions
|
||||
|
||||
**For Content Generation:**
|
||||
- Generated content (title, summary, etc.)
|
||||
- Alternative options if applicable
|
||||
|
||||
## Synthesis Principles
|
||||
|
||||
1. **Integrate Results**: Combine findings from all completed steps coherently
|
||||
2. **Prioritize Relevance**: Focus on the most important information
|
||||
3. **Be Structured**: Organize output in a clear, logical format
|
||||
4. **Be Concise**: Avoid unnecessary verbosity while ensuring completeness
|
||||
5. **Address the Request**: Ensure the final output directly answers the original task
|
||||
7
aiagent/prompts/user_default.md
Normal file
7
aiagent/prompts/user_default.md
Normal file
@@ -0,0 +1,7 @@
|
||||
## Alert Information
|
||||
|
||||
{{.AlertContent}}
|
||||
|
||||
## Analysis Request
|
||||
|
||||
Please analyze this alert and identify the root cause. Provide evidence-based conclusions and actionable recommendations.
|
||||
573
aiagent/skill.go
Normal file
573
aiagent/skill.go
Normal file
@@ -0,0 +1,573 @@
|
||||
package aiagent
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
const (
	// SkillFileName is the main skill file name.
	SkillFileName = "SKILL.md"
	// SkillToolsDir is the skill tools directory name.
	SkillToolsDir = "skill_tools"

	// Default configuration values.
	DefaultMaxSkills = 2
)

// SkillConfig is the skill configuration (used inside AIAgentConfig).
// The skills directory path comes from the global
// Plus.AIAgentSkillsPath setting.
type SkillConfig struct {
	// Skill selection (priority: SkillNames > LLM selection > DefaultSkills).
	AutoSelect    bool     `json:"auto_select,omitempty"`    // let the LLM pick skills automatically (default true)
	SkillNames    []string `json:"skill_names,omitempty"`    // explicit skill names (manual mode)
	MaxSkills     int      `json:"max_skills,omitempty"`     // max skills the LLM may pick (default 2)
	DefaultSkills []string `json:"default_skills,omitempty"` // fallback skills when the LLM cannot choose
}

// SkillMetadata is a skill's metadata (Level 1 - always resident in memory).
type SkillMetadata struct {
	// Core fields (aligned with the official Anthropic skill format).
	Name        string `yaml:"name" json:"name"`
	Description string `yaml:"description" json:"description"`

	// Optional extension fields.
	RecommendedTools []string `yaml:"recommended_tools,omitempty" json:"recommended_tools,omitempty"`
	BuiltinTools     []string `yaml:"builtin_tools,omitempty" json:"builtin_tools,omitempty"` // built-in tool names

	// Internal fields (never serialized).
	Path     string    `json:"-"` // skill directory path
	LoadedAt time.Time `json:"-"` // load time
}

// SkillContent is a skill's content (Level 2 - loaded when matched).
type SkillContent struct {
	Metadata    *SkillMetadata `json:"metadata"`
	MainContent string         `json:"main_content"` // SKILL.md body
}

// SkillTool is a skill-specific tool (Level 3 - loaded on demand).
type SkillTool struct {
	Name        string                 `yaml:"name" json:"name"`               // tool name
	Type        string                 `yaml:"type" json:"type"`               // handler kind: annotation_qd, script, callback, ...
	Description string                 `yaml:"description" json:"description"` // tool description
	Config      map[string]interface{} `yaml:"config" json:"config"`           // handler configuration

	// Optional parameter definitions.
	Parameters []ToolParameter `yaml:"parameters,omitempty" json:"parameters,omitempty"`
}

// SkillResources are a skill's extended resources (Level 3 - loaded on demand).
type SkillResources struct {
	SkillTools map[string]*SkillTool `json:"skill_tools"` // tool name -> tool definition
	References map[string]string     `json:"references"`  // referenced file contents
}

// SkillRegistry indexes all known skills.
type SkillRegistry struct {
	skillsPath   string                           // skills directory path
	skills       map[string]*SkillMetadata        // name -> metadata
	contentCache map[string]*SkillContent         // name -> content (LRU cache)
	toolsCache   map[string]map[string]*SkillTool // skillName -> toolName -> tool
	mu           sync.RWMutex
}
|
||||
|
||||
// NewSkillRegistry 创建技能注册表
|
||||
func NewSkillRegistry(skillsPath string) *SkillRegistry {
|
||||
registry := &SkillRegistry{
|
||||
skillsPath: skillsPath,
|
||||
skills: make(map[string]*SkillMetadata),
|
||||
contentCache: make(map[string]*SkillContent),
|
||||
toolsCache: make(map[string]map[string]*SkillTool),
|
||||
}
|
||||
|
||||
// 初始加载所有技能元数据
|
||||
if err := registry.loadAllMetadata(); err != nil {
|
||||
logger.Warningf("Failed to load skill metadata: %v", err)
|
||||
}
|
||||
|
||||
return registry
|
||||
}
|
||||
|
||||
// loadAllMetadata 加载所有技能的元数据(Level 1)
|
||||
func (r *SkillRegistry) loadAllMetadata() error {
|
||||
if r.skillsPath == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 检查目录是否存在
|
||||
if _, err := os.Stat(r.skillsPath); os.IsNotExist(err) {
|
||||
logger.Debugf("Skills directory does not exist: %s", r.skillsPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// 遍历技能目录
|
||||
entries, err := os.ReadDir(r.skillsPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read skills directory: %v", err)
|
||||
}
|
||||
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
skillPath := filepath.Join(r.skillsPath, entry.Name())
|
||||
skillFile := filepath.Join(skillPath, SkillFileName)
|
||||
|
||||
// 检查 SKILL.md 是否存在
|
||||
if _, err := os.Stat(skillFile); os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
|
||||
// 加载元数据
|
||||
metadata, err := r.loadMetadataFromFile(skillFile)
|
||||
if err != nil {
|
||||
logger.Warningf("Failed to load skill metadata from %s: %v", skillFile, err)
|
||||
continue
|
||||
}
|
||||
|
||||
metadata.Path = skillPath
|
||||
metadata.LoadedAt = time.Now()
|
||||
r.skills[metadata.Name] = metadata
|
||||
|
||||
logger.Debugf("Loaded skill metadata: %s from %s", metadata.Name, skillPath)
|
||||
}
|
||||
|
||||
logger.Infof("Loaded %d skills from %s", len(r.skills), r.skillsPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// 从 SKILL.md 文件加载元数据
|
||||
func (r *SkillRegistry) loadMetadataFromFile(filePath string) (*SkillMetadata, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open file: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// 解析 YAML frontmatter
|
||||
scanner := bufio.NewScanner(file)
|
||||
var inFrontmatter bool
|
||||
var frontmatterLines []string
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
if line == "---" {
|
||||
if !inFrontmatter {
|
||||
inFrontmatter = true
|
||||
continue
|
||||
} else {
|
||||
// frontmatter 结束
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if inFrontmatter {
|
||||
frontmatterLines = append(frontmatterLines, line)
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("failed to scan file: %v", err)
|
||||
}
|
||||
|
||||
if len(frontmatterLines) == 0 {
|
||||
return nil, fmt.Errorf("no frontmatter found in %s", filePath)
|
||||
}
|
||||
|
||||
// 解析 YAML
|
||||
frontmatter := strings.Join(frontmatterLines, "\n")
|
||||
var metadata SkillMetadata
|
||||
if err := yaml.Unmarshal([]byte(frontmatter), &metadata); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse frontmatter: %v", err)
|
||||
}
|
||||
|
||||
if metadata.Name == "" {
|
||||
return nil, fmt.Errorf("skill name is required in frontmatter")
|
||||
}
|
||||
|
||||
return &metadata, nil
|
||||
}
|
||||
|
||||
// GetByName 根据名称获取技能元数据
|
||||
func (r *SkillRegistry) GetByName(name string) *SkillMetadata {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
|
||||
return r.skills[name]
|
||||
}
|
||||
|
||||
// ListAll 列出所有技能元数据
|
||||
func (r *SkillRegistry) ListAll() []*SkillMetadata {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
|
||||
result := make([]*SkillMetadata, 0, len(r.skills))
|
||||
for _, metadata := range r.skills {
|
||||
result = append(result, metadata)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// LoadContent 加载技能内容(Level 2)
|
||||
func (r *SkillRegistry) LoadContent(metadata *SkillMetadata) (*SkillContent, error) {
|
||||
if metadata == nil {
|
||||
return nil, fmt.Errorf("metadata is nil")
|
||||
}
|
||||
|
||||
// 检查缓存
|
||||
r.mu.RLock()
|
||||
if cached, ok := r.contentCache[metadata.Name]; ok {
|
||||
r.mu.RUnlock()
|
||||
return cached, nil
|
||||
}
|
||||
r.mu.RUnlock()
|
||||
|
||||
// 加载内容
|
||||
skillFile := filepath.Join(metadata.Path, SkillFileName)
|
||||
content, err := r.loadContentFromFile(skillFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
skillContent := &SkillContent{
|
||||
Metadata: metadata,
|
||||
MainContent: content,
|
||||
}
|
||||
|
||||
// 缓存
|
||||
r.mu.Lock()
|
||||
r.contentCache[metadata.Name] = skillContent
|
||||
r.mu.Unlock()
|
||||
|
||||
return skillContent, nil
|
||||
}
|
||||
|
||||
// loadContentFromFile 从 SKILL.md 文件加载正文内容
|
||||
func (r *SkillRegistry) loadContentFromFile(filePath string) (string, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to open file: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
var inFrontmatter bool
|
||||
var frontmatterEnded bool
|
||||
var contentLines []string
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
if strings.TrimSpace(line) == "---" {
|
||||
if !inFrontmatter {
|
||||
inFrontmatter = true
|
||||
continue
|
||||
} else {
|
||||
frontmatterEnded = true
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if frontmatterEnded {
|
||||
contentLines = append(contentLines, line)
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", fmt.Errorf("failed to scan file: %v", err)
|
||||
}
|
||||
|
||||
return strings.TrimSpace(strings.Join(contentLines, "\n")), nil
|
||||
}
|
||||
|
||||
// LoadSkillTool 加载单个 skill_tool(Level 3 - 完整配置)
|
||||
func (r *SkillRegistry) LoadSkillTool(skillName, toolName string) (*SkillTool, error) {
|
||||
// 检查缓存
|
||||
r.mu.RLock()
|
||||
if skillTools, ok := r.toolsCache[skillName]; ok {
|
||||
if tool, ok := skillTools[toolName]; ok {
|
||||
r.mu.RUnlock()
|
||||
return tool, nil
|
||||
}
|
||||
}
|
||||
r.mu.RUnlock()
|
||||
|
||||
// 获取技能元数据
|
||||
metadata := r.GetByName(skillName)
|
||||
if metadata == nil {
|
||||
return nil, fmt.Errorf("skill '%s' not found", skillName)
|
||||
}
|
||||
|
||||
// 加载工具
|
||||
toolFile := filepath.Join(metadata.Path, SkillToolsDir, toolName+".yaml")
|
||||
tool, err := r.loadToolFromFile(toolFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 缓存
|
||||
r.mu.Lock()
|
||||
if r.toolsCache[skillName] == nil {
|
||||
r.toolsCache[skillName] = make(map[string]*SkillTool)
|
||||
}
|
||||
r.toolsCache[skillName][toolName] = tool
|
||||
r.mu.Unlock()
|
||||
|
||||
return tool, nil
|
||||
}
|
||||
|
||||
// LoadSkillToolDescription 加载 skill_tool 的描述信息(轻量级,只读取 name 和 description)
|
||||
func (r *SkillRegistry) LoadSkillToolDescription(skillName, toolName string) (string, error) {
|
||||
// 检查缓存 - 如果已经加载了完整工具,直接返回 description
|
||||
r.mu.RLock()
|
||||
if skillTools, ok := r.toolsCache[skillName]; ok {
|
||||
if tool, ok := skillTools[toolName]; ok {
|
||||
r.mu.RUnlock()
|
||||
return tool.Description, nil
|
||||
}
|
||||
}
|
||||
r.mu.RUnlock()
|
||||
|
||||
// 获取技能元数据
|
||||
metadata := r.GetByName(skillName)
|
||||
if metadata == nil {
|
||||
return "", fmt.Errorf("skill '%s' not found", skillName)
|
||||
}
|
||||
|
||||
// 只加载 description(不缓存完整工具,保持延迟加载特性)
|
||||
toolFile := filepath.Join(metadata.Path, SkillToolsDir, toolName+".yaml")
|
||||
tool, err := r.loadToolFromFile(toolFile)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return tool.Description, nil
|
||||
}
|
||||
|
||||
// LoadAllSkillToolDescriptions 加载技能目录下所有 skill_tools 的描述
|
||||
func (r *SkillRegistry) LoadAllSkillToolDescriptions(skillName string) (map[string]string, error) {
|
||||
metadata := r.GetByName(skillName)
|
||||
if metadata == nil {
|
||||
return nil, fmt.Errorf("skill '%s' not found", skillName)
|
||||
}
|
||||
|
||||
toolsDir := filepath.Join(metadata.Path, SkillToolsDir)
|
||||
|
||||
// 检查目录是否存在
|
||||
if _, err := os.Stat(toolsDir); os.IsNotExist(err) {
|
||||
return make(map[string]string), nil
|
||||
}
|
||||
|
||||
// 遍历 skill_tools 目录
|
||||
entries, err := os.ReadDir(toolsDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read skill_tools directory: %v", err)
|
||||
}
|
||||
|
||||
descriptions := make(map[string]string)
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
// 只处理 .yaml 文件
|
||||
name := entry.Name()
|
||||
if !strings.HasSuffix(name, ".yaml") && !strings.HasSuffix(name, ".yml") {
|
||||
continue
|
||||
}
|
||||
|
||||
toolFile := filepath.Join(toolsDir, name)
|
||||
tool, err := r.loadToolFromFile(toolFile)
|
||||
if err != nil {
|
||||
logger.Warningf("Failed to load skill tool %s: %v", toolFile, err)
|
||||
continue
|
||||
}
|
||||
|
||||
descriptions[tool.Name] = tool.Description
|
||||
}
|
||||
|
||||
return descriptions, nil
|
||||
}
|
||||
|
||||
// loadToolFromFile 从文件加载工具定义
|
||||
func (r *SkillRegistry) loadToolFromFile(filePath string) (*SkillTool, error) {
|
||||
data, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read tool file: %v", err)
|
||||
}
|
||||
|
||||
var tool SkillTool
|
||||
if err := yaml.Unmarshal(data, &tool); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse tool file: %v", err)
|
||||
}
|
||||
|
||||
return &tool, nil
|
||||
}
|
||||
|
||||
// LoadReference 加载引用文件(Level 3)
|
||||
func (r *SkillRegistry) LoadReference(metadata *SkillMetadata, refName string) (string, error) {
|
||||
if metadata == nil {
|
||||
return "", fmt.Errorf("metadata is nil")
|
||||
}
|
||||
|
||||
refFile := filepath.Join(metadata.Path, refName)
|
||||
data, err := os.ReadFile(refFile)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read reference file: %v", err)
|
||||
}
|
||||
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
// Reload 重新加载所有技能元数据
|
||||
func (r *SkillRegistry) Reload() error {
|
||||
r.mu.Lock()
|
||||
r.skills = make(map[string]*SkillMetadata)
|
||||
r.contentCache = make(map[string]*SkillContent)
|
||||
r.toolsCache = make(map[string]map[string]*SkillTool)
|
||||
r.mu.Unlock()
|
||||
|
||||
return r.loadAllMetadata()
|
||||
}
|
||||
|
||||
// SkillSelector chooses the skills relevant to a task.
type SkillSelector interface {
	// SelectMultiple lets an LLM pick up to maxSkills of the available
	// skills based on the task context (multiple selection allowed).
	SelectMultiple(ctx context.Context, taskContext string, availableSkills []*SkillMetadata, maxSkills int) ([]*SkillMetadata, error)
}
|
||||
|
||||
// LLMSkillSelector implements SkillSelector by delegating the choice to an
// LLM through the injected llmCaller function.
type LLMSkillSelector struct {
	// llmCaller performs one chat completion and returns the raw reply text.
	llmCaller func(ctx context.Context, messages []ChatMessage) (string, error)
}
|
||||
|
||||
// NewLLMSkillSelector 创建 LLM 技能选择器
|
||||
func NewLLMSkillSelector(llmCaller func(ctx context.Context, messages []ChatMessage) (string, error)) *LLMSkillSelector {
|
||||
return &LLMSkillSelector{
|
||||
llmCaller: llmCaller,
|
||||
}
|
||||
}
|
||||
|
||||
// SelectMultiple 使用 LLM 选择技能
|
||||
func (s *LLMSkillSelector) SelectMultiple(ctx context.Context, taskContext string, availableSkills []*SkillMetadata, maxSkills int) ([]*SkillMetadata, error) {
|
||||
if len(availableSkills) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if maxSkills <= 0 {
|
||||
maxSkills = DefaultMaxSkills
|
||||
}
|
||||
|
||||
// 构建提示词
|
||||
systemPrompt := s.buildSelectionPrompt(availableSkills, maxSkills)
|
||||
|
||||
messages := []ChatMessage{
|
||||
{Role: "system", Content: systemPrompt},
|
||||
{Role: "user", Content: taskContext},
|
||||
}
|
||||
|
||||
// 调用 LLM
|
||||
response, err := s.llmCaller(ctx, messages)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("LLM call failed: %v", err)
|
||||
}
|
||||
|
||||
// 解析响应
|
||||
selectedNames := s.parseSelectionResponse(response)
|
||||
if len(selectedNames) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// 限制数量
|
||||
if len(selectedNames) > maxSkills {
|
||||
selectedNames = selectedNames[:maxSkills]
|
||||
}
|
||||
|
||||
// 转换为 SkillMetadata
|
||||
skillMap := make(map[string]*SkillMetadata)
|
||||
for _, skill := range availableSkills {
|
||||
skillMap[skill.Name] = skill
|
||||
}
|
||||
|
||||
var result []*SkillMetadata
|
||||
for _, name := range selectedNames {
|
||||
if skill, ok := skillMap[name]; ok {
|
||||
result = append(result, skill)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// buildSelectionPrompt builds the system prompt that lists every candidate
// skill and instructs the LLM to answer with a JSON array of skill names.
// The prompt body is in Chinese and is part of the runtime contract with the
// model, so its text is left exactly as-is.
func (s *LLMSkillSelector) buildSelectionPrompt(availableSkills []*SkillMetadata, maxSkills int) string {
	var sb strings.Builder

	sb.WriteString(fmt.Sprintf(`你是一个技能选择器。根据以下任务上下文,选择最合适的技能(可选择 1-%d 个)。

## 可用技能

`, maxSkills))

	// one numbered entry per candidate: bold name, then indented description
	for i, skill := range availableSkills {
		sb.WriteString(fmt.Sprintf("%d. **%s**\n", i+1, skill.Name))
		sb.WriteString(fmt.Sprintf("   %s\n\n", skill.Description))
	}

	sb.WriteString(`## 输出格式

请以 JSON 数组格式返回选中的技能名称,例如:
` + "```json\n" + `["skill-name-1", "skill-name-2"]
` + "```" + `

## 选择原则

1. 选择与任务最相关的技能
2. 如果任务涉及多个领域,可以选择多个技能
3. 优先选择更具体、更专业的技能
4. 如果没有合适的技能,返回空数组 []

请返回技能名称数组:`)

	return sb.String()
}
|
||||
|
||||
// parseSelectionResponse 解析 LLM 的选择响应
|
||||
func (s *LLMSkillSelector) parseSelectionResponse(response string) []string {
|
||||
// 尝试从 JSON 代码块中提取
|
||||
response = strings.TrimSpace(response)
|
||||
|
||||
// 查找 JSON 数组
|
||||
start := strings.Index(response, "[")
|
||||
end := strings.LastIndex(response, "]")
|
||||
|
||||
if start < 0 || end <= start {
|
||||
return nil
|
||||
}
|
||||
|
||||
jsonStr := response[start : end+1]
|
||||
|
||||
var skillNames []string
|
||||
if err := json.Unmarshal([]byte(jsonStr), &skillNames); err != nil {
|
||||
logger.Warningf("Failed to parse skill selection response: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
return skillNames
|
||||
}
|
||||
@@ -75,11 +75,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
macros.RegisterMacro(macros.MacroInVain)
|
||||
dscache.Init(ctx, false)
|
||||
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache)
|
||||
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
|
||||
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
|
||||
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
|
||||
|
||||
if config.Ibex.Enable {
|
||||
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
|
||||
@@ -98,7 +98,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
|
||||
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, taskTplsCache *memsto.TaskTplCache, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
|
||||
promClients *prom.PromClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, messageTemplateCache *memsto.MessageTemplateCacheType) {
|
||||
promClients *prom.PromClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, messageTemplateCache *memsto.MessageTemplateCacheType, configCvalCache *memsto.CvalCache) {
|
||||
alertSubscribeCache := memsto.NewAlertSubscribeCache(ctx, syncStats)
|
||||
recordingRuleCache := memsto.NewRecordingRuleCache(ctx, syncStats)
|
||||
targetsOfAlertRulesCache := memsto.NewTargetOfAlertRuleCache(ctx, alertc.Heartbeat.EngineName, syncStats)
|
||||
@@ -117,14 +117,14 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
|
||||
|
||||
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
|
||||
|
||||
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, alertc.Alerting, ctx, alertStats)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
|
||||
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, configCvalCache, alertc.Alerting, ctx, alertStats)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients, alertMuteCache)
|
||||
|
||||
notifyRecordComsumer := sender.NewNotifyRecordConsumer(ctx)
|
||||
notifyRecordConsumer := sender.NewNotifyRecordConsumer(ctx)
|
||||
|
||||
go dp.ReloadTpls()
|
||||
go consumer.LoopConsume()
|
||||
go notifyRecordComsumer.LoopConsume()
|
||||
go notifyRecordConsumer.LoopConsume()
|
||||
|
||||
go queue.ReportQueueSize(alertStats)
|
||||
go sender.ReportNotifyRecordQueueSize(alertStats)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
@@ -13,6 +14,20 @@ func RuleKey(datasourceId, id int64) string {
|
||||
|
||||
func MatchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
|
||||
for _, filter := range itags {
|
||||
// target_group in和not in优先特殊处理:匹配通过则继续下一个 filter,匹配失败则整组不匹配
|
||||
if filter.Key == "target_group" {
|
||||
// target 字段从 event.JsonTagsAndValue() 中获取的
|
||||
v, ok := eventTagsMap["target"]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
if !targetGroupMatch(v, filter) {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// 普通标签按原逻辑处理
|
||||
value, has := eventTagsMap[filter.Key]
|
||||
if !has {
|
||||
return false
|
||||
@@ -35,9 +50,9 @@ func MatchGroupsName(groupName string, groupFilter []models.TagFilter) bool {
|
||||
func matchTag(value string, filter models.TagFilter) bool {
|
||||
switch filter.Func {
|
||||
case "==":
|
||||
return strings.TrimSpace(filter.Value) == strings.TrimSpace(value)
|
||||
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) == strings.TrimSpace(value)
|
||||
case "!=":
|
||||
return strings.TrimSpace(filter.Value) != strings.TrimSpace(value)
|
||||
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) != strings.TrimSpace(value)
|
||||
case "in":
|
||||
_, has := filter.Vset[value]
|
||||
return has
|
||||
@@ -49,6 +64,65 @@ func matchTag(value string, filter models.TagFilter) bool {
|
||||
case "!~":
|
||||
return !filter.Regexp.MatchString(value)
|
||||
}
|
||||
// unexpect func
|
||||
// unexpected func
|
||||
return false
|
||||
}
|
||||
|
||||
// targetGroupMatch 处理 target_group 的特殊匹配逻辑
|
||||
func targetGroupMatch(value string, filter models.TagFilter) bool {
|
||||
var valueMap map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(value), &valueMap); err != nil {
|
||||
return false
|
||||
}
|
||||
switch filter.Func {
|
||||
case "in", "not in":
|
||||
// float64 类型的 id 切片
|
||||
filterValueIds, ok := filter.Value.([]interface{})
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
filterValueIdsMap := make(map[float64]struct{})
|
||||
for _, id := range filterValueIds {
|
||||
filterValueIdsMap[id.(float64)] = struct{}{}
|
||||
}
|
||||
// float64 类型的 groupIds 切片
|
||||
groupIds, ok := valueMap["group_ids"].([]interface{})
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
// in 只要 groupIds 中有一个在 filterGroupIds 中出现,就返回 true
|
||||
// not in 则相反
|
||||
found := false
|
||||
for _, gid := range groupIds {
|
||||
if _, found = filterValueIdsMap[gid.(float64)]; found {
|
||||
break
|
||||
}
|
||||
}
|
||||
if filter.Func == "in" {
|
||||
return found
|
||||
}
|
||||
// filter.Func == "not in"
|
||||
return !found
|
||||
|
||||
case "=~", "!~":
|
||||
// 正则满足一个就认为 matched
|
||||
groupNames, ok := valueMap["group_names"].([]interface{})
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
matched := false
|
||||
for _, gname := range groupNames {
|
||||
if filter.Regexp.MatchString(fmt.Sprintf("%v", gname)) {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if filter.Func == "=~" {
|
||||
return matched
|
||||
}
|
||||
// "!~": 只要有一个匹配就返回 false,否则返回 true
|
||||
return !matched
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,8 +8,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
@@ -26,10 +26,15 @@ type Consumer struct {
|
||||
alerting aconf.Alerting
|
||||
ctx *ctx.Context
|
||||
|
||||
dispatch *Dispatch
|
||||
promClients *prom.PromClientMap
|
||||
dispatch *Dispatch
|
||||
promClients *prom.PromClientMap
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
}
|
||||
|
||||
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
|
||||
|
||||
var EventMuteHook EventMuteHookFunc = func(event *models.AlertCurEvent) bool { return false }
|
||||
|
||||
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
|
||||
tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
|
||||
if promClients.IsNil(datasourceID) {
|
||||
@@ -43,12 +48,14 @@ func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
|
||||
}
|
||||
|
||||
// 创建一个 Consumer 实例
|
||||
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
|
||||
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap, alertMuteCache *memsto.AlertMuteCacheType) *Consumer {
|
||||
return &Consumer{
|
||||
alerting: alerting,
|
||||
ctx: ctx,
|
||||
dispatch: dispatch,
|
||||
promClients: promClients,
|
||||
|
||||
alertMuteCache: alertMuteCache,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,12 +98,12 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
|
||||
e.dispatch.Astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
|
||||
|
||||
if err := event.ParseRule("rule_name"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse rule name: %v", event.RuleId, err)
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule name: %v", event.RuleId, event.DatasourceId, err)
|
||||
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
|
||||
}
|
||||
|
||||
if err := event.ParseRule("annotations"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse annotations: %v", event.RuleId, event.DatasourceId, err)
|
||||
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
|
||||
event.AnnotationsJSON["error"] = event.Annotations
|
||||
}
|
||||
@@ -104,16 +111,12 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
|
||||
e.queryRecoveryVal(event)
|
||||
|
||||
if err := event.ParseRule("rule_note"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule note: %v", event.RuleId, event.DatasourceId, err)
|
||||
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
|
||||
}
|
||||
|
||||
e.persist(event)
|
||||
|
||||
if event.IsRecovered && event.NotifyRecovered == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
e.dispatch.HandleEventNotify(event, false)
|
||||
}
|
||||
|
||||
@@ -127,7 +130,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
|
||||
var err error
|
||||
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
|
||||
if err != nil {
|
||||
logger.Errorf("event:%+v persist err:%v", event, err)
|
||||
logger.Errorf("event:%s persist err:%v", event.Hash, err)
|
||||
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
|
||||
}
|
||||
return
|
||||
@@ -135,7 +138,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
|
||||
|
||||
err := models.EventPersist(e.ctx, event)
|
||||
if err != nil {
|
||||
logger.Errorf("event%+v persist err:%v", event, err)
|
||||
logger.Errorf("event:%s persist err:%v", event.Hash, err)
|
||||
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -153,12 +156,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
|
||||
promql = strings.TrimSpace(promql)
|
||||
if promql == "" {
|
||||
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql is blank", event.RuleId, event.DatasourceId)
|
||||
return
|
||||
}
|
||||
|
||||
if e.promClients.IsNil(event.DatasourceId) {
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", event.RuleId, event.DatasourceId)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -167,7 +170,7 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
var warnings promsdk.Warnings
|
||||
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", event.RuleId, event.DatasourceId, promql, err)
|
||||
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
|
||||
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
@@ -181,12 +184,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
}
|
||||
|
||||
if len(warnings) > 0 {
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", event.RuleId, event.DatasourceId, promql, warnings)
|
||||
}
|
||||
|
||||
anomalyPoints := models.ConvertAnomalyPoints(value)
|
||||
if len(anomalyPoints) == 0 {
|
||||
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql:%s, result is empty", event.RuleId, event.DatasourceId, promql)
|
||||
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
|
||||
} else {
|
||||
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
|
||||
@@ -201,6 +204,3 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
}
|
||||
}
|
||||
|
||||
func getKey(event *models.AlertCurEvent) string {
|
||||
return common.RuleKey(event.DatasourceId, event.RuleId)
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
@@ -24,6 +25,17 @@ import (
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
var ShouldSkipNotify func(*ctx.Context, *models.AlertCurEvent, int64) bool
|
||||
var SendByNotifyRule func(*ctx.Context, *memsto.UserCacheType, *memsto.UserGroupCacheType, *memsto.NotifyChannelCacheType, *memsto.CvalCache,
|
||||
[]*models.AlertCurEvent, int64, *models.NotifyConfig, *models.NotifyChannelConfig, *models.MessageTemplate)
|
||||
|
||||
var EventProcessorCache *memsto.EventProcessorCacheType
|
||||
|
||||
func init() {
|
||||
ShouldSkipNotify = shouldSkipNotify
|
||||
SendByNotifyRule = SendNotifyRuleMessage
|
||||
}
|
||||
|
||||
type Dispatch struct {
|
||||
alertRuleCache *memsto.AlertRuleCacheType
|
||||
userCache *memsto.UserCacheType
|
||||
@@ -32,6 +44,7 @@ type Dispatch struct {
|
||||
targetCache *memsto.TargetCacheType
|
||||
notifyConfigCache *memsto.NotifyConfigCacheType
|
||||
taskTplsCache *memsto.TaskTplCache
|
||||
configCvalCache *memsto.CvalCache
|
||||
|
||||
notifyRuleCache *memsto.NotifyRuleCacheType
|
||||
notifyChannelCache *memsto.NotifyChannelCacheType
|
||||
@@ -45,9 +58,8 @@ type Dispatch struct {
|
||||
tpls map[string]*template.Template
|
||||
ExtraSenders map[string]sender.Sender
|
||||
BeforeSenderHook func(*models.AlertCurEvent) bool
|
||||
|
||||
ctx *ctx.Context
|
||||
Astats *astats.Stats
|
||||
ctx *ctx.Context
|
||||
Astats *astats.Stats
|
||||
|
||||
RwLock sync.RWMutex
|
||||
}
|
||||
@@ -56,7 +68,7 @@ type Dispatch struct {
|
||||
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
|
||||
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
|
||||
taskTplsCache *memsto.TaskTplCache, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
|
||||
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
|
||||
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, configCvalCache *memsto.CvalCache, alerting aconf.Alerting, c *ctx.Context, astats *astats.Stats) *Dispatch {
|
||||
notify := &Dispatch{
|
||||
alertRuleCache: alertRuleCache,
|
||||
userCache: userCache,
|
||||
@@ -69,6 +81,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
|
||||
notifyChannelCache: notifyChannelCache,
|
||||
messageTemplateCache: messageTemplateCache,
|
||||
eventProcessorCache: eventProcessorCache,
|
||||
configCvalCache: configCvalCache,
|
||||
|
||||
alerting: alerting,
|
||||
|
||||
@@ -77,11 +90,16 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
|
||||
ExtraSenders: make(map[string]sender.Sender),
|
||||
BeforeSenderHook: func(*models.AlertCurEvent) bool { return true },
|
||||
|
||||
ctx: ctx,
|
||||
ctx: c,
|
||||
Astats: astats,
|
||||
}
|
||||
|
||||
pipeline.Init()
|
||||
EventProcessorCache = eventProcessorCache
|
||||
|
||||
// 设置通知记录回调函数
|
||||
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
|
||||
|
||||
return notify
|
||||
}
|
||||
|
||||
@@ -153,7 +171,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
// 深拷贝新的 event,避免并发修改 event 冲突
|
||||
eventCopy := eventOrigin.DeepCopy()
|
||||
|
||||
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
|
||||
logger.Infof("notify rule ids: %v, event: %s", notifyRuleId, eventCopy.Hash)
|
||||
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
|
||||
if notifyRule == nil {
|
||||
continue
|
||||
@@ -162,71 +180,106 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
if !notifyRule.Enable {
|
||||
continue
|
||||
}
|
||||
eventCopy.NotifyRuleId = notifyRuleId
|
||||
eventCopy.NotifyRuleName = notifyRule.Name
|
||||
|
||||
var processors []models.Processor
|
||||
for _, pipelineConfig := range notifyRule.PipelineConfigs {
|
||||
if !pipelineConfig.Enable {
|
||||
continue
|
||||
}
|
||||
|
||||
eventPipeline := e.eventProcessorCache.Get(pipelineConfig.PipelineId)
|
||||
if eventPipeline == nil {
|
||||
logger.Warningf("notify_id: %d, event:%+v, processor not found", notifyRuleId, eventCopy)
|
||||
continue
|
||||
}
|
||||
|
||||
if !pipelineApplicable(eventPipeline, eventCopy) {
|
||||
logger.Debugf("notify_id: %d, event:%+v, pipeline_id: %d, not applicable", notifyRuleId, eventCopy, pipelineConfig.PipelineId)
|
||||
continue
|
||||
}
|
||||
|
||||
processors = append(processors, e.eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)...)
|
||||
}
|
||||
|
||||
for _, processor := range processors {
|
||||
logger.Infof("before processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
|
||||
eventCopy = processor.Process(e.ctx, eventCopy)
|
||||
logger.Infof("after processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
|
||||
if eventCopy == nil {
|
||||
logger.Warningf("notify_id: %d, event:%+v, processor:%+v, event is nil", notifyRuleId, eventCopy, processor)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if eventCopy == nil {
|
||||
// 如果 eventCopy 为 nil,说明 eventCopy 被 processor drop 掉了, 不再发送通知
|
||||
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
|
||||
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
|
||||
logger.Infof("notify_id: %d, event:%s, should skip notify", notifyRuleId, eventCopy.Hash)
|
||||
continue
|
||||
}
|
||||
|
||||
// notify
|
||||
for i := range notifyRule.NotifyConfigs {
|
||||
if !NotifyRuleApplicable(¬ifyRule.NotifyConfigs[i], eventCopy) {
|
||||
err := NotifyRuleMatchCheck(¬ifyRule.NotifyConfigs[i], eventCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
|
||||
continue
|
||||
}
|
||||
|
||||
notifyChannel := e.notifyChannelCache.Get(notifyRule.NotifyConfigs[i].ChannelID)
|
||||
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
|
||||
if notifyChannel == nil {
|
||||
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
|
||||
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
logger.Warningf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
continue
|
||||
}
|
||||
|
||||
if notifyChannel.RequestType != "flashduty" && messageTemplate == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
if notifyChannel.RequestType != "flashduty" && notifyChannel.RequestType != "pagerduty" && messageTemplate == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%s, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy.Hash, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// todo go send
|
||||
// todo 聚合 event
|
||||
go e.sendV2([]*models.AlertCurEvent{eventCopy}, notifyRuleId, ¬ifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
|
||||
go SendByNotifyRule(e.ctx, e.userCache, e.userGroupCache, e.notifyChannelCache, e.configCvalCache, []*models.AlertCurEvent{eventCopy}, notifyRuleId, ¬ifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
|
||||
func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleId int64) bool {
|
||||
if event == nil {
|
||||
// 如果 eventCopy 为 nil,说明 eventCopy 被 processor drop 掉了, 不再发送通知
|
||||
return true
|
||||
}
|
||||
|
||||
if event.IsRecovered && event.NotifyRecovered == 0 {
|
||||
// 如果 eventCopy 是恢复事件,且 NotifyRecovered 为 0,则不发送通知
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
|
||||
workflowEngine := engine.NewWorkflowEngine(ctx)
|
||||
|
||||
for _, pipelineConfig := range pipelineConfigs {
|
||||
if !pipelineConfig.Enable {
|
||||
continue
|
||||
}
|
||||
|
||||
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
|
||||
if eventPipeline == nil {
|
||||
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
|
||||
continue
|
||||
}
|
||||
|
||||
if !PipelineApplicable(eventPipeline, event) {
|
||||
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
|
||||
continue
|
||||
}
|
||||
|
||||
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeEvent,
|
||||
TriggerBy: from + "_" + strconv.FormatInt(id, 10),
|
||||
}
|
||||
|
||||
resultEvent, result, err := workflowEngine.Execute(eventPipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
logger.Errorf("processor_by_%s_id:%d pipeline_id:%d, pipeline execute error: %v", from, id, pipelineConfig.PipelineId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if resultEvent == nil {
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, event: %s", from, id, pipelineConfig.PipelineId, eventOrigin.Hash)
|
||||
if from == "notify_rule" {
|
||||
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", result.Message, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by pipeline", from, id, pipelineConfig.PipelineId))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
event = resultEvent
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, pipeline executed, status:%s, message:%s", from, id, pipelineConfig.PipelineId, result.Status, result.Message)
|
||||
}
|
||||
|
||||
event.FE2DB()
|
||||
event.FillTagsMap()
|
||||
return event
|
||||
}
|
||||
|
||||
func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
|
||||
if pipeline == nil {
|
||||
return true
|
||||
}
|
||||
@@ -237,15 +290,18 @@ func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
|
||||
|
||||
tagMatch := true
|
||||
if len(pipeline.LabelFilters) > 0 {
|
||||
for i := range pipeline.LabelFilters {
|
||||
if pipeline.LabelFilters[i].Func == "" {
|
||||
pipeline.LabelFilters[i].Func = pipeline.LabelFilters[i].Op
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelFiltersCopy := make([]models.TagFilter, len(pipeline.LabelFilters))
|
||||
copy(labelFiltersCopy, pipeline.LabelFilters)
|
||||
for i := range labelFiltersCopy {
|
||||
if labelFiltersCopy[i].Func == "" {
|
||||
labelFiltersCopy[i].Func = labelFiltersCopy[i].Op
|
||||
}
|
||||
}
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(pipeline.LabelFilters)
|
||||
tagFilters, err := models.ParseTagFilter(labelFiltersCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v", err, event.Hash, pipeline)
|
||||
return false
|
||||
}
|
||||
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
|
||||
@@ -253,9 +309,13 @@ func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
|
||||
|
||||
attributesMatch := true
|
||||
if len(pipeline.AttrFilters) > 0 {
|
||||
tagFilters, err := models.ParseTagFilter(pipeline.AttrFilters)
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attrFiltersCopy := make([]models.TagFilter, len(pipeline.AttrFilters))
|
||||
copy(attrFiltersCopy, pipeline.AttrFilters)
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(attrFiltersCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v err:%v", tagFilters, event, pipeline, err)
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v err:%v", tagFilters, event.Hash, pipeline, err)
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -265,7 +325,7 @@ func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
|
||||
return tagMatch && attributesMatch
|
||||
}
|
||||
|
||||
func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) bool {
|
||||
func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) error {
|
||||
tm := time.Unix(event.TriggerTime, 0)
|
||||
triggerTime := tm.Format("15:04")
|
||||
triggerWeek := int(tm.Weekday())
|
||||
@@ -317,6 +377,10 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
}
|
||||
}
|
||||
|
||||
if !timeMatch {
|
||||
return fmt.Errorf("event time not match time filter")
|
||||
}
|
||||
|
||||
severityMatch := false
|
||||
for i := range notifyConfig.Severities {
|
||||
if notifyConfig.Severities[i] == event.Severity {
|
||||
@@ -324,39 +388,60 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
}
|
||||
}
|
||||
|
||||
if !severityMatch {
|
||||
return fmt.Errorf("event severity not match severity filter")
|
||||
}
|
||||
|
||||
tagMatch := true
|
||||
if len(notifyConfig.LabelKeys) > 0 {
|
||||
for i := range notifyConfig.LabelKeys {
|
||||
if notifyConfig.LabelKeys[i].Func == "" {
|
||||
notifyConfig.LabelKeys[i].Func = notifyConfig.LabelKeys[i].Op
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelKeysCopy := make([]models.TagFilter, len(notifyConfig.LabelKeys))
|
||||
copy(labelKeysCopy, notifyConfig.LabelKeys)
|
||||
for i := range labelKeysCopy {
|
||||
if labelKeysCopy[i].Func == "" {
|
||||
labelKeysCopy[i].Func = labelKeysCopy[i].Op
|
||||
}
|
||||
}
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(notifyConfig.LabelKeys)
|
||||
tagFilters, err := models.ParseTagFilter(labelKeysCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
|
||||
return false
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v", err, event.Hash, notifyConfig)
|
||||
return fmt.Errorf("failed to parse tag filter: %v", err)
|
||||
}
|
||||
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
|
||||
}
|
||||
|
||||
if !tagMatch {
|
||||
return fmt.Errorf("event tag not match tag filter")
|
||||
}
|
||||
|
||||
attributesMatch := true
|
||||
if len(notifyConfig.Attributes) > 0 {
|
||||
tagFilters, err := models.ParseTagFilter(notifyConfig.Attributes)
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attributesCopy := make([]models.TagFilter, len(notifyConfig.Attributes))
|
||||
copy(attributesCopy, notifyConfig.Attributes)
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(attributesCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
|
||||
return false
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v err:%v", tagFilters, event.Hash, notifyConfig, err)
|
||||
return fmt.Errorf("failed to parse tag filter: %v", err)
|
||||
}
|
||||
|
||||
attributesMatch = common.MatchTags(event.JsonTagsAndValue(), tagFilters)
|
||||
}
|
||||
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
|
||||
return timeMatch && severityMatch && tagMatch && attributesMatch
|
||||
|
||||
if !attributesMatch {
|
||||
return fmt.Errorf("event attributes not match attributes filter")
|
||||
}
|
||||
|
||||
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%s notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event.Hash, notifyConfig)
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) ([]string, []int64, map[string]string) {
|
||||
func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) ([]string, []int64, []string, map[string]string) {
|
||||
customParams := make(map[string]string)
|
||||
var flashDutyChannelIDs []int64
|
||||
var pagerDutyRoutingKeys []string
|
||||
var userInfoParams models.CustomParams
|
||||
|
||||
for key, value := range notifyConfig.Params {
|
||||
@@ -374,13 +459,26 @@ func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string,
|
||||
}
|
||||
}
|
||||
}
|
||||
case "pagerduty_integration_keys", "pagerduty_integration_ids":
|
||||
if key == "pagerduty_integration_ids" {
|
||||
// 不处理ids,直接跳过,这个字段只给前端标记用
|
||||
continue
|
||||
}
|
||||
if data, err := json.Marshal(value); err == nil {
|
||||
var keys []string
|
||||
if json.Unmarshal(data, &keys) == nil {
|
||||
pagerDutyRoutingKeys = keys
|
||||
break
|
||||
}
|
||||
}
|
||||
default:
|
||||
// 避免直接 value.(string) 导致 panic,支持多种类型并统一为字符串
|
||||
customParams[key] = value.(string)
|
||||
}
|
||||
}
|
||||
|
||||
if len(userInfoParams.UserIDs) == 0 && len(userInfoParams.UserGroupIDs) == 0 {
|
||||
return []string{}, flashDutyChannelIDs, customParams
|
||||
return []string{}, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams
|
||||
}
|
||||
|
||||
userIds := make([]int64, 0)
|
||||
@@ -416,18 +514,20 @@ func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string,
|
||||
visited[user.Id] = true
|
||||
}
|
||||
|
||||
return sendtos, flashDutyChannelIDs, customParams
|
||||
return sendtos, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams
|
||||
}
|
||||
|
||||
func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
|
||||
func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, configCvalCache *memsto.CvalCache,
|
||||
events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
|
||||
if len(events) == 0 {
|
||||
logger.Errorf("notify_id: %d events is empty", notifyRuleId)
|
||||
return
|
||||
}
|
||||
|
||||
siteInfo := configCvalCache.GetSiteInfo()
|
||||
tplContent := make(map[string]interface{})
|
||||
if notifyChannel.RequestType != "flashduty" {
|
||||
tplContent = messageTemplate.RenderEvent(events)
|
||||
tplContent = messageTemplate.RenderEvent(events, siteInfo.SiteUrl)
|
||||
}
|
||||
|
||||
var contactKey string
|
||||
@@ -435,10 +535,7 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
|
||||
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
|
||||
}
|
||||
|
||||
sendtos, flashDutyChannelIDs, customParams := GetNotifyConfigParams(notifyConfig, contactKey, e.userCache, e.userGroupCache)
|
||||
|
||||
e.Astats.GaugeNotifyRecordQueueSize.Inc()
|
||||
defer e.Astats.GaugeNotifyRecordQueueSize.Dec()
|
||||
sendtos, flashDutyChannelIDs, pagerdutyRoutingKeys, customParams := GetNotifyConfigParams(notifyConfig, contactKey, userCache, userGroupCache)
|
||||
|
||||
switch notifyChannel.RequestType {
|
||||
case "flashduty":
|
||||
@@ -447,45 +544,53 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
|
||||
}
|
||||
|
||||
for i := range flashDutyChannelIDs {
|
||||
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%s, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
|
||||
}
|
||||
return
|
||||
|
||||
case "pagerduty":
|
||||
for _, routingKey := range pagerdutyRoutingKeys {
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendPagerDuty(events, routingKey, siteInfo.SiteUrl, notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%s, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, "", respBody, err)
|
||||
}
|
||||
|
||||
case "http":
|
||||
if e.notifyChannelCache.HttpConcurrencyAdd(notifyChannel.ID) {
|
||||
defer e.notifyChannelCache.HttpConcurrencyDone(notifyChannel.ID)
|
||||
}
|
||||
if notifyChannel.RequestConfig == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, request config not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
// 使用队列模式处理 http 通知
|
||||
// 创建通知任务
|
||||
task := &memsto.NotifyTask{
|
||||
Events: events,
|
||||
NotifyRuleId: notifyRuleId,
|
||||
NotifyChannel: notifyChannel,
|
||||
TplContent: tplContent,
|
||||
CustomParams: customParams,
|
||||
Sendtos: sendtos,
|
||||
}
|
||||
|
||||
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, http request config not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
}
|
||||
|
||||
if NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
|
||||
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos, resp, err)
|
||||
|
||||
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), resp, err)
|
||||
} else {
|
||||
for i := range sendtos {
|
||||
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
|
||||
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, []string{sendtos[i]}), resp, err)
|
||||
}
|
||||
// 将任务加入队列
|
||||
success := notifyChannelCache.EnqueueNotifyTask(task)
|
||||
if !success {
|
||||
logger.Errorf("failed to enqueue notify task for channel %d, notify_id: %d", notifyChannel.ID, notifyRuleId)
|
||||
// 如果入队失败,记录错误通知
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), "", errors.New("failed to enqueue notify task, queue is full"))
|
||||
}
|
||||
|
||||
case "smtp":
|
||||
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, e.notifyChannelCache.GetSmtpClient(notifyChannel.ID))
|
||||
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, notifyChannelCache.GetSmtpClient(notifyChannel.ID))
|
||||
|
||||
case "script":
|
||||
start := time.Now()
|
||||
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
|
||||
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%s, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0].Hash, tplContent, customParams, target, res, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
|
||||
default:
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%s send type not found", notifyRuleId, notifyChannel.Name, events[0].Hash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -498,6 +603,11 @@ func NeedBatchContacts(requestConfig *models.HTTPRequestConfig) bool {
|
||||
// event: 告警/恢复事件
|
||||
// isSubscribe: 告警事件是否由subscribe的配置产生
|
||||
func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
|
||||
go e.HandleEventWithNotifyRule(event)
|
||||
if event.IsRecovered && event.NotifyRecovered == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
rule := e.alertRuleCache.Get(event.RuleId)
|
||||
if rule == nil {
|
||||
return
|
||||
@@ -530,7 +640,6 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
|
||||
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
|
||||
}
|
||||
|
||||
go e.HandleEventWithNotifyRule(event)
|
||||
go e.Send(rule, event, notifyTarget, isSubscribe)
|
||||
|
||||
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
|
||||
@@ -570,6 +679,10 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
|
||||
return
|
||||
}
|
||||
|
||||
if !sub.MatchCate(event.Cate) {
|
||||
return
|
||||
}
|
||||
|
||||
if !common.MatchTags(event.TagsMap, sub.ITags) {
|
||||
return
|
||||
}
|
||||
@@ -621,7 +734,7 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
|
||||
event = msgCtx.Events[0]
|
||||
}
|
||||
|
||||
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
|
||||
logger.Debugf("send to channel:%s event:%s users:%+v", channel, event.Hash, msgCtx.Users)
|
||||
s.Send(msgCtx)
|
||||
}
|
||||
}
|
||||
@@ -720,12 +833,12 @@ func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEven
|
||||
|
||||
if len(t.Host) == 0 {
|
||||
sender.CallIbex(e.ctx, t.TplId, event.TargetIdent,
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event)
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event, "")
|
||||
continue
|
||||
}
|
||||
for _, host := range t.Host {
|
||||
sender.CallIbex(e.ctx, t.TplId, host,
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event)
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event, "")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,17 +18,18 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
|
||||
}
|
||||
|
||||
logger.Infof(
|
||||
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d %s",
|
||||
"alert_eval_%d event(%s %s) %s: sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
|
||||
event.RuleId,
|
||||
event.Hash,
|
||||
status,
|
||||
location,
|
||||
event.RuleId,
|
||||
event.SubRuleId,
|
||||
event.NotifyRuleIds,
|
||||
event.Cluster,
|
||||
event.TagsJSON,
|
||||
event.TriggerValue,
|
||||
event.TriggerTime,
|
||||
event.LastEvalTime,
|
||||
message,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -101,17 +101,17 @@ func (s *Scheduler) syncAlertRules() {
|
||||
}
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
if ds == nil {
|
||||
logger.Debugf("datasource %d not found", dsId)
|
||||
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.PluginType != ruleType {
|
||||
logger.Debugf("datasource %d category is %s not %s", dsId, ds.PluginType, ruleType)
|
||||
logger.Debugf("alert_eval_%d datasource %d category is %s not %s", rule.Id, dsId, ds.PluginType, ruleType)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status != "enabled" {
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
@@ -134,12 +134,12 @@ func (s *Scheduler) syncAlertRules() {
|
||||
for _, dsId := range dsIds {
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
if ds == nil {
|
||||
logger.Debugf("datasource %d not found", dsId)
|
||||
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status != "enabled" {
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
@@ -24,6 +25,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
promql2 "github.com/ccfos/nightingale/v6/pkg/promql"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/unit"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/prometheus/common/model"
|
||||
@@ -60,6 +62,7 @@ const (
|
||||
CHECK_QUERY = "check_query_config"
|
||||
GET_CLIENT = "get_client"
|
||||
QUERY_DATA = "query_data"
|
||||
EXEC_TEMPLATE = "exec_template"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -106,7 +109,7 @@ func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, Processor *p
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("alert rule %s add cron pattern error: %v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d add cron pattern error: %v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
}
|
||||
|
||||
Processor.ScheduleEntry = arw.Scheduler.Entry(entryID)
|
||||
@@ -144,14 +147,24 @@ func (arw *AlertRuleWorker) Start() {
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Eval() {
|
||||
logger.Infof("eval:%s started", arw.Key())
|
||||
begin := time.Now()
|
||||
var message string
|
||||
|
||||
defer func() {
|
||||
if len(message) == 0 {
|
||||
logger.Infof("alert_eval_%d datasource_%d finished, duration:%v", arw.Rule.Id, arw.DatasourceId, time.Since(begin))
|
||||
} else {
|
||||
logger.Warningf("alert_eval_%d datasource_%d finished, duration:%v, message:%s", arw.Rule.Id, arw.DatasourceId, time.Since(begin), message)
|
||||
}
|
||||
}()
|
||||
|
||||
if arw.Processor.PromEvalInterval == 0 {
|
||||
arw.Processor.PromEvalInterval = getPromEvalInterval(arw.Processor.ScheduleEntry.Schedule)
|
||||
}
|
||||
|
||||
cachedRule := arw.Rule
|
||||
if cachedRule == nil {
|
||||
// logger.Errorf("rule_eval:%s Rule not found", arw.Key())
|
||||
message = "rule not found"
|
||||
return
|
||||
}
|
||||
arw.Processor.Stats.CounterRuleEval.WithLabelValues().Inc()
|
||||
@@ -176,12 +189,12 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s get anomaly point err:%s", arw.Key(), err.Error())
|
||||
message = fmt.Sprintf("failed to get anomaly points: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if arw.Processor == nil {
|
||||
logger.Warningf("rule_eval:%s Processor is nil", arw.Key())
|
||||
message = "processor is nil"
|
||||
return
|
||||
}
|
||||
|
||||
@@ -223,7 +236,7 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Stop() {
|
||||
logger.Infof("rule_eval %s stopped", arw.Key())
|
||||
logger.Infof("alert_eval_%d datasource_%d stopped", arw.Rule.Id, arw.DatasourceId)
|
||||
close(arw.Quit)
|
||||
c := arw.Scheduler.Stop()
|
||||
<-c.Done()
|
||||
@@ -239,7 +252,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
var rule *models.PromRuleConfig
|
||||
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -250,7 +263,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if rule == nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -265,7 +278,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
readerClient := arw.PromClients.GetCli(arw.DatasourceId)
|
||||
|
||||
if readerClient == nil {
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -275,7 +288,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
continue
|
||||
}
|
||||
|
||||
if query.VarEnabled {
|
||||
if query.VarEnabled && strings.Contains(query.PromQl, "$") {
|
||||
var anomalyPoints []models.AnomalyPoint
|
||||
if hasLabelLossAggregator(query) || notExactMatch(query) {
|
||||
// 若有聚合函数或非精确匹配则需要先填充变量然后查询,这个方式效率较低
|
||||
@@ -301,13 +314,13 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
// 无变量
|
||||
promql := strings.TrimSpace(query.PromQl)
|
||||
if promql == "" {
|
||||
logger.Warningf("rule_eval:%s promql is blank", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql is blank", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), CHECK_QUERY, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
|
||||
if arw.PromClients.IsNil(arw.DatasourceId) {
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -316,7 +329,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
|
||||
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -328,12 +341,12 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if len(warnings) > 0 {
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", arw.Rule.Id, arw.DatasourceId, promql, warnings)
|
||||
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
|
||||
logger.Infof("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
|
||||
logger.Infof("alert_eval_%d datasource_%d query:%+v, value:%v", arw.Rule.Id, arw.DatasourceId, query, value)
|
||||
points := models.ConvertAnomalyPoints(value)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -427,14 +440,14 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
value, _, err := readerClient.Query(context.Background(), curQuery, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), curQuery, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, curQuery, err)
|
||||
continue
|
||||
}
|
||||
seqVals := getSamples(value)
|
||||
// 得到参数变量的所有组合
|
||||
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
continue
|
||||
}
|
||||
// 判断哪些参数值符合条件
|
||||
@@ -567,14 +580,14 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
case "host":
|
||||
hostIdents, err := arw.getHostIdents(paramQuery)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, fail to get host idents, error:%v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to get host idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
break
|
||||
}
|
||||
params = hostIdents
|
||||
case "device":
|
||||
deviceIdents, err := arw.getDeviceIdents(paramQuery)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, fail to get device idents, error:%v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to get device idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
break
|
||||
}
|
||||
params = deviceIdents
|
||||
@@ -583,12 +596,12 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
var query []string
|
||||
err := json.Unmarshal(q, &query)
|
||||
if err != nil {
|
||||
logger.Errorf("query:%s fail to unmarshalling into string slice, error:%v", paramQuery.Query, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d query:%s fail to unmarshalling into string slice, error:%v", arw.Rule.Id, arw.DatasourceId, paramQuery.Query, err)
|
||||
}
|
||||
if len(query) == 0 {
|
||||
paramsKeyAllLabel, err := getParamKeyAllLabel(varToLabel[paramKey], originPromql, readerClient, arw.DatasourceId, arw.Rule.Id, arw.Processor.Stats)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, fail to getParamKeyAllLabel, error:%v query:%s", arw.Key(), err, paramQuery.Query)
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to getParamKeyAllLabel, error:%v query:%s", arw.Rule.Id, arw.DatasourceId, err, paramQuery.Query)
|
||||
}
|
||||
params = paramsKeyAllLabel
|
||||
} else {
|
||||
@@ -602,7 +615,7 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
return nil, fmt.Errorf("param key: %s, params is empty", paramKey)
|
||||
}
|
||||
|
||||
logger.Infof("rule_eval:%s paramKey: %s, params: %v", arw.Key(), paramKey, params)
|
||||
logger.Infof("alert_eval_%d datasource_%d paramKey: %s, params: %v", arw.Rule.Id, arw.DatasourceId, paramKey, params)
|
||||
paramMap[paramKey] = params
|
||||
}
|
||||
|
||||
@@ -753,7 +766,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
var rule *models.HostRuleConfig
|
||||
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -764,7 +777,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if rule == nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -787,7 +800,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
// 如果是中心节点, 将不再上报数据的主机 engineName 为空的机器,也加入到 targets 中
|
||||
missEngineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get("", arw.Rule.Id)
|
||||
if !exists {
|
||||
logger.Debugf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
|
||||
logger.Debugf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -795,7 +808,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
engineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
idents = append(idents, engineIdents...)
|
||||
@@ -822,7 +835,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
"",
|
||||
).Set(float64(len(missTargets)))
|
||||
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
|
||||
targets := arw.Processor.TargetCache.Gets(missTargets)
|
||||
for _, target := range targets {
|
||||
m := make(map[string]string)
|
||||
@@ -831,7 +844,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
m["ident"] = target.Ident
|
||||
|
||||
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
|
||||
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.BeatTime), trigger.Severity))
|
||||
}
|
||||
case "offset":
|
||||
idents, exists := arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
|
||||
@@ -841,7 +854,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
"",
|
||||
).Set(0)
|
||||
logger.Warningf("rule_eval:%s targets not found", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -860,7 +873,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
continue
|
||||
}
|
||||
if target, exists := targetMap[ident]; exists {
|
||||
if now-target.UpdateAt > 120 {
|
||||
if now-target.BeatTime > 120 {
|
||||
// means this target is not a active host, do not check offset
|
||||
continue
|
||||
}
|
||||
@@ -872,7 +885,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
|
||||
logger.Debugf("alert_eval_%d datasource_%d offsetIdents:%v", arw.Rule.Id, arw.DatasourceId, offsetIdents)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
@@ -899,7 +912,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
"",
|
||||
).Set(0)
|
||||
logger.Warningf("rule_eval:%s targets not found", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -911,7 +924,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
missTargets = append(missTargets, ident)
|
||||
}
|
||||
}
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
@@ -1066,15 +1079,15 @@ func exclude(reHashTagIndex1 map[uint64][][]uint64, reHashTagIndex2 map[uint64][
|
||||
|
||||
func MakeSeriesMap(series []models.DataResp, seriesTagIndex map[uint64][]uint64, seriesStore map[uint64]models.DataResp) {
|
||||
for i := 0; i < len(series); i++ {
|
||||
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
tagHash := hash.GetTagHash(series[i].Metric)
|
||||
seriesStore[serieHash] = series[i]
|
||||
seriesStore[seriesHash] = series[i]
|
||||
|
||||
// 将曲线按照相同的 tag 分组
|
||||
if _, exists := seriesTagIndex[tagHash]; !exists {
|
||||
seriesTagIndex[tagHash] = make([]uint64, 0)
|
||||
}
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], seriesHash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1107,7 +1120,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
|
||||
|
||||
// 有 join 条件,按条件依次合并
|
||||
if len(seriesTagIndexes) < len(trigger.Joins)+1 {
|
||||
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
|
||||
logger.Errorf("alert_eval_%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1143,7 +1156,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
|
||||
lastRehashed = exclude(curRehashed, lastRehashed)
|
||||
last = flatten(lastRehashed)
|
||||
default:
|
||||
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
|
||||
logger.Warningf("alert_eval_%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
|
||||
}
|
||||
}
|
||||
return last
|
||||
@@ -1263,7 +1276,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
// 得到参数变量的所有组合
|
||||
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1291,10 +1304,10 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
value, _, err := readerClient.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
|
||||
return
|
||||
}
|
||||
logger.Infof("rule_eval:%s, promql:%s, value:%+v", arw.Key(), promql, value)
|
||||
logger.Infof("alert_eval_%d datasource_%d promql:%s, value:%+v", arw.Rule.Id, arw.DatasourceId, promql, value)
|
||||
|
||||
points := models.ConvertAnomalyPoints(value)
|
||||
if len(points) == 0 {
|
||||
@@ -1433,7 +1446,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
recoverPoints := []models.AnomalyPoint{}
|
||||
ruleConfig := strings.TrimSpace(rule.RuleConfig)
|
||||
if ruleConfig == "" {
|
||||
logger.Warningf("rule_eval:%d ruleConfig is blank", rule.Id)
|
||||
logger.Warningf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -1441,15 +1454,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
"",
|
||||
).Set(0)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d ruleConfig is blank", rule.Id)
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
|
||||
}
|
||||
|
||||
var ruleQuery models.RuleQuery
|
||||
err := json.Unmarshal([]byte(ruleConfig), &ruleQuery)
|
||||
if err != nil {
|
||||
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
|
||||
}
|
||||
|
||||
arw.Inhibit = ruleQuery.Inhibit
|
||||
@@ -1461,7 +1474,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
plug, exists := dscache.DsCache.Get(rule.Cate, dsId)
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d datasource:%d not exists", rule.Id, dsId)
|
||||
logger.Warningf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -1470,14 +1483,24 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-2)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d datasource:%d not exists", rule.Id, dsId)
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
|
||||
}
|
||||
|
||||
if err = ExecuteQueryTemplate(rule.Cate, query, nil); err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d execute query template error: %v", rule.Id, dsId, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), EXEC_TEMPLATE, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-3)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(context.Background(), "delay", int64(rule.Delay))
|
||||
series, err := plug.QueryData(ctx, query)
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", rule.Id)).Inc()
|
||||
if err != nil {
|
||||
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
|
||||
logger.Warningf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -1485,7 +1508,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-1)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d query data error: %v", rule.Id, err)
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
|
||||
}
|
||||
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -1495,21 +1518,21 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
).Set(float64(len(series)))
|
||||
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Infof("rule_eval rid:%d req:%+v resp:%v", rule.Id, query, series)
|
||||
logger.Infof("alert_eval_%d datasource_%d req:%+v resp:%v", rule.Id, dsId, query, series)
|
||||
for i := 0; i < len(series); i++ {
|
||||
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
tagHash := hash.GetTagHash(series[i].Metric)
|
||||
seriesStore[serieHash] = series[i]
|
||||
seriesStore[seriesHash] = series[i]
|
||||
|
||||
// 将曲线按照相同的 tag 分组
|
||||
if _, exists := seriesTagIndex[tagHash]; !exists {
|
||||
seriesTagIndex[tagHash] = make([]uint64, 0)
|
||||
}
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], seriesHash)
|
||||
}
|
||||
ref, err := GetQueryRef(query)
|
||||
if err != nil {
|
||||
logger.Warningf("rule_eval rid:%d query:%+v get ref error:%s", rule.Id, query, err.Error())
|
||||
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref error:%s", rule.Id, dsId, query, err.Error())
|
||||
continue
|
||||
}
|
||||
seriesTagIndexes[ref] = seriesTagIndex
|
||||
@@ -1519,7 +1542,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
for _, query := range ruleQuery.Queries {
|
||||
ref, unit, err := GetQueryRefAndUnit(query)
|
||||
if err != nil {
|
||||
logger.Warningf("rule_eval rid:%d query:%+v get ref and unit error:%s", rule.Id, query, err.Error())
|
||||
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref and unit error:%s", rule.Id, dsId, query, err.Error())
|
||||
continue
|
||||
}
|
||||
unitMap[ref] = unit
|
||||
@@ -1539,15 +1562,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
var ts int64
|
||||
var sample models.DataResp
|
||||
var value float64
|
||||
for _, serieHash := range seriesHash {
|
||||
series, exists := seriesStore[serieHash]
|
||||
for _, seriesHash := range seriesHash {
|
||||
series, exists := seriesStore[seriesHash]
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
|
||||
logger.Warningf("alert_eval_%d datasource_%d series:%+v not found", rule.Id, dsId, series)
|
||||
continue
|
||||
}
|
||||
t, v, exists := series.Last()
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v value not found", rule.Id, series)
|
||||
logger.Warningf("alert_eval_%d datasource_%d series:%+v value not found", rule.Id, dsId, series)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1578,12 +1601,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
ts = int64(t)
|
||||
sample = series
|
||||
value = v
|
||||
logger.Infof("rule_eval rid:%d origin series labels:%+v", rule.Id, series.Metric)
|
||||
logger.Infof("alert_eval_%d datasource_%d origin series labels:%+v", rule.Id, dsId, series.Metric)
|
||||
}
|
||||
|
||||
isTriggered := parser.CalcWithRid(trigger.Exp, m, rule.Id)
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Infof("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", rule.Id, trigger, trigger.Exp, isTriggered, m)
|
||||
logger.Infof("alert_eval_%d datasource_%d trigger:%+v exp:%s res:%v m:%v", rule.Id, dsId, trigger, trigger.Exp, isTriggered, m)
|
||||
|
||||
var values string
|
||||
for k, v := range m {
|
||||
@@ -1591,11 +1614,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
continue
|
||||
}
|
||||
|
||||
switch v.(type) {
|
||||
case float64:
|
||||
values += fmt.Sprintf("%s:%.3f ", k, v)
|
||||
case string:
|
||||
values += fmt.Sprintf("%s:%s ", k, v)
|
||||
if u, exists := valuesUnitMap[k]; exists { // 配置了单位,优先用配置了单位的值
|
||||
values += fmt.Sprintf("%s:%s ", k, u.Text)
|
||||
} else {
|
||||
switch v.(type) {
|
||||
case float64:
|
||||
values += fmt.Sprintf("%s:%.3f ", k, v)
|
||||
case string:
|
||||
values += fmt.Sprintf("%s:%s ", k, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1652,7 +1679,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
// 检查是否超过 resolve_after 时间
|
||||
if now-int64(lastTs) > int64(ruleQuery.NodataTrigger.ResolveAfter) {
|
||||
logger.Infof("rule_eval rid:%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
|
||||
logger.Infof("alert_eval_%d datasource_%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, dsId, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
|
||||
delete(arw.LastSeriesStore, hash)
|
||||
continue
|
||||
}
|
||||
@@ -1673,7 +1700,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
TriggerType: models.TriggerTypeNodata,
|
||||
}
|
||||
points = append(points, point)
|
||||
logger.Infof("rule_eval rid:%d nodata point:%+v", rule.Id, point)
|
||||
logger.Infof("alert_eval_%d datasource_%d nodata point:%+v", rule.Id, dsId, point)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1688,3 +1715,61 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
return points, recoverPoints, nil
|
||||
}
|
||||
|
||||
// ExecuteQueryTemplate 根据数据源类型对 Query 进行模板渲染处理
|
||||
// cate: 数据源类别,如 "mysql", "pgsql" 等
|
||||
// query: 查询对象,如果是数据库类型的数据源,会处理其中的 sql 字段
|
||||
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
|
||||
func ExecuteQueryTemplate(cate string, query interface{}, data interface{}) error {
|
||||
// 检查 query 是否是 map,且包含 sql 字段
|
||||
queryMap, ok := query.(map[string]interface{})
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
sqlVal, exists := queryMap["sql"]
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
|
||||
sqlStr, ok := sqlVal.(string)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 调用 ExecuteSqlTemplate 处理 sql 字段
|
||||
processedSQL, err := ExecuteSqlTemplate(sqlStr, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("execute sql template error: %w", err)
|
||||
}
|
||||
|
||||
// 更新 query 中的 sql 字段
|
||||
queryMap["sql"] = processedSQL
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExecuteSqlTemplate 执行 query 中的 golang 模板语法函数
|
||||
// query: 要处理的 query 字符串
|
||||
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
|
||||
func ExecuteSqlTemplate(query string, data interface{}) (string, error) {
|
||||
if !strings.Contains(query, "{{") || !strings.Contains(query, "}}") {
|
||||
return query, nil
|
||||
}
|
||||
|
||||
tmpl, err := template.New("query").Funcs(tplx.TemplateFuncMap).Parse(query)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("query tmpl parse error: %w", err)
|
||||
}
|
||||
|
||||
var buf strings.Builder
|
||||
templateData := data
|
||||
if templateData == nil {
|
||||
templateData = struct{}{}
|
||||
}
|
||||
|
||||
if err := tmpl.Execute(&buf, templateData); err != nil {
|
||||
return "", fmt.Errorf("query tmpl execute error: %w", err)
|
||||
}
|
||||
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package mute
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -9,6 +10,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -39,8 +41,28 @@ func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *m
|
||||
|
||||
// TimeSpanMuteStrategy 根据规则配置的告警生效时间段过滤,如果产生的告警不在规则配置的告警生效时间段内,则不告警,即被mute
|
||||
// 时间范围,左闭右开,默认范围:00:00-24:00
|
||||
// 如果规则配置了时区,则在该时区下进行时间判断;如果时区为空,则使用系统时区
|
||||
func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
|
||||
tm := time.Unix(event.TriggerTime, 0)
|
||||
// 确定使用的时区
|
||||
var targetLoc *time.Location
|
||||
var err error
|
||||
|
||||
timezone := rule.TimeZone
|
||||
if timezone == "" {
|
||||
// 如果时区为空,使用系统时区(保持原有逻辑)
|
||||
targetLoc = time.Local
|
||||
} else {
|
||||
// 加载规则配置的时区
|
||||
targetLoc, err = time.LoadLocation(timezone)
|
||||
if err != nil {
|
||||
// 如果时区加载失败,记录错误并使用系统时区
|
||||
logger.Warningf("Failed to load timezone %s for rule %d, using system timezone: %v", timezone, rule.Id, err)
|
||||
targetLoc = time.Local
|
||||
}
|
||||
}
|
||||
|
||||
// 将触发时间转换到目标时区
|
||||
tm := time.Unix(event.TriggerTime, 0).In(targetLoc)
|
||||
triggerTime := tm.Format("15:04")
|
||||
triggerWeek := strconv.Itoa(int(tm.Weekday()))
|
||||
|
||||
@@ -100,7 +122,7 @@ func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEv
|
||||
// 如果是target_up的告警,且ident已经不存在了,直接过滤掉
|
||||
// 这里的判断有点太粗暴了,但是目前没有更好的办法
|
||||
if !exists && strings.Contains(rule.PromQl, "target_up") {
|
||||
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s ident:%s", "IdentNotExistsMuteStrategy", rule.Id, event.Cluster, ident)
|
||||
logger.Debugf("alert_eval_%d [IdentNotExistsMuteStrategy] mute: cluster:%s ident:%s", rule.Id, event.Cluster, ident)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -122,7 +144,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
|
||||
// 对于包含ident的告警事件,check一下ident所属bg和rule所属bg是否相同
|
||||
// 如果告警规则选择了只在本BG生效,那其他BG的机器就不能因此规则产生告警
|
||||
if exists && !target.MatchGroupId(rule.GroupId) {
|
||||
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
|
||||
logger.Debugf("alert_eval_%d [BgNotMatchMuteStrategy] mute: cluster:%s", rule.Id, event.Cluster)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -135,7 +157,8 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
|
||||
}
|
||||
|
||||
for i := 0; i < len(mutes); i++ {
|
||||
if MatchMute(event, mutes[i]) {
|
||||
matched, _ := MatchMute(event, mutes[i])
|
||||
if matched {
|
||||
return true, mutes[i].Id
|
||||
}
|
||||
}
|
||||
@@ -144,27 +167,21 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
|
||||
}
|
||||
|
||||
// MatchMute 如果传入了clock这个可选参数,就表示使用这个clock表示的时间,否则就从event的字段中取TriggerTime
|
||||
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
|
||||
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) (bool, error) {
|
||||
if mute.Disabled == 1 {
|
||||
return false
|
||||
return false, errors.New("mute is disabled")
|
||||
}
|
||||
|
||||
// 如果不是全局的,判断 匹配的 datasource id
|
||||
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
|
||||
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
|
||||
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
|
||||
idm[mute.DatasourceIdsJson[i]] = struct{}{}
|
||||
}
|
||||
|
||||
// 判断 event.datasourceId 是否包含在 idm 中
|
||||
if _, has := idm[event.DatasourceId]; !has {
|
||||
return false
|
||||
if !slices.Contains(mute.DatasourceIdsJson, event.DatasourceId) {
|
||||
return false, errors.New("datasource id not match")
|
||||
}
|
||||
}
|
||||
|
||||
if mute.MuteTimeType == models.TimeRange {
|
||||
if !mute.IsWithinTimeRange(event.TriggerTime) {
|
||||
return false
|
||||
return false, errors.New("event trigger time not within mute time range")
|
||||
}
|
||||
} else if mute.MuteTimeType == models.Periodic {
|
||||
ts := event.TriggerTime
|
||||
@@ -173,11 +190,11 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
}
|
||||
|
||||
if !mute.IsWithinPeriodicMute(ts) {
|
||||
return false
|
||||
return false, errors.New("event trigger time not within periodic mute range")
|
||||
}
|
||||
} else {
|
||||
logger.Warningf("mute time type invalid, %d", mute.MuteTimeType)
|
||||
return false
|
||||
return false, errors.New("mute time type invalid")
|
||||
}
|
||||
|
||||
var matchSeverity bool
|
||||
@@ -193,12 +210,14 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
}
|
||||
|
||||
if !matchSeverity {
|
||||
return false
|
||||
return false, errors.New("event severity not match mute severity")
|
||||
}
|
||||
|
||||
if mute.ITags == nil || len(mute.ITags) == 0 {
|
||||
return true
|
||||
if len(mute.ITags) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return common.MatchTags(event.TagsMap, mute.ITags)
|
||||
if !common.MatchTags(event.TagsMap, mute.ITags) {
|
||||
return false, errors.New("event tags not match mute tags")
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -115,7 +115,7 @@ func (n *Naming) heartbeat() error {
|
||||
newDatasource[datasourceIds[i]] = struct{}{}
|
||||
servers, err := n.ActiveServers(datasourceIds[i])
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", datasourceIds[i], err)
|
||||
logger.Warningf("heartbeat %d get active server err:%v", datasourceIds[i], err)
|
||||
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
|
||||
continue
|
||||
}
|
||||
@@ -148,7 +148,7 @@ func (n *Naming) heartbeat() error {
|
||||
|
||||
servers, err := n.ActiveServersByEngineName()
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
|
||||
logger.Warningf("heartbeat %d get active server err:%v", HostDatasource, err)
|
||||
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
|
||||
return nil
|
||||
}
|
||||
|
||||
380
alert/pipeline/engine/engine.go
Normal file
380
alert/pipeline/engine/engine.go
Normal file
@@ -0,0 +1,380 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type WorkflowEngine struct {
|
||||
ctx *ctx.Context
|
||||
}
|
||||
|
||||
func NewWorkflowEngine(c *ctx.Context) *WorkflowEngine {
|
||||
return &WorkflowEngine{ctx: c}
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) Execute(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) (*models.AlertCurEvent, *models.WorkflowResult, error) {
|
||||
startTime := time.Now()
|
||||
|
||||
wfCtx := e.initWorkflowContext(pipeline, event, triggerCtx)
|
||||
|
||||
nodes := pipeline.GetWorkflowNodes()
|
||||
connections := pipeline.GetWorkflowConnections()
|
||||
|
||||
if len(nodes) == 0 {
|
||||
return event, &models.WorkflowResult{
|
||||
Event: event,
|
||||
Status: models.ExecutionStatusSuccess,
|
||||
Message: "no nodes to execute",
|
||||
}, nil
|
||||
}
|
||||
|
||||
nodeMap := make(map[string]*models.WorkflowNode)
|
||||
for i := range nodes {
|
||||
if nodes[i].RetryInterval == 0 {
|
||||
nodes[i].RetryInterval = 1
|
||||
}
|
||||
|
||||
if nodes[i].MaxRetries == 0 {
|
||||
nodes[i].MaxRetries = 1
|
||||
}
|
||||
|
||||
nodeMap[nodes[i].ID] = &nodes[i]
|
||||
}
|
||||
|
||||
result := e.executeDAG(nodeMap, connections, wfCtx)
|
||||
result.Event = wfCtx.Event
|
||||
|
||||
duration := time.Since(startTime).Milliseconds()
|
||||
|
||||
if triggerCtx != nil && triggerCtx.Mode != "" {
|
||||
e.saveExecutionRecord(pipeline, wfCtx, result, triggerCtx, startTime.Unix(), duration)
|
||||
}
|
||||
|
||||
return wfCtx.Event, result, nil
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) initWorkflowContext(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) *models.WorkflowContext {
|
||||
// 合并输入参数
|
||||
inputs := pipeline.GetInputsMap()
|
||||
if triggerCtx != nil && triggerCtx.InputsOverrides != nil {
|
||||
for k, v := range triggerCtx.InputsOverrides {
|
||||
inputs[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
metadata := map[string]string{
|
||||
"start_time": fmt.Sprintf("%d", time.Now().Unix()),
|
||||
"pipeline_id": fmt.Sprintf("%d", pipeline.ID),
|
||||
}
|
||||
|
||||
// 是否启用流式输出
|
||||
stream := false
|
||||
if triggerCtx != nil {
|
||||
metadata["request_id"] = triggerCtx.RequestID
|
||||
metadata["trigger_mode"] = triggerCtx.Mode
|
||||
metadata["trigger_by"] = triggerCtx.TriggerBy
|
||||
stream = triggerCtx.Stream
|
||||
}
|
||||
|
||||
return &models.WorkflowContext{
|
||||
Event: event,
|
||||
Inputs: inputs,
|
||||
Vars: make(map[string]interface{}), // 初始化空的 Vars,供节点间传递数据
|
||||
Metadata: metadata,
|
||||
Stream: stream,
|
||||
}
|
||||
}
|
||||
|
||||
// executeDAG 使用 Kahn 算法执行 DAG
|
||||
func (e *WorkflowEngine) executeDAG(nodeMap map[string]*models.WorkflowNode, connections models.Connections, wfCtx *models.WorkflowContext) *models.WorkflowResult {
|
||||
result := &models.WorkflowResult{
|
||||
Status: models.ExecutionStatusSuccess,
|
||||
NodeResults: make([]*models.NodeExecutionResult, 0),
|
||||
Stream: wfCtx.Stream, // 从上下文继承流式输出设置
|
||||
}
|
||||
|
||||
// 计算每个节点的入度
|
||||
inDegree := make(map[string]int)
|
||||
for nodeID := range nodeMap {
|
||||
inDegree[nodeID] = 0
|
||||
}
|
||||
|
||||
// 遍历连接,计算入度
|
||||
for _, nodeConns := range connections {
|
||||
for _, targets := range nodeConns.Main {
|
||||
for _, target := range targets {
|
||||
inDegree[target.Node]++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 找到所有入度为 0 的节点(起始节点)
|
||||
queue := make([]string, 0)
|
||||
for nodeID, degree := range inDegree {
|
||||
if degree == 0 {
|
||||
queue = append(queue, nodeID)
|
||||
}
|
||||
}
|
||||
|
||||
// 如果没有起始节点,说明存在循环依赖
|
||||
if len(queue) == 0 && len(nodeMap) > 0 {
|
||||
result.Status = models.ExecutionStatusFailed
|
||||
result.Message = "workflow has circular dependency"
|
||||
return result
|
||||
}
|
||||
|
||||
// 记录已执行的节点
|
||||
executed := make(map[string]bool)
|
||||
// 记录节点的分支选择结果
|
||||
branchResults := make(map[string]*int)
|
||||
|
||||
for len(queue) > 0 {
|
||||
// 取出队首节点
|
||||
nodeID := queue[0]
|
||||
queue = queue[1:]
|
||||
|
||||
// 检查是否已执行
|
||||
if executed[nodeID] {
|
||||
continue
|
||||
}
|
||||
|
||||
node, exists := nodeMap[nodeID]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
// 执行节点
|
||||
nodeResult, nodeOutput := e.executeNode(node, wfCtx)
|
||||
result.NodeResults = append(result.NodeResults, nodeResult)
|
||||
|
||||
if nodeOutput != nil && nodeOutput.Stream && nodeOutput.StreamChan != nil {
|
||||
// 流式输出节点通常是最后一个节点
|
||||
// 直接传递 StreamChan 给 WorkflowResult,不阻塞等待
|
||||
result.Stream = true
|
||||
result.StreamChan = nodeOutput.StreamChan
|
||||
result.Event = wfCtx.Event
|
||||
result.Status = "streaming"
|
||||
result.Message = fmt.Sprintf("streaming output from node: %s", node.Name)
|
||||
|
||||
// 更新节点状态为 streaming
|
||||
nodeResult.Status = "streaming"
|
||||
nodeResult.Message = "streaming in progress"
|
||||
|
||||
// 立即返回,让 API 层处理流式响应
|
||||
return result
|
||||
}
|
||||
executed[nodeID] = true
|
||||
|
||||
// 保存分支结果
|
||||
if nodeResult.BranchIndex != nil {
|
||||
branchResults[nodeID] = nodeResult.BranchIndex
|
||||
}
|
||||
|
||||
// 检查执行状态
|
||||
if nodeResult.Status == "failed" {
|
||||
if !node.ContinueOnFail {
|
||||
result.Status = models.ExecutionStatusFailed
|
||||
result.ErrorNode = nodeID
|
||||
result.Message = fmt.Sprintf("node %s failed: %s", node.Name, nodeResult.Error)
|
||||
}
|
||||
}
|
||||
|
||||
// 检查是否终止
|
||||
if nodeResult.Status == "terminated" {
|
||||
result.Message = fmt.Sprintf("workflow terminated at node %s", node.Name)
|
||||
return result
|
||||
}
|
||||
|
||||
// 更新后继节点的入度
|
||||
if nodeConns, ok := connections[nodeID]; ok {
|
||||
for outputIndex, targets := range nodeConns.Main {
|
||||
// 检查是否应该走这个分支
|
||||
if !e.shouldFollowBranch(nodeID, outputIndex, branchResults) {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, target := range targets {
|
||||
inDegree[target.Node]--
|
||||
if inDegree[target.Node] == 0 {
|
||||
queue = append(queue, target.Node)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// executeNode 执行单个节点
|
||||
// 返回:节点执行结果、节点输出(用于流式输出检测)
|
||||
func (e *WorkflowEngine) executeNode(node *models.WorkflowNode, wfCtx *models.WorkflowContext) (*models.NodeExecutionResult, *models.NodeOutput) {
|
||||
startTime := time.Now()
|
||||
nodeResult := &models.NodeExecutionResult{
|
||||
NodeID: node.ID,
|
||||
NodeName: node.Name,
|
||||
NodeType: node.Type,
|
||||
StartedAt: startTime.Unix(),
|
||||
}
|
||||
|
||||
var nodeOutput *models.NodeOutput
|
||||
|
||||
// 跳过禁用的节点
|
||||
if node.Disabled {
|
||||
nodeResult.Status = "skipped"
|
||||
nodeResult.Message = "node is disabled"
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
return nodeResult, nil
|
||||
}
|
||||
|
||||
// 获取处理器
|
||||
processor, err := models.GetProcessorByType(node.Type, node.Config)
|
||||
if err != nil {
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = fmt.Sprintf("failed to get processor: %v", err)
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
return nodeResult, nil
|
||||
}
|
||||
|
||||
// 执行处理器(带重试)
|
||||
var retries int
|
||||
maxRetries := node.MaxRetries
|
||||
if !node.RetryOnFail {
|
||||
maxRetries = 0
|
||||
}
|
||||
|
||||
for retries <= maxRetries {
|
||||
// 检查是否为分支处理器
|
||||
if branchProcessor, ok := processor.(models.BranchProcessor); ok {
|
||||
output, err := branchProcessor.ProcessWithBranch(e.ctx, wfCtx)
|
||||
if err != nil {
|
||||
if retries < maxRetries {
|
||||
retries++
|
||||
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
|
||||
continue
|
||||
}
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = err.Error()
|
||||
} else {
|
||||
nodeResult.Status = "success"
|
||||
if output != nil {
|
||||
nodeOutput = output
|
||||
if output.WfCtx != nil {
|
||||
wfCtx = output.WfCtx
|
||||
}
|
||||
nodeResult.Message = output.Message
|
||||
nodeResult.BranchIndex = output.BranchIndex
|
||||
if output.Terminate {
|
||||
nodeResult.Status = "terminated"
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// 普通处理器
|
||||
newWfCtx, msg, err := processor.Process(e.ctx, wfCtx)
|
||||
if err != nil {
|
||||
if retries < maxRetries {
|
||||
retries++
|
||||
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
|
||||
continue
|
||||
}
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = err.Error()
|
||||
} else {
|
||||
nodeResult.Status = "success"
|
||||
nodeResult.Message = msg
|
||||
if newWfCtx != nil {
|
||||
wfCtx = newWfCtx
|
||||
|
||||
// 检测流式输出标记
|
||||
if newWfCtx.Stream && newWfCtx.StreamChan != nil {
|
||||
nodeOutput = &models.NodeOutput{
|
||||
WfCtx: newWfCtx,
|
||||
Message: msg,
|
||||
Stream: true,
|
||||
StreamChan: newWfCtx.StreamChan,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 如果事件被 drop(返回 nil 或 Event 为 nil),标记为终止
|
||||
if newWfCtx == nil || newWfCtx.Event == nil {
|
||||
nodeResult.Status = "terminated"
|
||||
nodeResult.Message = msg
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
|
||||
logger.Infof("workflow: executed node %s (type=%s) status=%s msg=%s duration=%dms",
|
||||
node.Name, node.Type, nodeResult.Status, nodeResult.Message, nodeResult.DurationMs)
|
||||
|
||||
return nodeResult, nodeOutput
|
||||
}
|
||||
|
||||
// shouldFollowBranch 判断是否应该走某个分支
|
||||
func (e *WorkflowEngine) shouldFollowBranch(nodeID string, outputIndex int, branchResults map[string]*int) bool {
|
||||
branchIndex, hasBranch := branchResults[nodeID]
|
||||
if !hasBranch {
|
||||
// 没有分支结果,说明不是分支节点,只走第一个输出
|
||||
return outputIndex == 0
|
||||
}
|
||||
|
||||
if branchIndex == nil {
|
||||
// branchIndex 为 nil,走默认分支(通常是最后一个)
|
||||
return true
|
||||
}
|
||||
|
||||
// 只走选中的分支
|
||||
return outputIndex == *branchIndex
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) saveExecutionRecord(pipeline *models.EventPipeline, wfCtx *models.WorkflowContext, result *models.WorkflowResult, triggerCtx *models.WorkflowTriggerContext, startTime int64, duration int64) {
|
||||
executionID := triggerCtx.RequestID
|
||||
if executionID == "" {
|
||||
executionID = uuid.New().String()
|
||||
}
|
||||
|
||||
execution := &models.EventPipelineExecution{
|
||||
ID: executionID,
|
||||
PipelineID: pipeline.ID,
|
||||
PipelineName: pipeline.Name,
|
||||
Mode: triggerCtx.Mode,
|
||||
Status: result.Status,
|
||||
ErrorMessage: result.Message,
|
||||
ErrorNode: result.ErrorNode,
|
||||
CreatedAt: startTime,
|
||||
FinishedAt: time.Now().Unix(),
|
||||
DurationMs: duration,
|
||||
TriggerBy: triggerCtx.TriggerBy,
|
||||
}
|
||||
|
||||
if wfCtx.Event != nil {
|
||||
execution.EventID = wfCtx.Event.Id
|
||||
}
|
||||
|
||||
if err := execution.SetNodeResults(result.NodeResults); err != nil {
|
||||
logger.Errorf("workflow: failed to set node results: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
|
||||
if err := execution.SetInputsSnapshot(wfCtx.Inputs); err != nil {
|
||||
logger.Errorf("workflow: failed to set inputs snapshot: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
|
||||
if err := models.CreateEventPipelineExecution(e.ctx, execution); err != nil {
|
||||
logger.Errorf("workflow: failed to save execution record: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,11 @@
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/aisummary"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventdrop"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventupdate"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/logic"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
|
||||
)
|
||||
|
||||
|
||||
246
alert/pipeline/processor/aisummary/ai_summary.go
Normal file
246
alert/pipeline/processor/aisummary/ai_summary.go
Normal file
@@ -0,0 +1,246 @@
|
||||
package aisummary
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
const (
|
||||
HTTP_STATUS_SUCCESS_MAX = 299
|
||||
)
|
||||
|
||||
// AISummaryConfig 配置结构体
|
||||
type AISummaryConfig struct {
|
||||
callback.HTTPConfig
|
||||
ModelName string `json:"model_name"`
|
||||
APIKey string `json:"api_key"`
|
||||
PromptTemplate string `json:"prompt_template"`
|
||||
CustomParams map[string]interface{} `json:"custom_params"`
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
type ChatCompletionResponse struct {
|
||||
Choices []struct {
|
||||
Message struct {
|
||||
Content string `json:"content"`
|
||||
} `json:"message"`
|
||||
} `json:"choices"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
models.RegisterProcessor("ai_summary", &AISummaryConfig{})
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
result, err := common.InitProcessor[*AISummaryConfig](settings)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
if c.Client == nil {
|
||||
if err := c.initHTTPClient(); err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
|
||||
}
|
||||
}
|
||||
|
||||
// 准备告警事件信息
|
||||
eventInfo, err := c.prepareEventInfo(wfCtx)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
// 调用AI模型生成总结
|
||||
summary, err := c.generateAISummary(eventInfo)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
// 将总结添加到annotations字段
|
||||
if event.AnnotationsJSON == nil {
|
||||
event.AnnotationsJSON = make(map[string]string)
|
||||
}
|
||||
event.AnnotationsJSON["ai_summary"] = summary
|
||||
|
||||
// 更新Annotations字段
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
|
||||
}
|
||||
event.Annotations = string(b)
|
||||
|
||||
return wfCtx, "", nil
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) initHTTPClient() error {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
}
|
||||
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse proxy url: %v", err)
|
||||
}
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
|
||||
c.Client = &http.Client{
|
||||
Timeout: time.Duration(c.Timeout) * time.Millisecond,
|
||||
Transport: transport,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) prepareEventInfo(wfCtx *models.WorkflowContext) (string, error) {
|
||||
var defs = []string{
|
||||
"{{$event := .Event}}",
|
||||
"{{$inputs := .Inputs}}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.PromptTemplate), "")
|
||||
t, err := template.New("prompt").Funcs(template.FuncMap(tplx.TemplateFuncMap)).Parse(text)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to parse prompt template: %v", err)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
err = t.Execute(&body, wfCtx)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to execute prompt template: %v", err)
|
||||
}
|
||||
|
||||
return body.String(), nil
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
|
||||
// 构建基础请求参数
|
||||
reqParams := map[string]interface{}{
|
||||
"model": c.ModelName,
|
||||
"messages": []Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: eventInfo,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// 合并自定义参数
|
||||
for k, v := range c.CustomParams {
|
||||
converted, err := convertCustomParam(v)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to convert custom param %s: %v", k, err)
|
||||
}
|
||||
reqParams[k] = converted
|
||||
}
|
||||
|
||||
// 序列化请求体
|
||||
jsonData, err := json.Marshal(reqParams)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to marshal request body: %v", err)
|
||||
}
|
||||
|
||||
// 创建HTTP请求
|
||||
req, err := http.NewRequest("POST", c.URL, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create request: %v", err)
|
||||
}
|
||||
|
||||
// 设置请求头
|
||||
req.Header.Set("Authorization", "Bearer "+c.APIKey)
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
for k, v := range c.Headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
|
||||
// 发送请求
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// 检查响应状态码
|
||||
if resp.StatusCode > HTTP_STATUS_SUCCESS_MAX {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return "", fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
// 读取响应
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read response body: %v", err)
|
||||
}
|
||||
|
||||
// 解析响应
|
||||
var chatResp ChatCompletionResponse
|
||||
if err := json.Unmarshal(body, &chatResp); err != nil {
|
||||
return "", fmt.Errorf("failed to unmarshal response: %v", err)
|
||||
}
|
||||
|
||||
if len(chatResp.Choices) == 0 {
|
||||
return "", fmt.Errorf("no response from AI model")
|
||||
}
|
||||
|
||||
return chatResp.Choices[0].Message.Content, nil
|
||||
}
|
||||
|
||||
// convertCustomParam coerces a custom-parameter value (typically a string
// coming from the UI) into its natural JSON type: integer, float, bool,
// array, or object. Values that match none of those are returned unchanged.
//
// Fix: integers are now parsed with strconv.ParseInt before falling back to
// ParseFloat, so integer strings beyond float64's 2^53 exact-integer range
// (e.g. large IDs) keep full precision, and the whole-number float collapse
// is bounds-checked to avoid an out-of-range float64->int64 conversion.
func convertCustomParam(value interface{}) (interface{}, error) {
	if value == nil {
		return nil, nil
	}

	str, ok := value.(string)
	if !ok {
		// Non-string values are already typed; pass them through.
		return value, nil
	}

	// Exact integer parse first: preserves precision above 2^53.
	if i, err := strconv.ParseInt(str, 10, 64); err == nil {
		return i, nil
	}

	// Float next; collapse whole-number floats (e.g. "2.0", "1e3") to int64,
	// matching the historical behavior, but only when the value fits.
	if f, err := strconv.ParseFloat(str, 64); err == nil {
		if f >= -9223372036854775808.0 && f < 9223372036854775808.0 && f == float64(int64(f)) {
			return int64(f), nil
		}
		return f, nil
	}

	// Boolean literals ("true"/"false"/"1"/"0"/...).
	if b, err := strconv.ParseBool(str); err == nil {
		return b, nil
	}

	trimmed := strings.TrimSpace(str)

	// JSON array literal.
	if strings.HasPrefix(trimmed, "[") {
		var arr []interface{}
		if err := json.Unmarshal([]byte(str), &arr); err == nil {
			return arr, nil
		}
	}

	// JSON object literal.
	if strings.HasPrefix(trimmed, "{") {
		var obj map[string]interface{}
		if err := json.Unmarshal([]byte(str), &obj); err == nil {
			return obj, nil
		}
	}

	// Plain string: keep as-is.
	return str, nil
}
|
||||
145
alert/pipeline/processor/aisummary/ai_summary_test.go
Normal file
145
alert/pipeline/processor/aisummary/ai_summary_test.go
Normal file
@@ -0,0 +1,145 @@
|
||||
package aisummary
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestAISummaryConfig_Process(t *testing.T) {
|
||||
// 创建测试配置
|
||||
config := &AISummaryConfig{
|
||||
HTTPConfig: callback.HTTPConfig{
|
||||
URL: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
|
||||
Timeout: 30000,
|
||||
SkipSSLVerify: true,
|
||||
Headers: map[string]string{
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
},
|
||||
ModelName: "gemini-2.0-flash",
|
||||
APIKey: "*",
|
||||
PromptTemplate: "告警规则:{{$event.RuleName}}\n严重程度:{{$event.Severity}}",
|
||||
CustomParams: map[string]interface{}{
|
||||
"temperature": 0.7,
|
||||
"max_tokens": 2000,
|
||||
"top_p": 0.9,
|
||||
},
|
||||
}
|
||||
|
||||
// 创建测试事件
|
||||
event := &models.AlertCurEvent{
|
||||
RuleName: "Test Rule",
|
||||
Severity: 1,
|
||||
TagsMap: map[string]string{
|
||||
"host": "test-host",
|
||||
},
|
||||
AnnotationsJSON: map[string]string{
|
||||
"description": "Test alert",
|
||||
},
|
||||
}
|
||||
|
||||
// 创建 WorkflowContext
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Inputs: map[string]string{},
|
||||
}
|
||||
|
||||
// 测试模板处理
|
||||
eventInfo, err := config.prepareEventInfo(wfCtx)
|
||||
assert.NoError(t, err)
|
||||
assert.Contains(t, eventInfo, "Test Rule")
|
||||
assert.Contains(t, eventInfo, "1")
|
||||
|
||||
// 测试配置初始化
|
||||
processor, err := config.Init(config)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, processor)
|
||||
|
||||
// 测试处理函数
|
||||
result, _, err := processor.Process(&ctx.Context{}, wfCtx)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.Event.AnnotationsJSON["ai_summary"])
|
||||
|
||||
// 展示处理结果
|
||||
t.Log("\n=== 处理结果 ===")
|
||||
t.Logf("告警规则: %s", result.Event.RuleName)
|
||||
t.Logf("严重程度: %d", result.Event.Severity)
|
||||
t.Logf("标签: %v", result.Event.TagsMap)
|
||||
t.Logf("原始注释: %v", result.Event.AnnotationsJSON["description"])
|
||||
t.Logf("AI总结: %s", result.Event.AnnotationsJSON["ai_summary"])
|
||||
}
|
||||
|
||||
func TestConvertCustomParam(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input interface{}
|
||||
expected interface{}
|
||||
hasError bool
|
||||
}{
|
||||
{
|
||||
name: "nil value",
|
||||
input: nil,
|
||||
expected: nil,
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "string number to int64",
|
||||
input: "123",
|
||||
expected: int64(123),
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "string float to float64",
|
||||
input: "123.45",
|
||||
expected: 123.45,
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "string boolean to bool",
|
||||
input: "true",
|
||||
expected: true,
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "string false to bool",
|
||||
input: "false",
|
||||
expected: false,
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "JSON array string to slice",
|
||||
input: `["a", "b", "c"]`,
|
||||
expected: []interface{}{"a", "b", "c"},
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "JSON object string to map",
|
||||
input: `{"key": "value", "num": 123}`,
|
||||
expected: map[string]interface{}{"key": "value", "num": float64(123)},
|
||||
hasError: false,
|
||||
},
|
||||
{
|
||||
name: "plain string remains string",
|
||||
input: "hello world",
|
||||
expected: "hello world",
|
||||
hasError: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
converted, err := convertCustomParam(test.input)
|
||||
if test.hasError {
|
||||
assert.Error(t, err)
|
||||
return
|
||||
}
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, test.expected, converted)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@ package callback
|
||||
import (
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@@ -10,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/utils"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
@@ -42,7 +44,8 @@ func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
if c.Client == nil {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
@@ -51,7 +54,7 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to parse proxy url: %v", err)
|
||||
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
} else {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@@ -69,16 +72,19 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
headers[k] = v
|
||||
}
|
||||
|
||||
body, err := json.Marshal(event)
|
||||
url, err := utils.TplRender(wfCtx, c.URL)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal event: %v", err)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("failed to render url template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
|
||||
body, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to create request: %v event: %v", err, event)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", url, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
for k, v := range headers {
|
||||
@@ -91,16 +97,14 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to send request: %v event: %v", err, event)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to read response body: %v event: %v", err, event)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
logger.Infof("response body: %s", string(b))
|
||||
return event
|
||||
logger.Debugf("callback processor response body: %s", string(b))
|
||||
return wfCtx, "callback success", nil
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package eventdrop
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
texttemplate "text/template"
|
||||
|
||||
@@ -25,37 +26,38 @@ func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (c *EventDropConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
// 使用背景是可以根据此处理器,实现对事件进行更加灵活的过滤的逻辑
|
||||
// 在标签过滤和属性过滤都不满足需求时可以使用
|
||||
// 如果模板执行结果为 true,则删除该事件
|
||||
event := wfCtx.Event
|
||||
|
||||
var defs = []string{
|
||||
"{{ $event := . }}",
|
||||
"{{ $labels := .TagsMap }}",
|
||||
"{{ $value := .TriggerValue }}",
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.Content), "")
|
||||
|
||||
tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
logger.Errorf("processor failed to parse template: %v event: %v", err, event)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
if err = tpl.Execute(&body, event); err != nil {
|
||||
logger.Errorf("processor failed to execute template: %v event: %v", err, event)
|
||||
return event
|
||||
if err = tpl.Execute(&body, wfCtx); err != nil {
|
||||
return wfCtx, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(body.String())
|
||||
logger.Infof("processor eventdrop result: %v", result)
|
||||
if result == "true" {
|
||||
logger.Infof("processor eventdrop drop event: %v", event)
|
||||
return nil
|
||||
wfCtx.Event = nil
|
||||
logger.Infof("processor eventdrop drop event: %s", event.Hash)
|
||||
return wfCtx, "drop event success", nil
|
||||
}
|
||||
|
||||
return event
|
||||
return wfCtx, "drop event failed", nil
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package eventupdate
|
||||
import (
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@@ -30,7 +31,8 @@ func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
if c.Client == nil {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
@@ -39,7 +41,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to parse proxy url: %v", err)
|
||||
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
} else {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@@ -59,14 +61,12 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
|
||||
body, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal event: %v", err)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
logger.Errorf("failed to create request: %v event: %v", err, event)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
for k, v := range headers {
|
||||
@@ -79,22 +79,19 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to send request: %v event: %v", err, event)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to read response body: %v event: %v", err, event)
|
||||
return event
|
||||
return nil, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
|
||||
}
|
||||
logger.Infof("response body: %s", string(b))
|
||||
logger.Debugf("event update processor response body: %s", string(b))
|
||||
|
||||
err = json.Unmarshal(b, &event)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to unmarshal response body: %v event: %v", err, event)
|
||||
return event
|
||||
return wfCtx, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
return event
|
||||
return wfCtx, "", nil
|
||||
}
|
||||
|
||||
197
alert/pipeline/processor/logic/if.go
Normal file
197
alert/pipeline/processor/logic/if.go
Normal file
@@ -0,0 +1,197 @@
|
||||
package logic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
// 判断模式常量
|
||||
const (
|
||||
ConditionModeExpression = "expression" // 表达式模式(默认)
|
||||
ConditionModeTags = "tags" // 标签/属性模式
|
||||
)
|
||||
|
||||
// IfConfig If 条件处理器配置
|
||||
type IfConfig struct {
|
||||
// 判断模式:expression(表达式)或 tags(标签/属性)
|
||||
Mode string `json:"mode,omitempty"`
|
||||
|
||||
// 表达式模式配置
|
||||
// 条件表达式(支持 Go 模板语法)
|
||||
// 例如:{{ if eq .Severity 1 }}true{{ end }}
|
||||
Condition string `json:"condition,omitempty"`
|
||||
|
||||
// 标签/属性模式配置
|
||||
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
|
||||
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
|
||||
|
||||
// 内部使用,解析后的过滤器
|
||||
parsedLabelKeys []models.TagFilter `json:"-"`
|
||||
parsedAttributes []models.TagFilter `json:"-"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
models.RegisterProcessor("logic.if", &IfConfig{})
|
||||
}
|
||||
|
||||
func (c *IfConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
result, err := common.InitProcessor[*IfConfig](settings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 解析标签过滤器
|
||||
if len(result.LabelKeys) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelKeysCopy := make([]models.TagFilter, len(result.LabelKeys))
|
||||
copy(labelKeysCopy, result.LabelKeys)
|
||||
for i := range labelKeysCopy {
|
||||
if labelKeysCopy[i].Func == "" {
|
||||
labelKeysCopy[i].Func = labelKeysCopy[i].Op
|
||||
}
|
||||
}
|
||||
result.parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse label_keys: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 解析属性过滤器
|
||||
if len(result.Attributes) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attributesCopy := make([]models.TagFilter, len(result.Attributes))
|
||||
copy(attributesCopy, result.Attributes)
|
||||
for i := range attributesCopy {
|
||||
if attributesCopy[i].Func == "" {
|
||||
attributesCopy[i].Func = attributesCopy[i].Op
|
||||
}
|
||||
}
|
||||
result.parsedAttributes, err = models.ParseTagFilter(attributesCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse attributes: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Process 实现 Processor 接口(兼容旧模式)
|
||||
func (c *IfConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
result, err := c.evaluateCondition(wfCtx)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("if processor: failed to evaluate condition: %v", err)
|
||||
}
|
||||
|
||||
if result {
|
||||
return wfCtx, "condition matched (true branch)", nil
|
||||
}
|
||||
return wfCtx, "condition not matched (false branch)", nil
|
||||
}
|
||||
|
||||
// ProcessWithBranch 实现 BranchProcessor 接口
|
||||
func (c *IfConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
|
||||
result, err := c.evaluateCondition(wfCtx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("if processor: failed to evaluate condition: %v", err)
|
||||
}
|
||||
|
||||
output := &models.NodeOutput{
|
||||
WfCtx: wfCtx,
|
||||
}
|
||||
|
||||
if result {
|
||||
// 条件为 true,走输出 0(true 分支)
|
||||
branchIndex := 0
|
||||
output.BranchIndex = &branchIndex
|
||||
output.Message = "condition matched (true branch)"
|
||||
} else {
|
||||
// 条件为 false,走输出 1(false 分支)
|
||||
branchIndex := 1
|
||||
output.BranchIndex = &branchIndex
|
||||
output.Message = "condition not matched (false branch)"
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
// evaluateCondition 评估条件
|
||||
func (c *IfConfig) evaluateCondition(wfCtx *models.WorkflowContext) (bool, error) {
|
||||
mode := c.Mode
|
||||
if mode == "" {
|
||||
mode = ConditionModeExpression // 默认表达式模式
|
||||
}
|
||||
|
||||
switch mode {
|
||||
case ConditionModeTags:
|
||||
return c.evaluateTagsCondition(wfCtx.Event)
|
||||
default:
|
||||
return c.evaluateExpressionCondition(wfCtx)
|
||||
}
|
||||
}
|
||||
|
||||
// evaluateExpressionCondition 评估表达式条件
|
||||
func (c *IfConfig) evaluateExpressionCondition(wfCtx *models.WorkflowContext) (bool, error) {
|
||||
if c.Condition == "" {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 构建模板数据
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.Condition), "")
|
||||
|
||||
tpl, err := template.New("if_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err = tpl.Execute(&buf, wfCtx); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(strings.ToLower(buf.String()))
|
||||
return result == "true" || result == "1", nil
|
||||
}
|
||||
|
||||
// evaluateTagsCondition 评估标签/属性条件
|
||||
func (c *IfConfig) evaluateTagsCondition(event *models.AlertCurEvent) (bool, error) {
|
||||
// 如果没有配置任何过滤条件,默认返回 true
|
||||
if len(c.parsedLabelKeys) == 0 && len(c.parsedAttributes) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 匹配标签 (TagsMap)
|
||||
if len(c.parsedLabelKeys) > 0 {
|
||||
tagsMap := event.TagsMap
|
||||
if tagsMap == nil {
|
||||
tagsMap = make(map[string]string)
|
||||
}
|
||||
if !alertCommon.MatchTags(tagsMap, c.parsedLabelKeys) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
|
||||
if len(c.parsedAttributes) > 0 {
|
||||
attributesMap := event.JsonTagsAndValue()
|
||||
if !alertCommon.MatchTags(attributesMap, c.parsedAttributes) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
224
alert/pipeline/processor/logic/switch.go
Normal file
224
alert/pipeline/processor/logic/switch.go
Normal file
@@ -0,0 +1,224 @@
|
||||
package logic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
// SwitchCase Switch 分支定义
|
||||
type SwitchCase struct {
|
||||
// 判断模式:expression(表达式)或 tags(标签/属性)
|
||||
Mode string `json:"mode,omitempty"`
|
||||
|
||||
// 表达式模式配置
|
||||
// 条件表达式(支持 Go 模板语法)
|
||||
Condition string `json:"condition,omitempty"`
|
||||
|
||||
// 标签/属性模式配置
|
||||
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
|
||||
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
|
||||
|
||||
// 分支名称(可选,用于日志)
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// 内部使用,解析后的过滤器
|
||||
parsedLabelKeys []models.TagFilter `json:"-"`
|
||||
parsedAttributes []models.TagFilter `json:"-"`
|
||||
}
|
||||
|
||||
// SwitchConfig Switch 多分支处理器配置
|
||||
type SwitchConfig struct {
|
||||
// 分支条件列表
|
||||
// 按顺序匹配,第一个为 true 的分支将被选中
|
||||
Cases []SwitchCase `json:"cases"`
|
||||
// 是否允许多个分支同时匹配(默认 false,只走第一个匹配的)
|
||||
AllowMultiple bool `json:"allow_multiple,omitempty"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
models.RegisterProcessor("logic.switch", &SwitchConfig{})
|
||||
}
|
||||
|
||||
func (c *SwitchConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
result, err := common.InitProcessor[*SwitchConfig](settings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 解析每个 case 的标签和属性过滤器
|
||||
for i := range result.Cases {
|
||||
if len(result.Cases[i].LabelKeys) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelKeysCopy := make([]models.TagFilter, len(result.Cases[i].LabelKeys))
|
||||
copy(labelKeysCopy, result.Cases[i].LabelKeys)
|
||||
for j := range labelKeysCopy {
|
||||
if labelKeysCopy[j].Func == "" {
|
||||
labelKeysCopy[j].Func = labelKeysCopy[j].Op
|
||||
}
|
||||
}
|
||||
result.Cases[i].parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse label_keys for case[%d]: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(result.Cases[i].Attributes) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attributesCopy := make([]models.TagFilter, len(result.Cases[i].Attributes))
|
||||
copy(attributesCopy, result.Cases[i].Attributes)
|
||||
for j := range attributesCopy {
|
||||
if attributesCopy[j].Func == "" {
|
||||
attributesCopy[j].Func = attributesCopy[j].Op
|
||||
}
|
||||
}
|
||||
result.Cases[i].parsedAttributes, err = models.ParseTagFilter(attributesCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse attributes for case[%d]: %v", i, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Process 实现 Processor 接口(兼容旧模式)
|
||||
func (c *SwitchConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
index, caseName, err := c.evaluateCases(wfCtx)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
|
||||
}
|
||||
|
||||
if index >= 0 {
|
||||
if caseName != "" {
|
||||
return wfCtx, fmt.Sprintf("matched case[%d]: %s", index, caseName), nil
|
||||
}
|
||||
return wfCtx, fmt.Sprintf("matched case[%d]", index), nil
|
||||
}
|
||||
|
||||
// 走默认分支(最后一个输出)
|
||||
return wfCtx, "no case matched, using default branch", nil
|
||||
}
|
||||
|
||||
// ProcessWithBranch 实现 BranchProcessor 接口
|
||||
func (c *SwitchConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
|
||||
index, caseName, err := c.evaluateCases(wfCtx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
|
||||
}
|
||||
|
||||
output := &models.NodeOutput{
|
||||
WfCtx: wfCtx,
|
||||
}
|
||||
|
||||
if index >= 0 {
|
||||
output.BranchIndex = &index
|
||||
if caseName != "" {
|
||||
output.Message = fmt.Sprintf("matched case[%d]: %s", index, caseName)
|
||||
} else {
|
||||
output.Message = fmt.Sprintf("matched case[%d]", index)
|
||||
}
|
||||
} else {
|
||||
// 默认分支的索引是 cases 数量(即最后一个输出端口)
|
||||
defaultIndex := len(c.Cases)
|
||||
output.BranchIndex = &defaultIndex
|
||||
output.Message = "no case matched, using default branch"
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
// evaluateCases 评估所有分支条件
|
||||
// 返回匹配的分支索引和分支名称,如果没有匹配返回 -1
|
||||
func (c *SwitchConfig) evaluateCases(wfCtx *models.WorkflowContext) (int, string, error) {
|
||||
for i := range c.Cases {
|
||||
matched, err := c.evaluateCaseCondition(&c.Cases[i], wfCtx)
|
||||
if err != nil {
|
||||
return -1, "", fmt.Errorf("case[%d] evaluation error: %v", i, err)
|
||||
}
|
||||
if matched {
|
||||
return i, c.Cases[i].Name, nil
|
||||
}
|
||||
}
|
||||
return -1, "", nil
|
||||
}
|
||||
|
||||
// evaluateCaseCondition 评估单个分支条件
|
||||
func (c *SwitchConfig) evaluateCaseCondition(caseItem *SwitchCase, wfCtx *models.WorkflowContext) (bool, error) {
|
||||
mode := caseItem.Mode
|
||||
if mode == "" {
|
||||
mode = ConditionModeExpression // 默认表达式模式
|
||||
}
|
||||
|
||||
switch mode {
|
||||
case ConditionModeTags:
|
||||
return c.evaluateTagsCondition(caseItem, wfCtx.Event)
|
||||
default:
|
||||
return c.evaluateExpressionCondition(caseItem.Condition, wfCtx)
|
||||
}
|
||||
}
|
||||
|
||||
// evaluateExpressionCondition 评估表达式条件
|
||||
func (c *SwitchConfig) evaluateExpressionCondition(condition string, wfCtx *models.WorkflowContext) (bool, error) {
|
||||
if condition == "" {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, condition), "")
|
||||
|
||||
tpl, err := template.New("switch_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err = tpl.Execute(&buf, wfCtx); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(strings.ToLower(buf.String()))
|
||||
return result == "true" || result == "1", nil
|
||||
}
|
||||
|
||||
// evaluateTagsCondition 评估标签/属性条件
|
||||
func (c *SwitchConfig) evaluateTagsCondition(caseItem *SwitchCase, event *models.AlertCurEvent) (bool, error) {
|
||||
// 如果没有配置任何过滤条件,默认返回 false(不匹配)
|
||||
if len(caseItem.parsedLabelKeys) == 0 && len(caseItem.parsedAttributes) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// 匹配标签 (TagsMap)
|
||||
if len(caseItem.parsedLabelKeys) > 0 {
|
||||
tagsMap := event.TagsMap
|
||||
if tagsMap == nil {
|
||||
tagsMap = make(map[string]string)
|
||||
}
|
||||
if !alertCommon.MatchTags(tagsMap, caseItem.parsedLabelKeys) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
|
||||
if len(caseItem.parsedAttributes) > 0 {
|
||||
attributesMap := event.JsonTagsAndValue()
|
||||
if !alertCommon.MatchTags(attributesMap, caseItem.parsedAttributes) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
@@ -42,7 +42,7 @@ func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
|
||||
func (r *RelabelConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
sourceLabels := make([]model.LabelName, len(r.SourceLabels))
|
||||
for i := range r.SourceLabels {
|
||||
sourceLabels[i] = model.LabelName(strings.ReplaceAll(r.SourceLabels[i], ".", REPLACE_DOT))
|
||||
@@ -63,8 +63,8 @@ func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *
|
||||
},
|
||||
}
|
||||
|
||||
EventRelabel(event, relabelConfigs)
|
||||
return event
|
||||
EventRelabel(wfCtx.Event, relabelConfigs)
|
||||
return wfCtx, "", nil
|
||||
}
|
||||
|
||||
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {
|
||||
|
||||
32
alert/pipeline/processor/utils/utils.go
Normal file
32
alert/pipeline/processor/utils/utils.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
func TplRender(wfCtx *models.WorkflowContext, content string) (string, error) {
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
text := strings.Join(append(defs, content), "")
|
||||
tpl, err := template.New("tpl").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to parse template: %v", err)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
if err = tpl.Execute(&body, wfCtx); err != nil {
|
||||
return "", fmt.Errorf("failed to execute template: %v", err)
|
||||
}
|
||||
|
||||
return strings.TrimSpace(body.String()), nil
|
||||
}
|
||||
@@ -26,8 +26,6 @@ import (
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
|
||||
|
||||
type ExternalProcessorsType struct {
|
||||
ExternalLock sync.RWMutex
|
||||
Processors map[string]*Processor
|
||||
@@ -76,7 +74,6 @@ type Processor struct {
|
||||
|
||||
HandleFireEventHook HandleEventFunc
|
||||
HandleRecoverEventHook HandleEventFunc
|
||||
EventMuteHook EventMuteHookFunc
|
||||
|
||||
ScheduleEntry cron.Entry
|
||||
PromEvalInterval int
|
||||
@@ -121,7 +118,6 @@ func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64,
|
||||
|
||||
HandleFireEventHook: func(event *models.AlertCurEvent) {},
|
||||
HandleRecoverEventHook: func(event *models.AlertCurEvent) {},
|
||||
EventMuteHook: func(event *models.AlertCurEvent) bool { return false },
|
||||
}
|
||||
|
||||
p.mayHandleGroup()
|
||||
@@ -135,7 +131,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
p.inhibit = inhibit
|
||||
cachedRule := p.alertRuleCache.Get(p.rule.Id)
|
||||
if cachedRule == nil {
|
||||
logger.Errorf("rule not found %+v", anomalyPoints)
|
||||
logger.Warningf("alert_eval_%d datasource_%d handle error: rule not found, maybe rule has been deleted, anomalyPoints:%+v", p.rule.Id, p.datasourceId, anomalyPoints)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
return
|
||||
}
|
||||
@@ -155,9 +151,19 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
|
||||
hash := event.Hash
|
||||
alertingKeys[hash] = struct{}{}
|
||||
|
||||
// event processor
|
||||
eventCopy := event.DeepCopy()
|
||||
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
|
||||
if event == nil {
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted drop by pipeline event:%s", p.rule.Id, p.datasourceId, eventCopy.Hash)
|
||||
continue
|
||||
}
|
||||
|
||||
// event mute
|
||||
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
|
||||
if isMuted {
|
||||
logger.Debugf("rule_eval:%s event:%v is muted, detail:%s", p.Key(), event, detail)
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted, detail:%s event:%s", p.rule.Id, p.datasourceId, detail, event.Hash)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -167,8 +173,8 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
continue
|
||||
}
|
||||
|
||||
if p.EventMuteHook(event) {
|
||||
logger.Debugf("rule_eval:%s event:%v is muted by hook", p.Key(), event)
|
||||
if dispatch.EventMuteHook(event) {
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted by hook event:%s", p.rule.Id, p.datasourceId, event.Hash)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -241,7 +247,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
|
||||
|
||||
if err := json.Unmarshal([]byte(p.rule.Annotations), &event.AnnotationsJSON); err != nil {
|
||||
event.AnnotationsJSON = make(map[string]string) // 解析失败时使用空 map
|
||||
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
|
||||
logger.Warningf("alert_eval_%d datasource_%d unmarshal annotations json failed: %v", p.rule.Id, p.datasourceId, err)
|
||||
}
|
||||
|
||||
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
|
||||
@@ -266,7 +272,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
|
||||
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
|
||||
event.Target = pt
|
||||
} else {
|
||||
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
|
||||
logger.Infof("alert_eval_%d datasource_%d fill event target error, ident: %s doesn't exist in cache.", p.rule.Id, p.datasourceId, event.TargetIdent)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -365,19 +371,19 @@ func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value
|
||||
lastPendingEvent, has := p.pendingsUseByRecover.Get(hash)
|
||||
if !has {
|
||||
// 说明没有产生过异常点,就不需要恢复了
|
||||
logger.Debugf("rule_eval:%s event:%v do not has pending event, not recover", p.Key(), event)
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s do not has pending event, not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
if now-lastPendingEvent.LastEvalTime < cachedRule.RecoverDuration {
|
||||
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
|
||||
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
|
||||
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -428,17 +434,18 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
|
||||
continue
|
||||
}
|
||||
|
||||
var preTriggerTime int64 // 第一个 pending event 的触发时间
|
||||
var preEvalTime int64 // 第一个 pending event 的检测时间
|
||||
preEvent, has := p.pendings.Get(event.Hash)
|
||||
if has {
|
||||
p.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
|
||||
preTriggerTime = preEvent.TriggerTime
|
||||
preEvalTime = preEvent.FirstEvalTime
|
||||
} else {
|
||||
event.FirstEvalTime = event.LastEvalTime
|
||||
p.pendings.Set(event.Hash, event)
|
||||
preTriggerTime = event.TriggerTime
|
||||
preEvalTime = event.FirstEvalTime
|
||||
}
|
||||
|
||||
if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
|
||||
if event.LastEvalTime-preEvalTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
|
||||
fireEvents = append(fireEvents, event)
|
||||
if severity > event.Severity {
|
||||
severity = event.Severity
|
||||
@@ -453,7 +460,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
|
||||
func (p *Processor) inhibitEvent(events []*models.AlertCurEvent, highSeverity int) {
|
||||
for _, event := range events {
|
||||
if p.inhibit && event.Severity > highSeverity {
|
||||
logger.Debugf("rule_eval:%s event:%+v inhibit highSeverity:%d", p.Key(), event, highSeverity)
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s inhibit highSeverity:%d", p.rule.Id, p.datasourceId, event.Hash, highSeverity)
|
||||
continue
|
||||
}
|
||||
p.fireEvent(event)
|
||||
@@ -467,16 +474,18 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s event:%+v fire", p.Key(), event)
|
||||
message := "unknown"
|
||||
defer func() {
|
||||
logger.Infof("alert_eval_%d datasource_%d event-hash-%s %s", p.rule.Id, p.datasourceId, event.Hash, message)
|
||||
}()
|
||||
|
||||
if fired, has := p.fires.Get(event.Hash); has {
|
||||
p.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
|
||||
event.FirstTriggerTime = fired.FirstTriggerTime
|
||||
p.HandleFireEventHook(event)
|
||||
|
||||
if cachedRule.NotifyRepeatStep == 0 {
|
||||
logger.Debugf("rule_eval:%s event:%+v repeat is zero nothing to do", p.Key(), event)
|
||||
// 说明不想重复通知,那就直接返回了,nothing to do
|
||||
// do not need to send alert again
|
||||
message = "stalled, rule.notify_repeat_step is 0, no need to repeat notify"
|
||||
return
|
||||
}
|
||||
|
||||
@@ -485,21 +494,26 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
|
||||
if cachedRule.NotifyMaxNumber == 0 {
|
||||
// 最大可以发送次数如果是0,表示不想限制最大发送次数,一直发即可
|
||||
event.NotifyCurNumber = fired.NotifyCurNumber + 1
|
||||
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_ignore(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
|
||||
p.pushEventToQueue(event)
|
||||
} else {
|
||||
// 有最大发送次数的限制,就要看已经发了几次了,是否达到了最大发送次数
|
||||
if fired.NotifyCurNumber >= cachedRule.NotifyMaxNumber {
|
||||
logger.Debugf("rule_eval:%s event:%+v reach max number", p.Key(), event)
|
||||
message = fmt.Sprintf("stalled, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_not_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, fired.NotifyCurNumber, cachedRule.NotifyMaxNumber)
|
||||
return
|
||||
} else {
|
||||
event.NotifyCurNumber = fired.NotifyCurNumber + 1
|
||||
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
|
||||
p.pushEventToQueue(event)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
message = fmt.Sprintf("stalled, notify_repeat_step_not_matched(%d < %d + %d * 60)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep)
|
||||
}
|
||||
} else {
|
||||
event.NotifyCurNumber = 1
|
||||
event.FirstTriggerTime = event.TriggerTime
|
||||
message = fmt.Sprintf("fired, first_trigger_time: %d", event.FirstTriggerTime)
|
||||
p.HandleFireEventHook(event)
|
||||
p.pushEventToQueue(event)
|
||||
}
|
||||
@@ -513,7 +527,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
|
||||
|
||||
dispatch.LogEvent(e, "push_queue")
|
||||
if !queue.EventQueue.PushFront(e) {
|
||||
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
|
||||
logger.Warningf("alert_eval_%d datasource_%d event_push_queue: queue is full, event:%s", p.rule.Id, p.datasourceId, e.Hash)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -524,7 +538,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
|
||||
|
||||
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
|
||||
if err != nil {
|
||||
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d recover event from db failed, err:%s", p.rule.Id, p.datasourceId, err)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
p.fires = NewAlertCurEventMap(nil)
|
||||
return
|
||||
@@ -577,7 +591,9 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
|
||||
}
|
||||
|
||||
// handle rule tags
|
||||
for _, tag := range p.rule.AppendTagsJSON {
|
||||
tags := p.rule.AppendTagsJSON
|
||||
tags = append(tags, "rulename="+p.rule.Name)
|
||||
for _, tag := range tags {
|
||||
arr := strings.SplitN(tag, "=", 2)
|
||||
|
||||
var defs = []string{
|
||||
@@ -603,8 +619,6 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
|
||||
|
||||
tagsMap[arr[0]] = body.String()
|
||||
}
|
||||
|
||||
tagsMap["rulename"] = p.rule.Name
|
||||
p.tagsMap = tagsMap
|
||||
|
||||
// handle tagsArr
|
||||
|
||||
@@ -22,10 +22,11 @@ type Router struct {
|
||||
AlertStats *astats.Stats
|
||||
Ctx *ctx.Context
|
||||
ExternalProcessors *process.ExternalProcessorsType
|
||||
LogDir string
|
||||
}
|
||||
|
||||
func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheType, tc *memsto.TargetCacheType, bgc *memsto.BusiGroupCacheType,
|
||||
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType) *Router {
|
||||
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType, logDir string) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Alert: alert,
|
||||
@@ -35,6 +36,7 @@ func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheT
|
||||
AlertStats: astats,
|
||||
Ctx: ctx,
|
||||
ExternalProcessors: externalProcessors,
|
||||
LogDir: logDir,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,6 +52,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.POST("/event", rt.pushEventToQueue)
|
||||
service.POST("/event-persist", rt.eventPersist)
|
||||
service.POST("/make-event", rt.makeEvent)
|
||||
service.GET("/event-detail/:hash", rt.eventDetail)
|
||||
service.GET("/alert-eval-detail/:id", rt.alertEvalDetail)
|
||||
service.GET("/trace-logs/:traceid", rt.traceLogs)
|
||||
}
|
||||
|
||||
func Render(c *gin.Context, data, msg interface{}) {
|
||||
|
||||
28
alert/router/router_alert_eval_detail.go
Normal file
28
alert/router/router_alert_eval_detail.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) alertEvalDetail(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
ginx.Bomb(200, "invalid rule id format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
keyword := fmt.Sprintf("alert_eval_%s", id)
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
@@ -13,9 +13,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -25,6 +25,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
if event.RuleId == 0 {
|
||||
ginx.Bomb(200, "event is illegal")
|
||||
}
|
||||
event.FE2DB()
|
||||
|
||||
event.TagsMap = make(map[string]string)
|
||||
for i := 0; i < len(event.TagsJSON); i++ {
|
||||
@@ -40,7 +41,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
|
||||
event.TagsMap[arr[0]] = arr[1]
|
||||
}
|
||||
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
|
||||
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
|
||||
if hit {
|
||||
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
|
||||
ginx.NewRender(c).Message(nil)
|
||||
@@ -74,7 +75,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
|
||||
dispatch.LogEvent(event, "http_push_queue")
|
||||
if !queue.EventQueue.PushFront(event) {
|
||||
msg := fmt.Sprintf("event:%+v push_queue err: queue is full", event)
|
||||
msg := fmt.Sprintf("event:%s push_queue err: queue is full", event.Hash)
|
||||
ginx.Bomb(200, msg)
|
||||
logger.Warningf(msg)
|
||||
}
|
||||
@@ -104,21 +105,21 @@ func (rt *Router) makeEvent(c *gin.Context) {
|
||||
for i := 0; i < len(events); i++ {
|
||||
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
|
||||
if err != nil {
|
||||
logger.Warningf("event:%+v get node err:%v", events[i], err)
|
||||
logger.Warningf("event(rule_id=%d ds_id=%d) get node err:%v", events[i].RuleId, events[i].DatasourceId, err)
|
||||
ginx.Bomb(200, "event node not exists")
|
||||
}
|
||||
|
||||
if node != rt.Alert.Heartbeat.Endpoint {
|
||||
err := forwardEvent(events[i], node)
|
||||
if err != nil {
|
||||
logger.Warningf("event:%+v forward err:%v", events[i], err)
|
||||
logger.Warningf("event(rule_id=%d ds_id=%d) forward err:%v", events[i].RuleId, events[i].DatasourceId, err)
|
||||
ginx.Bomb(200, "event forward error")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
ruleWorker, exists := rt.ExternalProcessors.GetExternalAlertRule(events[i].DatasourceId, events[i].RuleId)
|
||||
logger.Debugf("handle event:%+v exists:%v", events[i], exists)
|
||||
logger.Debugf("handle event(rule_id=%d ds_id=%d) exists:%v", events[i].RuleId, events[i].DatasourceId, exists)
|
||||
if !exists {
|
||||
ginx.Bomb(200, "rule not exists")
|
||||
}
|
||||
@@ -142,6 +143,6 @@ func forwardEvent(event *eventForm, instance string) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logger.Infof("forward event: result=succ url=%s code=%d event:%v response=%s", ur, code, event, string(res))
|
||||
logger.Infof("forward event: result=succ url=%s code=%d rule_id=%d response=%s", ur, code, event.RuleId, string(res))
|
||||
return nil
|
||||
}
|
||||
|
||||
27
alert/router/router_event_detail.go
Normal file
27
alert/router/router_event_detail.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) eventDetail(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
ginx.Bomb(200, "invalid hash format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
28
alert/router/router_trace_logs.go
Normal file
28
alert/router/router_trace_logs.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) traceLogs(c *gin.Context) {
|
||||
traceId := ginx.UrlParamStr(c, "traceid")
|
||||
if !loggrep.IsValidTraceID(traceId) {
|
||||
ginx.Bomb(200, "invalid trace id format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
keyword := "trace_id=" + traceId
|
||||
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"net/url"
|
||||
"strings"
|
||||
@@ -140,7 +141,7 @@ func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, chan
|
||||
|
||||
func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, notifyRuleID int64, channel, target, res string, err error) {
|
||||
// 一个通知可能对应多个 event,都需要记录
|
||||
notis := make([]*models.NotificaitonRecord, 0, len(evts))
|
||||
notis := make([]*models.NotificationRecord, 0, len(evts))
|
||||
for _, evt := range evts {
|
||||
noti := models.NewNotificationRecord(evt, notifyRuleID, channel, target)
|
||||
if err != nil {
|
||||
@@ -166,11 +167,13 @@ func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, notifyRuleID i
|
||||
func doSend(url string, body interface{}, channel string, stats *astats.Stats) (string, error) {
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
|
||||
start := time.Now()
|
||||
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
|
||||
res = []byte(fmt.Sprintf("duration: %d ms status_code:%d, response:%s", time.Since(start).Milliseconds(), code, string(res)))
|
||||
if err != nil {
|
||||
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
return "", err
|
||||
return string(res), err
|
||||
}
|
||||
|
||||
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
|
||||
@@ -202,6 +205,6 @@ func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.
|
||||
|
||||
succ := queue.eventQueue.Push(event)
|
||||
if !succ {
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
|
||||
func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
|
||||
conf := smtp
|
||||
if conf.Host == "" || conf.Port == 0 {
|
||||
logger.Warning("SMTP configurations invalid")
|
||||
logger.Debug("SMTP configurations invalid")
|
||||
<-mailQuit
|
||||
return
|
||||
}
|
||||
|
||||
@@ -30,14 +30,14 @@ type IbexCallBacker struct {
|
||||
|
||||
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
|
||||
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
|
||||
logger.Warningf("event_callback_ibex: url or events is empty, url: %s", ctx.CallBackURL)
|
||||
return
|
||||
}
|
||||
|
||||
event := ctx.Events[0]
|
||||
|
||||
if event.IsRecovered {
|
||||
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
|
||||
logger.Infof("event_callback_ibex: event is recovered, event: %s", event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -45,9 +45,9 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
|
||||
}
|
||||
|
||||
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
|
||||
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
|
||||
logger.Infof("event_callback_ibex: url: %s, event: %s", url, event.Hash)
|
||||
if imodels.DB() == nil && ctx.IsCenter {
|
||||
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
|
||||
logger.Warningf("event_callback_ibex: db is nil, event: %s", event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
|
||||
|
||||
id, err := strconv.ParseInt(idstr, 10, 64)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
|
||||
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %s", url, event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -82,34 +82,37 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
|
||||
}
|
||||
|
||||
if host == "" {
|
||||
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
|
||||
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %s", id, event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event)
|
||||
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event, "")
|
||||
}
|
||||
|
||||
func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
|
||||
userCache *memsto.UserCacheType, event *models.AlertCurEvent) {
|
||||
logger.Infof("event_callback_ibex: id: %d, host: %s, event: %+v", id, host, event)
|
||||
userCache *memsto.UserCacheType, event *models.AlertCurEvent, args string) (int64, error) {
|
||||
logger.Infof("event_callback_ibex: id: %d, host: %s, args: %s, event: %s", id, host, args, event.Hash)
|
||||
|
||||
tpl := taskTplCache.Get(id)
|
||||
if tpl == nil {
|
||||
logger.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
|
||||
return
|
||||
err := fmt.Errorf("event_callback_ibex: no such tpl(%d), event: %s", id, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
// check perm
|
||||
// tpl.GroupId - host - account 三元组校验权限
|
||||
can, err := canDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
|
||||
can, err := CanDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
|
||||
return
|
||||
err = fmt.Errorf("event_callback_ibex: check perm fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if !can {
|
||||
logger.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
|
||||
return
|
||||
err = fmt.Errorf("event_callback_ibex: user(%s) no permission, event: %s", tpl.UpdateBy, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
tagsMap := make(map[string]string)
|
||||
@@ -133,11 +136,16 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
|
||||
tags, err := json.Marshal(tagsMap)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
|
||||
return
|
||||
err = fmt.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %s", tagsMap, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// call ibex
|
||||
taskArgs := tpl.Args
|
||||
if args != "" {
|
||||
taskArgs = args
|
||||
}
|
||||
in := models.TaskForm{
|
||||
Title: tpl.Title + " FH: " + host,
|
||||
Account: tpl.Account,
|
||||
@@ -146,7 +154,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
Timeout: tpl.Timeout,
|
||||
Pause: tpl.Pause,
|
||||
Script: tpl.Script,
|
||||
Args: tpl.Args,
|
||||
Args: taskArgs,
|
||||
Stdin: string(tags),
|
||||
Action: "start",
|
||||
Creator: tpl.UpdateBy,
|
||||
@@ -156,8 +164,9 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
|
||||
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
|
||||
return
|
||||
err = fmt.Errorf("event_callback_ibex: call ibex fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write db
|
||||
@@ -178,11 +187,14 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
}
|
||||
|
||||
if err = record.Add(ctx); err != nil {
|
||||
logger.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
|
||||
err = fmt.Errorf("event_callback_ibex: persist task_record fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return id, err
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
|
||||
func CanDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
|
||||
user := userCache.GetByUsername(username)
|
||||
if user != nil && user.IsAdmin() {
|
||||
return true, nil
|
||||
|
||||
@@ -24,7 +24,7 @@ func ReportNotifyRecordQueueSize(stats *astats.Stats) {
|
||||
|
||||
// 推送通知记录到队列
|
||||
// 若队列满 则返回 error
|
||||
func PushNotifyRecords(records []*models.NotificaitonRecord) error {
|
||||
func PushNotifyRecords(records []*models.NotificationRecord) error {
|
||||
for _, record := range records {
|
||||
if ok := NotifyRecordQueue.PushFront(record); !ok {
|
||||
logger.Warningf("notify record queue is full, record: %+v", record)
|
||||
@@ -59,16 +59,16 @@ func (c *NotifyRecordConsumer) LoopConsume() {
|
||||
}
|
||||
|
||||
// 类型转换,不然 CreateInBatches 会报错
|
||||
notis := make([]*models.NotificaitonRecord, 0, len(inotis))
|
||||
notis := make([]*models.NotificationRecord, 0, len(inotis))
|
||||
for _, inoti := range inotis {
|
||||
notis = append(notis, inoti.(*models.NotificaitonRecord))
|
||||
notis = append(notis, inoti.(*models.NotificationRecord))
|
||||
}
|
||||
|
||||
c.consume(notis)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *NotifyRecordConsumer) consume(notis []*models.NotificaitonRecord) {
|
||||
func (c *NotifyRecordConsumer) consume(notis []*models.NotificationRecord) {
|
||||
if err := models.DB(c.ctx).CreateInBatches(notis, 100).Error; err != nil {
|
||||
logger.Errorf("add notis:%v failed, err: %v", notis, err)
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
|
||||
|
||||
channel := "script"
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
fpath := ".notify_scriptt"
|
||||
fpath := ".notify_script"
|
||||
if config.Type == 1 {
|
||||
fpath = config.Content
|
||||
} else {
|
||||
@@ -79,6 +79,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
|
||||
cmd.Stdout = &buf
|
||||
cmd.Stderr = &buf
|
||||
|
||||
start := time.Now()
|
||||
err := startCmd(cmd)
|
||||
if err != nil {
|
||||
logger.Errorf("event_script_notify_fail: run cmd err: %v", err)
|
||||
@@ -88,6 +89,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
|
||||
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
|
||||
|
||||
res := buf.String()
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
|
||||
// 截断超出长度的输出
|
||||
if len(res) > 512 {
|
||||
|
||||
@@ -13,10 +13,53 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// webhookClientCache 缓存 http.Client,避免每次请求都创建新的 Client 导致连接泄露
|
||||
var webhookClientCache sync.Map // key: clientKey (string), value: *http.Client
|
||||
|
||||
// 相同配置的 webhook 会复用同一个 Client
|
||||
func getWebhookClient(webhook *models.Webhook) *http.Client {
|
||||
clientKey := webhook.Hash()
|
||||
|
||||
if client, ok := webhookClientCache.Load(clientKey); ok {
|
||||
return client.(*http.Client)
|
||||
}
|
||||
|
||||
// 创建新的 Client
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: webhook.SkipVerify},
|
||||
MaxIdleConns: 100,
|
||||
MaxIdleConnsPerHost: 10,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
}
|
||||
|
||||
if poster.UseProxy(webhook.Url) {
|
||||
transport.Proxy = http.ProxyFromEnvironment
|
||||
}
|
||||
|
||||
timeout := webhook.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 10
|
||||
}
|
||||
|
||||
newClient := &http.Client{
|
||||
Timeout: time.Duration(timeout) * time.Second,
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
// 使用 LoadOrStore 确保并发安全,避免重复创建
|
||||
actual, loaded := webhookClientCache.LoadOrStore(clientKey, newClient)
|
||||
if loaded {
|
||||
return actual.(*http.Client)
|
||||
}
|
||||
|
||||
return newClient
|
||||
}
|
||||
|
||||
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) (bool, string, error) {
|
||||
channel := "webhook"
|
||||
if webhook.Type == models.RuleCallback {
|
||||
@@ -29,7 +72,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
}
|
||||
bs, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
|
||||
logger.Errorf("%s alertingWebhook failed to marshal event err:%v", channel, err)
|
||||
return false, "", err
|
||||
}
|
||||
|
||||
@@ -37,7 +80,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
|
||||
req, err := http.NewRequest("POST", conf.Url, bf)
|
||||
if err != nil {
|
||||
logger.Warningf("%s alertingWebhook failed to new reques event:%s err:%v", channel, string(bs), err)
|
||||
logger.Warningf("%s alertingWebhook failed to new request event:%s err:%v", channel, string(bs), err)
|
||||
return true, "", err
|
||||
}
|
||||
|
||||
@@ -55,25 +98,13 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
|
||||
}
|
||||
}
|
||||
insecureSkipVerify := false
|
||||
if webhook != nil {
|
||||
insecureSkipVerify = webhook.SkipVerify
|
||||
}
|
||||
|
||||
if conf.Client == nil {
|
||||
logger.Warningf("event_%s, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, "client is nil")
|
||||
conf.Client = &http.Client{
|
||||
Timeout: time.Duration(conf.Timeout) * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
|
||||
},
|
||||
}
|
||||
}
|
||||
// 使用全局 Client 缓存,避免每次请求都创建新的 Client 导致连接泄露
|
||||
client := getWebhookClient(conf)
|
||||
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
var resp *http.Response
|
||||
var body []byte
|
||||
resp, err = conf.Client.Do(req)
|
||||
resp, err = client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
@@ -88,18 +119,20 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
|
||||
if resp.StatusCode == 429 {
|
||||
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return true, string(body), fmt.Errorf("status code is 429")
|
||||
return true, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), fmt.Errorf("status code is 429")
|
||||
}
|
||||
|
||||
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return false, string(body), nil
|
||||
return false, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), nil
|
||||
}
|
||||
|
||||
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
retryCount := 0
|
||||
for retryCount < 3 {
|
||||
start := time.Now()
|
||||
needRetry, res, err := sendWebhook(conf, event, stats)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, "webhook", conf.Url, res, err)
|
||||
if !needRetry {
|
||||
break
|
||||
@@ -112,7 +145,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
|
||||
|
||||
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
logger.Infof("push event:%+v to queue:%v", event, conf)
|
||||
logger.Infof("push event:%s to queue:%v", event.Hash, conf)
|
||||
PushEvent(ctx, conf, event, stats)
|
||||
}
|
||||
}
|
||||
@@ -150,7 +183,7 @@ func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCur
|
||||
succ := queue.eventQueue.Push(event)
|
||||
if !succ {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,7 +202,9 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
|
||||
|
||||
retryCount := 0
|
||||
for retryCount < webhook.RetryCount {
|
||||
start := time.Now()
|
||||
needRetry, res, err := sendWebhook(webhook, events, stats)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
go NotifyRecord(ctx, events, 0, "webhook", webhook.Url, res, err)
|
||||
if !needRetry {
|
||||
break
|
||||
|
||||
@@ -1,20 +1,32 @@
|
||||
package cconf
|
||||
|
||||
import "time"
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/httpx"
|
||||
)
|
||||
|
||||
type Center struct {
|
||||
Plugins []Plugin
|
||||
MetricsYamlFile string
|
||||
OpsYamlFile string
|
||||
BuiltinIntegrationsDir string
|
||||
I18NHeaderKey string
|
||||
MetricDesc MetricDescType
|
||||
AnonymousAccess AnonymousAccess
|
||||
UseFileAssets bool
|
||||
FlashDuty FlashDuty
|
||||
EventHistoryGroupView bool
|
||||
CleanNotifyRecordDay int
|
||||
MigrateBusiGroupLabel bool
|
||||
Plugins []Plugin
|
||||
MetricsYamlFile string
|
||||
OpsYamlFile string
|
||||
BuiltinIntegrationsDir string
|
||||
I18NHeaderKey string
|
||||
MetricDesc MetricDescType
|
||||
AnonymousAccess AnonymousAccess
|
||||
UseFileAssets bool
|
||||
FlashDuty FlashDuty
|
||||
EventHistoryGroupView bool
|
||||
CleanNotifyRecordDay int
|
||||
CleanPipelineExecutionDay int
|
||||
MigrateBusiGroupLabel bool
|
||||
RSA httpx.RSAConfig
|
||||
AIAgent AIAgent
|
||||
}
|
||||
|
||||
type AIAgent struct {
|
||||
Enable bool `toml:"Enable"`
|
||||
SkillsPath string `toml:"SkillsPath"`
|
||||
}
|
||||
|
||||
type Plugin struct {
|
||||
|
||||
@@ -300,6 +300,14 @@ ops:
|
||||
cname: View Alerting Engines
|
||||
- name: /system/version
|
||||
cname: View Product Version
|
||||
- name: /ai-config/agents
|
||||
cname: AI Config - Agents
|
||||
- name: /ai-config/llm-configs
|
||||
cname: AI Config - LLM Configs
|
||||
- name: /ai-config/skills
|
||||
cname: AI Config - Skills
|
||||
- name: /ai-config/mcp-servers
|
||||
cname: AI Config - MCP Servers
|
||||
|
||||
`
|
||||
)
|
||||
|
||||
@@ -43,4 +43,22 @@ var Plugins = []Plugin{
|
||||
Type: "pgsql",
|
||||
TypeName: "PostgreSQL",
|
||||
},
|
||||
{
|
||||
Id: 8,
|
||||
Category: "logging",
|
||||
Type: "doris",
|
||||
TypeName: "Doris",
|
||||
},
|
||||
{
|
||||
Id: 9,
|
||||
Category: "logging",
|
||||
Type: "opensearch",
|
||||
TypeName: "OpenSearch",
|
||||
},
|
||||
{
|
||||
Id: 10,
|
||||
Category: "logging",
|
||||
Type: "victorialogs",
|
||||
TypeName: "VictoriaLogs",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -2,10 +2,13 @@ package center
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/dispatch"
|
||||
@@ -96,6 +99,9 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
models.MigrateEP(ctx)
|
||||
}
|
||||
|
||||
// 初始化 siteUrl,如果为空则设置默认值
|
||||
InitSiteUrl(ctx, config.Alert.Heartbeat.IP, config.HTTP.Port)
|
||||
|
||||
configCache := memsto.NewConfigCache(ctx, syncStats, config.HTTP.RSA.RSAPrivateKey, config.HTTP.RSA.RSAPassWord)
|
||||
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
|
||||
@@ -121,18 +127,19 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
macros.RegisterMacro(macros.MacroInVain)
|
||||
dscache.Init(ctx, false)
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache)
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
|
||||
|
||||
writers := writer.NewWriters(config.Pushgw)
|
||||
|
||||
go version.GetGithubVersion()
|
||||
|
||||
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
|
||||
go cron.CleanPipelineExecution(ctx, config.Center.CleanPipelineExecutionDay)
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
|
||||
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
|
||||
cconf.Operations, dsCache, notifyConfigCache, promClients,
|
||||
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache)
|
||||
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache, config.Log.Dir)
|
||||
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
|
||||
@@ -159,3 +166,67 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
httpClean()
|
||||
}, nil
|
||||
}
|
||||
|
||||
// initSiteUrl 初始化 site_info 中的 site_url,如果为空则使用服务器IP和端口设置默认值
|
||||
func InitSiteUrl(ctx *ctx.Context, serverIP string, serverPort int) {
|
||||
// 构造默认的 SiteUrl
|
||||
defaultSiteUrl := fmt.Sprintf("http://%s:%d", serverIP, serverPort)
|
||||
|
||||
// 获取现有的 site_info 配置
|
||||
siteInfoStr, err := models.ConfigsGet(ctx, "site_info")
|
||||
if err != nil {
|
||||
logger.Errorf("failed to get site_info config: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果 site_info 不存在,创建新的
|
||||
if siteInfoStr == "" {
|
||||
newSiteInfo := memsto.SiteInfo{
|
||||
SiteUrl: defaultSiteUrl,
|
||||
}
|
||||
siteInfoBytes, err := json.Marshal(newSiteInfo)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = models.ConfigsSet(ctx, "site_info", string(siteInfoBytes))
|
||||
if err != nil {
|
||||
logger.Errorf("failed to set site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("initialized site_url with default value: %s", defaultSiteUrl)
|
||||
return
|
||||
}
|
||||
|
||||
// 检查现有的 site_info 中的 site_url 字段
|
||||
var existingSiteInfo memsto.SiteInfo
|
||||
err = json.Unmarshal([]byte(siteInfoStr), &existingSiteInfo)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to unmarshal site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果 site_url 已经有值,则不需要初始化
|
||||
if existingSiteInfo.SiteUrl != "" {
|
||||
return
|
||||
}
|
||||
|
||||
// 设置 site_url
|
||||
existingSiteInfo.SiteUrl = defaultSiteUrl
|
||||
|
||||
siteInfoBytes, err := json.Marshal(existingSiteInfo)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to marshal updated site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = models.ConfigsSet(ctx, "site_info", string(siteInfoBytes))
|
||||
if err != nil {
|
||||
logger.Errorf("failed to update site_info: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("initialized site_url with default value: %s", defaultSiteUrl)
|
||||
}
|
||||
|
||||
@@ -3,11 +3,15 @@ package integration
|
||||
import (
|
||||
"encoding/json"
|
||||
"path"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/toolkits/pkg/container/set"
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
@@ -15,7 +19,18 @@ import (
|
||||
|
||||
const SYSTEM = "system"
|
||||
|
||||
var BuiltinPayloadInFile *BuiltinPayloadInFileType
|
||||
|
||||
type BuiltinPayloadInFileType struct {
|
||||
Data map[uint64]map[string]map[string][]*models.BuiltinPayload // map[component_id]map[type]map[cate][]*models.BuiltinPayload
|
||||
IndexData map[int64]*models.BuiltinPayload // map[uuid]payload
|
||||
|
||||
BuiltinMetrics map[string]*models.BuiltinMetric
|
||||
}
|
||||
|
||||
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
BuiltinPayloadInFile = NewBuiltinPayloadInFileType()
|
||||
|
||||
err := models.InitBuiltinPayloads(ctx)
|
||||
if err != nil {
|
||||
logger.Warning("init old builtinPayloads fail ", err)
|
||||
@@ -109,13 +124,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
component.ID = old.ID
|
||||
}
|
||||
|
||||
// delete uuid is emtpy
|
||||
// delete uuid is empty
|
||||
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid = 0 and type != 'collect' and (updated_by = 'system' or updated_by = '')").Error
|
||||
if err != nil {
|
||||
logger.Warning("delete builtin payloads fail ", err)
|
||||
}
|
||||
|
||||
// delete builtin metrics uuid is emtpy
|
||||
// delete builtin metrics uuid is empty
|
||||
err = models.DB(ctx).Exec("delete from builtin_metrics where uuid = 0 and (updated_by = 'system' or updated_by = '')").Error
|
||||
if err != nil {
|
||||
logger.Warning("delete builtin metrics fail ", err)
|
||||
@@ -146,11 +161,10 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
}
|
||||
|
||||
newAlerts := []models.AlertRule{}
|
||||
writeAlertFileFlag := false
|
||||
for _, alert := range alerts {
|
||||
if alert.UUID == 0 {
|
||||
writeAlertFileFlag = true
|
||||
alert.UUID = time.Now().UnixNano()
|
||||
time.Sleep(time.Microsecond)
|
||||
alert.UUID = time.Now().UnixMicro()
|
||||
}
|
||||
|
||||
newAlerts = append(newAlerts, alert)
|
||||
@@ -169,47 +183,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
Tags: alert.AppendTags,
|
||||
Content: string(content),
|
||||
UUID: alert.UUID,
|
||||
ID: alert.UUID,
|
||||
CreatedBy: SYSTEM,
|
||||
UpdatedBy: SYSTEM,
|
||||
}
|
||||
BuiltinPayloadInFile.AddBuiltinPayload(&builtinAlert)
|
||||
|
||||
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin alert fail ", builtinAlert, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := builtinAlert.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin alert fail ", builtinAlert, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.ComponentID = component.ID
|
||||
old.Content = string(content)
|
||||
old.Name = alert.Name
|
||||
old.Tags = alert.AppendTags
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin alert:%+v fail %v", builtinAlert, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if writeAlertFileFlag {
|
||||
bs, err = json.MarshalIndent(newAlerts, "", " ")
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin alerts fail ", newAlerts, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = file.WriteBytes(fp, bs)
|
||||
if err != nil {
|
||||
logger.Warning("write builtin alerts file fail ", f, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -259,34 +239,14 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
Cate: "",
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
Note: dashboard.Note,
|
||||
Content: string(content),
|
||||
UUID: dashboard.UUID,
|
||||
ID: dashboard.UUID,
|
||||
CreatedBy: SYSTEM,
|
||||
UpdatedBy: SYSTEM,
|
||||
}
|
||||
|
||||
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin alert fail ", builtinDashboard, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := builtinDashboard.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin alert fail ", builtinDashboard, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.ComponentID = component.ID
|
||||
old.Content = string(content)
|
||||
old.Name = dashboard.Name
|
||||
old.Tags = dashboard.Tags
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin alert:%+v fail %v", builtinDashboard, err)
|
||||
}
|
||||
}
|
||||
BuiltinPayloadInFile.AddBuiltinPayload(&builtinDashboard)
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
|
||||
@@ -304,64 +264,21 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
}
|
||||
|
||||
metrics := []models.BuiltinMetric{}
|
||||
newMetrics := []models.BuiltinMetric{}
|
||||
err = json.Unmarshal(bs, &metrics)
|
||||
if err != nil {
|
||||
logger.Warning("parse builtin component metrics file fail", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
writeMetricFileFlag := false
|
||||
for _, metric := range metrics {
|
||||
if metric.UUID == 0 {
|
||||
writeMetricFileFlag = true
|
||||
metric.UUID = time.Now().UnixNano()
|
||||
}
|
||||
newMetrics = append(newMetrics, metric)
|
||||
time.Sleep(time.Microsecond)
|
||||
metric.UUID = time.Now().UnixMicro()
|
||||
metric.ID = metric.UUID
|
||||
metric.CreatedBy = SYSTEM
|
||||
metric.UpdatedBy = SYSTEM
|
||||
|
||||
old, err := models.BuiltinMetricGet(ctx, "uuid = ?", metric.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin metrics fail ", metric, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := metric.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin metrics fail ", metric, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.Collector = metric.Collector
|
||||
old.Typ = metric.Typ
|
||||
old.Name = metric.Name
|
||||
old.Unit = metric.Unit
|
||||
old.Note = metric.Note
|
||||
old.Lang = metric.Lang
|
||||
old.Expression = metric.Expression
|
||||
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin metric:%+v fail %v", metric, err)
|
||||
}
|
||||
}
|
||||
BuiltinPayloadInFile.BuiltinMetrics[metric.Expression] = &metric
|
||||
}
|
||||
|
||||
if writeMetricFileFlag {
|
||||
bs, err = json.MarshalIndent(newMetrics, "", " ")
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin metrics fail ", newMetrics, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = file.WriteBytes(fp, bs)
|
||||
if err != nil {
|
||||
logger.Warning("write builtin metrics file fail ", f, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component metrics dir fail %s %v", component.Ident, err)
|
||||
@@ -375,6 +292,7 @@ type BuiltinBoard struct {
|
||||
Name string `json:"name"`
|
||||
Ident string `json:"ident"`
|
||||
Tags string `json:"tags"`
|
||||
Note string `json:"note"`
|
||||
CreateAt int64 `json:"create_at"`
|
||||
CreateBy string `json:"create_by"`
|
||||
UpdateAt int64 `json:"update_at"`
|
||||
@@ -387,3 +305,346 @@ type BuiltinBoard struct {
|
||||
Hide int `json:"hide"` // 0: false, 1: true
|
||||
UUID int64 `json:"uuid"`
|
||||
}
|
||||
|
||||
func NewBuiltinPayloadInFileType() *BuiltinPayloadInFileType {
|
||||
return &BuiltinPayloadInFileType{
|
||||
Data: make(map[uint64]map[string]map[string][]*models.BuiltinPayload),
|
||||
IndexData: make(map[int64]*models.BuiltinPayload),
|
||||
BuiltinMetrics: make(map[string]*models.BuiltinMetric),
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) AddBuiltinPayload(bp *models.BuiltinPayload) {
|
||||
if _, exists := b.Data[bp.ComponentID]; !exists {
|
||||
b.Data[bp.ComponentID] = make(map[string]map[string][]*models.BuiltinPayload)
|
||||
}
|
||||
bpInType := b.Data[bp.ComponentID]
|
||||
if _, exists := bpInType[bp.Type]; !exists {
|
||||
bpInType[bp.Type] = make(map[string][]*models.BuiltinPayload)
|
||||
}
|
||||
bpInCate := bpInType[bp.Type]
|
||||
if _, exists := bpInCate[bp.Cate]; !exists {
|
||||
bpInCate[bp.Cate] = make([]*models.BuiltinPayload, 0)
|
||||
}
|
||||
bpInCate[bp.Cate] = append(bpInCate[bp.Cate], bp)
|
||||
|
||||
b.IndexData[bp.UUID] = bp
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) GetComponentIdentByCate(typ, cate string) string {
|
||||
|
||||
for _, source := range b.Data {
|
||||
if source == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
typeMap, exists := source[typ]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
payloads, exists := typeMap[cate]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
if len(payloads) > 0 {
|
||||
return payloads[0].Component
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) GetBuiltinPayload(typ, cate, query string, componentId uint64) ([]*models.BuiltinPayload, error) {
|
||||
|
||||
var result []*models.BuiltinPayload
|
||||
source := b.Data[componentId]
|
||||
|
||||
if source == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
typeMap, exists := source[typ]
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if cate != "" {
|
||||
payloads, exists := typeMap[cate]
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
result = append(result, filterByQuery(payloads, query)...)
|
||||
} else {
|
||||
for _, payloads := range typeMap {
|
||||
result = append(result, filterByQuery(payloads, query)...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(result) > 0 {
|
||||
sort.Slice(result, func(i, j int) bool {
|
||||
return result[i].Name < result[j].Name
|
||||
})
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) GetBuiltinPayloadCates(typ string, componentId uint64) ([]string, error) {
|
||||
var result []string
|
||||
source := b.Data[componentId]
|
||||
if source == nil {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
typeData := source[typ]
|
||||
if typeData == nil {
|
||||
return result, nil
|
||||
}
|
||||
for cate := range typeData {
|
||||
result = append(result, cate)
|
||||
}
|
||||
|
||||
sort.Strings(result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func filterByQuery(payloads []*models.BuiltinPayload, query string) []*models.BuiltinPayload {
|
||||
if query == "" {
|
||||
return payloads
|
||||
}
|
||||
|
||||
queryLower := strings.ToLower(query)
|
||||
var filtered []*models.BuiltinPayload
|
||||
for _, p := range payloads {
|
||||
if strings.Contains(strings.ToLower(p.Name), queryLower) || strings.Contains(strings.ToLower(p.Tags), queryLower) {
|
||||
filtered = append(filtered, p)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) BuiltinMetricGets(metricsInDB []*models.BuiltinMetric, lang, collector, typ, query, unit string, limit, offset int) ([]*models.BuiltinMetric, int, error) {
|
||||
var filteredMetrics []*models.BuiltinMetric
|
||||
expressionSet := set.NewStringSet()
|
||||
builtinMetricsByDB := convertBuiltinMetricByDB(metricsInDB)
|
||||
builtinMetricsMap := make(map[string]*models.BuiltinMetric)
|
||||
|
||||
for expression, metric := range builtinMetricsByDB {
|
||||
builtinMetricsMap[expression] = metric
|
||||
}
|
||||
|
||||
for expression, metric := range b.BuiltinMetrics {
|
||||
builtinMetricsMap[expression] = metric
|
||||
}
|
||||
|
||||
for _, metric := range builtinMetricsMap {
|
||||
if !applyFilter(metric, collector, typ, query, unit) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip if expression is already in db cache
|
||||
// NOTE: 忽略重复的expression,特别的,在旧版本中,用户可能已经创建了重复的metrics,需要覆盖掉ByFile中相同的Metrics
|
||||
// NOTE: Ignore duplicate expressions, especially in the old version, users may have created duplicate metrics,
|
||||
if expressionSet.Exists(metric.Expression) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Add db expression in set.
|
||||
expressionSet.Add(metric.Expression)
|
||||
|
||||
// Apply language
|
||||
trans, err := getTranslationWithLanguage(metric, lang)
|
||||
if err != nil {
|
||||
logger.Errorf("Error getting translation for metric %s: %v", metric.Name, err)
|
||||
continue // Skip if translation not found
|
||||
}
|
||||
metric.Name = trans.Name
|
||||
metric.Note = trans.Note
|
||||
|
||||
filteredMetrics = append(filteredMetrics, metric)
|
||||
}
|
||||
|
||||
// Sort metrics
|
||||
sort.Slice(filteredMetrics, func(i, j int) bool {
|
||||
if filteredMetrics[i].Collector != filteredMetrics[j].Collector {
|
||||
return filteredMetrics[i].Collector < filteredMetrics[j].Collector
|
||||
}
|
||||
if filteredMetrics[i].Typ != filteredMetrics[j].Typ {
|
||||
return filteredMetrics[i].Typ < filteredMetrics[j].Typ
|
||||
}
|
||||
return filteredMetrics[i].Expression < filteredMetrics[j].Expression
|
||||
})
|
||||
|
||||
totalCount := len(filteredMetrics)
|
||||
|
||||
// Validate parameters
|
||||
if offset < 0 {
|
||||
offset = 0
|
||||
}
|
||||
if limit < 0 {
|
||||
limit = 0
|
||||
}
|
||||
|
||||
// Handle edge cases
|
||||
if offset >= totalCount || limit == 0 {
|
||||
return []*models.BuiltinMetric{}, totalCount, nil
|
||||
}
|
||||
|
||||
// Apply pagination
|
||||
end := offset + limit
|
||||
if end > totalCount {
|
||||
end = totalCount
|
||||
}
|
||||
|
||||
return filteredMetrics[offset:end], totalCount, nil
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) BuiltinMetricTypes(lang, collector, query string) []string {
|
||||
typeSet := set.NewStringSet()
|
||||
for _, metric := range b.BuiltinMetrics {
|
||||
if !applyFilter(metric, collector, "", query, "") {
|
||||
continue
|
||||
}
|
||||
|
||||
typeSet.Add(metric.Typ)
|
||||
}
|
||||
|
||||
return typeSet.ToSlice()
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) BuiltinMetricCollectors(lang, typ, query string) []string {
|
||||
collectorSet := set.NewStringSet()
|
||||
for _, metric := range b.BuiltinMetrics {
|
||||
if !applyFilter(metric, "", typ, query, "") {
|
||||
continue
|
||||
}
|
||||
|
||||
collectorSet.Add(metric.Collector)
|
||||
}
|
||||
return collectorSet.ToSlice()
|
||||
}
|
||||
|
||||
func applyFilter(metric *models.BuiltinMetric, collector, typ, query, unit string) bool {
|
||||
if collector != "" && collector != metric.Collector {
|
||||
return false
|
||||
}
|
||||
|
||||
if typ != "" && typ != metric.Typ {
|
||||
return false
|
||||
}
|
||||
|
||||
if unit != "" && !containsUnit(unit, metric.Unit) {
|
||||
return false
|
||||
}
|
||||
|
||||
if query != "" && !applyQueryFilter(metric, query) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func containsUnit(unit, metricUnit string) bool {
|
||||
us := strings.Split(unit, ",")
|
||||
for _, u := range us {
|
||||
if u == metricUnit {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func applyQueryFilter(metric *models.BuiltinMetric, query string) bool {
|
||||
qs := strings.Split(query, " ")
|
||||
for _, q := range qs {
|
||||
if strings.HasPrefix(q, "-") {
|
||||
q = strings.TrimPrefix(q, "-")
|
||||
if strings.Contains(metric.Name, q) || strings.Contains(metric.Note, q) || strings.Contains(metric.Expression, q) {
|
||||
return false
|
||||
}
|
||||
} else {
|
||||
if !strings.Contains(metric.Name, q) && !strings.Contains(metric.Note, q) && !strings.Contains(metric.Expression, q) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func getTranslationWithLanguage(bm *models.BuiltinMetric, lang string) (*models.Translation, error) {
|
||||
var defaultTranslation *models.Translation
|
||||
for _, t := range bm.Translation {
|
||||
if t.Lang == lang {
|
||||
return &t, nil
|
||||
}
|
||||
|
||||
if t.Lang == "en_US" {
|
||||
defaultTranslation = &t
|
||||
}
|
||||
}
|
||||
|
||||
if defaultTranslation != nil {
|
||||
return defaultTranslation, nil
|
||||
}
|
||||
|
||||
return nil, errors.Errorf("translation not found for metric %s", bm.Name)
|
||||
}
|
||||
|
||||
func convertBuiltinMetricByDB(metricsInDB []*models.BuiltinMetric) map[string]*models.BuiltinMetric {
|
||||
builtinMetricsByDB := make(map[string]*models.BuiltinMetric)
|
||||
builtinMetricsByDBList := make(map[string][]*models.BuiltinMetric)
|
||||
|
||||
for _, metric := range metricsInDB {
|
||||
builtinMetrics, ok := builtinMetricsByDBList[metric.Expression]
|
||||
if !ok {
|
||||
builtinMetrics = []*models.BuiltinMetric{}
|
||||
}
|
||||
|
||||
builtinMetrics = append(builtinMetrics, metric)
|
||||
builtinMetricsByDBList[metric.Expression] = builtinMetrics
|
||||
}
|
||||
|
||||
for expression, builtinMetrics := range builtinMetricsByDBList {
|
||||
if len(builtinMetrics) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// NOTE: 为兼容旧版本用户已经创建的 metrics,同时将修改 metrics 收敛到同一个记录上,
|
||||
// 我们选择使用 expression 相同但是 id 最小的 metric 记录作为主要的 Metric。
|
||||
sort.Slice(builtinMetrics, func(i, j int) bool {
|
||||
return builtinMetrics[i].ID < builtinMetrics[j].ID
|
||||
})
|
||||
|
||||
currentBuiltinMetric := builtinMetrics[0]
|
||||
// User has no customized translation, so we can merge it
|
||||
if len(currentBuiltinMetric.Translation) == 0 {
|
||||
translationMap := make(map[string]models.Translation)
|
||||
for _, bm := range builtinMetrics {
|
||||
for _, t := range getDefaultTranslation(bm) {
|
||||
translationMap[t.Lang] = t
|
||||
}
|
||||
}
|
||||
currentBuiltinMetric.Translation = make([]models.Translation, 0, len(translationMap))
|
||||
for _, t := range translationMap {
|
||||
currentBuiltinMetric.Translation = append(currentBuiltinMetric.Translation, t)
|
||||
}
|
||||
}
|
||||
|
||||
builtinMetricsByDB[expression] = currentBuiltinMetric
|
||||
}
|
||||
|
||||
return builtinMetricsByDB
|
||||
}
|
||||
|
||||
func getDefaultTranslation(bm *models.BuiltinMetric) []models.Translation {
|
||||
if len(bm.Translation) != 0 {
|
||||
return bm.Translation
|
||||
}
|
||||
|
||||
return []models.Translation{{
|
||||
Lang: bm.Lang,
|
||||
Name: bm.Name,
|
||||
Note: bm.Note,
|
||||
}}
|
||||
}
|
||||
|
||||
@@ -118,7 +118,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
err := storage.MSet(context.Background(), s.redis, newMap)
|
||||
err := storage.MSet(context.Background(), s.redis, newMap, 7*24*time.Hour)
|
||||
if err != nil {
|
||||
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "fail").Observe(time.Since(start).Seconds())
|
||||
return err
|
||||
@@ -127,7 +127,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
|
||||
}
|
||||
|
||||
if len(extendMap) > 0 {
|
||||
err = storage.MSet(context.Background(), s.redis, extendMap)
|
||||
err = storage.MSet(context.Background(), s.redis, extendMap, 7*24*time.Hour)
|
||||
if err != nil {
|
||||
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "fail").Observe(time.Since(start).Seconds())
|
||||
return err
|
||||
|
||||
@@ -24,11 +24,11 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/rakyll/statik/fs"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
@@ -51,6 +51,7 @@ type Router struct {
|
||||
UserGroupCache *memsto.UserGroupCacheType
|
||||
UserTokenCache *memsto.UserTokenCacheType
|
||||
Ctx *ctx.Context
|
||||
LogDir string
|
||||
|
||||
HeartbeatHook HeartbeatHookFunc
|
||||
TargetDeleteHook models.TargetDeleteHookFunc
|
||||
@@ -61,7 +62,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
|
||||
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
|
||||
pc *prom.PromClientMap, redis storage.Redis,
|
||||
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
|
||||
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType) *Router {
|
||||
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType, logDir string) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Center: center,
|
||||
@@ -80,6 +81,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
|
||||
UserGroupCache: ugc,
|
||||
UserTokenCache: utc,
|
||||
Ctx: ctx,
|
||||
LogDir: logDir,
|
||||
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
|
||||
TargetDeleteHook: func(tx *gorm.DB, idents []string) error { return nil },
|
||||
AlertRuleModifyHook: func(ar *models.AlertRule) {},
|
||||
@@ -177,6 +179,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages := r.Group(pagesPrefix)
|
||||
{
|
||||
|
||||
pages.DELETE("/datasource/series", rt.auth(), rt.admin(), rt.deleteDatasourceSeries)
|
||||
if rt.Center.AnonymousAccess.PromQuerier {
|
||||
pages.Any("/proxy/:id/*url", rt.dsProxy)
|
||||
pages.POST("/query-range-batch", rt.promBatchQueryRange)
|
||||
@@ -210,8 +213,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
|
||||
pages.POST("/datasource/query", rt.auth(), rt.user(), rt.datasourceQuery)
|
||||
|
||||
pages.POST("/ds-query", rt.auth(), rt.QueryData)
|
||||
pages.POST("/logs-query", rt.auth(), rt.QueryLogV2)
|
||||
pages.POST("/ds-query", rt.auth(), rt.user(), rt.QueryData)
|
||||
pages.POST("/logs-query", rt.auth(), rt.user(), rt.QueryLogV2)
|
||||
|
||||
pages.POST("/tdengine-databases", rt.auth(), rt.tdengineDatabases)
|
||||
pages.POST("/tdengine-tables", rt.auth(), rt.tdengineTables)
|
||||
@@ -231,6 +234,11 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/log-query", rt.QueryLog)
|
||||
}
|
||||
|
||||
// OpenSearch 专用接口
|
||||
pages.POST("/os-indices", rt.QueryOSIndices)
|
||||
pages.POST("/os-variable", rt.QueryOSVariable)
|
||||
pages.POST("/os-fields", rt.QueryOSFields)
|
||||
|
||||
pages.GET("/sql-template", rt.QuerySqlTemplate)
|
||||
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
|
||||
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.user(), rt.logoutPost)
|
||||
@@ -244,9 +252,13 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/auth/redirect", rt.loginRedirect)
|
||||
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
|
||||
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
|
||||
pages.GET("/auth/redirect/dingtalk", rt.loginRedirectDingTalk)
|
||||
pages.GET("/auth/redirect/feishu", rt.loginRedirectFeiShu)
|
||||
pages.GET("/auth/callback", rt.loginCallback)
|
||||
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
|
||||
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
|
||||
pages.GET("/auth/callback/dingtalk", rt.loginCallbackDingTalk)
|
||||
pages.GET("/auth/callback/feishu", rt.loginCallbackFeiShu)
|
||||
pages.GET("/auth/perms", rt.allPerms)
|
||||
|
||||
pages.GET("/metrics/desc", rt.metricsDescGetFile)
|
||||
@@ -254,6 +266,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
|
||||
pages.GET("/notify-channels", rt.notifyChannelsGets)
|
||||
pages.GET("/contact-keys", rt.contactKeysGets)
|
||||
pages.GET("/install-date", rt.installDateGet)
|
||||
|
||||
pages.GET("/self/perms", rt.auth(), rt.user(), rt.permsGets)
|
||||
pages.GET("/self/profile", rt.auth(), rt.user(), rt.selfProfileGet)
|
||||
@@ -309,6 +322,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/busi-groups/tags", rt.auth(), rt.user(), rt.busiGroupsGetTags)
|
||||
|
||||
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
|
||||
pages.POST("/target-update", rt.auth(), rt.targetUpdate)
|
||||
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
|
||||
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
|
||||
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
|
||||
@@ -356,6 +370,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
|
||||
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
|
||||
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
|
||||
pages.GET("/timezones", rt.auth(), rt.user(), rt.timezonesGet)
|
||||
|
||||
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
|
||||
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
|
||||
@@ -372,13 +387,15 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
|
||||
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.cloneToMachine)
|
||||
pages.POST("/busi-groups/alert-rules/clones", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.batchAlertRuleClone)
|
||||
pages.POST("/busi-group/alert-rules/notify-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleNotifyTryRun)
|
||||
pages.POST("/busi-group/alert-rules/enable-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleEnableTryRun)
|
||||
|
||||
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
|
||||
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
|
||||
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
|
||||
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
|
||||
pages.PUT("/busi-group/:id/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.bgrw(), rt.recordingRulePutByFE)
|
||||
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
|
||||
pages.PUT("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRulePutByFE)
|
||||
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
|
||||
|
||||
pages.GET("/busi-groups/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGetsByGids)
|
||||
@@ -397,10 +414,14 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.bgrw(), rt.alertSubscribeAdd)
|
||||
pages.PUT("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/put"), rt.bgrw(), rt.alertSubscribePut)
|
||||
pages.DELETE("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/del"), rt.bgrw(), rt.alertSubscribeDel)
|
||||
pages.POST("/alert-subscribe/alert-subscribes-tryrun", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.alertSubscribeTryRun)
|
||||
|
||||
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
|
||||
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
|
||||
pages.GET("/event-detail/:hash", rt.eventDetailPage)
|
||||
pages.GET("/alert-eval-detail/:id", rt.alertEvalDetailPage)
|
||||
pages.GET("/trace-logs/:traceid", rt.traceLogsPage)
|
||||
|
||||
// card logic
|
||||
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
|
||||
@@ -439,7 +460,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/datasource/status/update", rt.auth(), rt.admin(), rt.datasourceUpdataStatus)
|
||||
pages.DELETE("/datasource/", rt.auth(), rt.admin(), rt.datasourceDel)
|
||||
|
||||
pages.GET("/roles", rt.auth(), rt.user(), rt.perm("/roles"), rt.roleGets)
|
||||
pages.GET("/roles", rt.auth(), rt.user(), rt.roleGets)
|
||||
pages.POST("/roles", rt.auth(), rt.user(), rt.perm("/roles/add"), rt.roleAdd)
|
||||
pages.PUT("/roles", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.rolePut)
|
||||
pages.DELETE("/role/:id", rt.auth(), rt.user(), rt.perm("/roles/del"), rt.roleDel)
|
||||
@@ -499,6 +520,50 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.PUT("/config", rt.auth(), rt.admin(), rt.configPutByKey)
|
||||
pages.GET("/site-info", rt.siteInfo)
|
||||
|
||||
// AI Config management
|
||||
pages.GET("/ai-agents", rt.auth(), rt.admin(), rt.aiAgentGets)
|
||||
pages.GET("/ai-agent/:id", rt.auth(), rt.admin(), rt.aiAgentGet)
|
||||
pages.POST("/ai-agents", rt.auth(), rt.admin(), rt.aiAgentAdd)
|
||||
pages.PUT("/ai-agent/:id", rt.auth(), rt.admin(), rt.aiAgentPut)
|
||||
pages.DELETE("/ai-agent/:id", rt.auth(), rt.admin(), rt.aiAgentDel)
|
||||
|
||||
pages.GET("/ai-llm-configs", rt.auth(), rt.admin(), rt.aiLLMConfigGets)
|
||||
pages.GET("/ai-llm-config/:id", rt.auth(), rt.admin(), rt.aiLLMConfigGet)
|
||||
pages.POST("/ai-llm-configs", rt.auth(), rt.admin(), rt.aiLLMConfigAdd)
|
||||
pages.PUT("/ai-llm-config/:id", rt.auth(), rt.admin(), rt.aiLLMConfigPut)
|
||||
pages.DELETE("/ai-llm-config/:id", rt.auth(), rt.admin(), rt.aiLLMConfigDel)
|
||||
pages.POST("/ai-llm-config/test", rt.auth(), rt.admin(), rt.aiLLMConfigTest)
|
||||
|
||||
pages.GET("/ai-skills", rt.auth(), rt.admin(), rt.aiSkillGets)
|
||||
pages.GET("/ai-skill/:id", rt.auth(), rt.admin(), rt.aiSkillGet)
|
||||
pages.POST("/ai-skills", rt.auth(), rt.admin(), rt.aiSkillAdd)
|
||||
pages.PUT("/ai-skill/:id", rt.auth(), rt.admin(), rt.aiSkillPut)
|
||||
pages.DELETE("/ai-skill/:id", rt.auth(), rt.admin(), rt.aiSkillDel)
|
||||
pages.POST("/ai-skills/import", rt.auth(), rt.admin(), rt.aiSkillImport)
|
||||
pages.POST("/ai-skill/:id/files", rt.auth(), rt.admin(), rt.aiSkillFileAdd)
|
||||
pages.GET("/ai-skill-file/:fileId", rt.auth(), rt.admin(), rt.aiSkillFileGet)
|
||||
pages.DELETE("/ai-skill-file/:fileId", rt.auth(), rt.admin(), rt.aiSkillFileDel)
|
||||
|
||||
pages.GET("/mcp-servers", rt.auth(), rt.admin(), rt.mcpServerGets)
|
||||
pages.GET("/mcp-server/:id", rt.auth(), rt.admin(), rt.mcpServerGet)
|
||||
pages.POST("/mcp-servers", rt.auth(), rt.admin(), rt.mcpServerAdd)
|
||||
pages.PUT("/mcp-server/:id", rt.auth(), rt.admin(), rt.mcpServerPut)
|
||||
pages.DELETE("/mcp-server/:id", rt.auth(), rt.admin(), rt.mcpServerDel)
|
||||
pages.POST("/ai-agent/:id/test", rt.auth(), rt.admin(), rt.aiAgentTest)
|
||||
pages.POST("/mcp-server/test", rt.auth(), rt.admin(), rt.mcpServerTest)
|
||||
pages.GET("/mcp-server/:id/tools", rt.auth(), rt.admin(), rt.mcpServerTools)
|
||||
|
||||
// AI Conversations
|
||||
pages.GET("/ai-conversations", rt.auth(), rt.user(), rt.aiConversationGets)
|
||||
pages.POST("/ai-conversations", rt.auth(), rt.user(), rt.aiConversationAdd)
|
||||
pages.GET("/ai-conversation/:id", rt.auth(), rt.user(), rt.aiConversationGet)
|
||||
pages.PUT("/ai-conversation/:id", rt.auth(), rt.user(), rt.aiConversationPut)
|
||||
pages.DELETE("/ai-conversation/:id", rt.auth(), rt.user(), rt.aiConversationDel)
|
||||
pages.POST("/ai-conversation/:id/messages", rt.auth(), rt.user(), rt.aiConversationMessageAdd)
|
||||
|
||||
// AI chat (SSE), dispatches by action_key
|
||||
pages.POST("/ai-chat", rt.auth(), rt.user(), rt.aiChat)
|
||||
|
||||
// source token 相关路由
|
||||
pages.POST("/source-token", rt.auth(), rt.user(), rt.sourceTokenAdd)
|
||||
|
||||
@@ -513,10 +578,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
|
||||
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
|
||||
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinPayloadsAdd)
|
||||
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/components"), rt.builtinPayloadGet)
|
||||
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinPayloadsPut)
|
||||
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinPayloadsDel)
|
||||
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUIDOrID)
|
||||
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUID)
|
||||
|
||||
pages.POST("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/add"), rt.messageTemplatesAdd)
|
||||
pages.DELETE("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/del"), rt.messageTemplatesDel)
|
||||
@@ -534,6 +598,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/notify-rule/custom-params", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRuleCustomParamsGet)
|
||||
pages.POST("/notify-rule/event-pipelines-tryrun", rt.auth(), rt.user(), rt.perm("/notification-rules/add"), rt.tryRunEventProcessorByNotifyRule)
|
||||
|
||||
pages.GET("/event-tagkeys", rt.auth(), rt.user(), rt.eventTagKeys)
|
||||
pages.GET("/event-tagvalues", rt.auth(), rt.user(), rt.eventTagValues)
|
||||
|
||||
// 事件Pipeline相关路由
|
||||
pages.GET("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.eventPipelinesList)
|
||||
pages.POST("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/add"), rt.addEventPipeline)
|
||||
@@ -543,6 +610,19 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/event-pipeline-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventPipeline)
|
||||
pages.POST("/event-processor-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventProcessor)
|
||||
|
||||
// API 触发工作流
|
||||
pages.POST("/event-pipeline/:id/trigger", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.triggerEventPipelineByAPI)
|
||||
// SSE 流式执行工作流
|
||||
pages.POST("/event-pipeline/:id/stream", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.streamEventPipeline)
|
||||
|
||||
// 事件Pipeline执行记录路由
|
||||
pages.GET("/event-pipeline-executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listAllEventPipelineExecutions)
|
||||
pages.GET("/event-pipeline/:id/executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listEventPipelineExecutions)
|
||||
pages.GET("/event-pipeline/:id/execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
|
||||
pages.GET("/event-pipeline-execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
|
||||
pages.GET("/event-pipeline/:id/execution-stats", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecutionStats)
|
||||
pages.POST("/event-pipeline-executions/clean", rt.auth(), rt.user(), rt.admin(), rt.cleanEventPipelineExecutions)
|
||||
|
||||
pages.POST("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/add"), rt.notifyChannelsAdd)
|
||||
pages.DELETE("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/del"), rt.notifyChannelsDel)
|
||||
pages.PUT("/notify-channel-config/:id", rt.auth(), rt.user(), rt.perm("/notification-channels/put"), rt.notifyChannelPut)
|
||||
@@ -550,8 +630,18 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels"), rt.notifyChannelsGet)
|
||||
pages.GET("/simplified-notify-channel-configs", rt.notifyChannelsGetForNormalUser)
|
||||
pages.GET("/flashduty-channel-list/:id", rt.auth(), rt.user(), rt.flashDutyNotifyChannelsGet)
|
||||
pages.GET("/pagerduty-integration-key/:id/:service_id/:integration_id", rt.auth(), rt.user(), rt.pagerDutyIntegrationKeyGet)
|
||||
pages.GET("/pagerduty-service-list/:id", rt.auth(), rt.user(), rt.pagerDutyNotifyServicesGet)
|
||||
pages.GET("/notify-channel-config", rt.auth(), rt.user(), rt.notifyChannelGetBy)
|
||||
pages.GET("/notify-channel-config/idents", rt.notifyChannelIdentsGet)
|
||||
|
||||
// saved view 查询条件保存相关路由
|
||||
pages.GET("/saved-views", rt.auth(), rt.user(), rt.savedViewGets)
|
||||
pages.POST("/saved-views", rt.auth(), rt.user(), rt.savedViewAdd)
|
||||
pages.PUT("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewPut)
|
||||
pages.DELETE("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewDel)
|
||||
pages.POST("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteAdd)
|
||||
pages.DELETE("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteDel)
|
||||
}
|
||||
|
||||
r.GET("/api/n9e/versions", func(c *gin.Context) {
|
||||
@@ -608,6 +698,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/busi-groups", rt.busiGroupGetsByService)
|
||||
|
||||
service.GET("/datasources", rt.datasourceGetsByService)
|
||||
service.GET("/datasource-rsa-config", rt.datasourceRsaConfigGet)
|
||||
service.GET("/datasource-ids", rt.getDatasourceIds)
|
||||
service.POST("/server-heartbeat", rt.serverHeartbeat)
|
||||
service.GET("/servers-active", rt.serversActive)
|
||||
@@ -615,6 +706,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/recording-rules", rt.recordingRuleGetsByService)
|
||||
|
||||
service.GET("/alert-mutes", rt.alertMuteGets)
|
||||
service.GET("/active-alert-mutes", rt.activeAlertMuteGets)
|
||||
service.POST("/alert-mutes", rt.alertMuteAddByService)
|
||||
service.DELETE("/alert-mutes", rt.alertMuteDel)
|
||||
|
||||
@@ -663,6 +755,17 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/message-templates", rt.messageTemplateGets)
|
||||
|
||||
service.GET("/event-pipelines", rt.eventPipelinesListByService)
|
||||
service.POST("/event-pipeline/:id/trigger", rt.triggerEventPipelineByService)
|
||||
service.POST("/event-pipeline/:id/stream", rt.streamEventPipelineByService)
|
||||
service.POST("/event-pipeline-execution", rt.eventPipelineExecutionAdd)
|
||||
|
||||
// 手机号加密存储配置接口
|
||||
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
|
||||
service.POST("/users/phone/decrypt", rt.usersPhoneDecrypt)
|
||||
service.POST("/users/phone/refresh-encryption-config", rt.usersPhoneDecryptRefresh)
|
||||
|
||||
service.GET("/builtin-components", rt.builtinComponentsGets)
|
||||
service.GET("/builtin-payloads", rt.builtinPayloadsGets)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
747
center/router/router_ai_config.go
Normal file
747
center/router/router_ai_config.go
Normal file
@@ -0,0 +1,747 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// ========================
|
||||
// AI Agent handlers
|
||||
// ========================
|
||||
|
||||
func (rt *Router) aiAgentGets(c *gin.Context) {
|
||||
lst, err := models.AIAgentGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiAgentGet(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AIAgentGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai agent not found")
|
||||
}
|
||||
ginx.NewRender(c).Data(obj, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiAgentAdd(c *gin.Context) {
|
||||
var obj models.AIAgent
|
||||
ginx.BindJSON(c, &obj)
|
||||
ginx.Dangerous(obj.Verify())
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
ginx.Dangerous(obj.Create(rt.Ctx, me.Username))
|
||||
ginx.NewRender(c).Data(obj.Id, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiAgentPut(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AIAgentGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai agent not found")
|
||||
}
|
||||
|
||||
var ref models.AIAgent
|
||||
ginx.BindJSON(c, &ref)
|
||||
ginx.Dangerous(ref.Verify())
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
ginx.NewRender(c).Message(obj.Update(rt.Ctx, me.Username, ref))
|
||||
}
|
||||
|
||||
func (rt *Router) aiAgentDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AIAgentGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai agent not found")
|
||||
}
|
||||
ginx.NewRender(c).Message(obj.Delete(rt.Ctx))
|
||||
}
|
||||
|
||||
// ========================
|
||||
// AI Skill handlers
|
||||
// ========================
|
||||
|
||||
func (rt *Router) aiSkillGets(c *gin.Context) {
|
||||
search := ginx.QueryStr(c, "search", "")
|
||||
lst, err := models.AISkillGets(rt.Ctx, search)
|
||||
ginx.Dangerous(err)
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiSkillGet(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AISkillGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai skill not found")
|
||||
}
|
||||
|
||||
// Include associated files (without content)
|
||||
files, err := models.AISkillFileGets(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
obj.Files = files
|
||||
|
||||
ginx.NewRender(c).Data(obj, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiSkillAdd(c *gin.Context) {
|
||||
var obj models.AISkill
|
||||
ginx.BindJSON(c, &obj)
|
||||
ginx.Dangerous(obj.Verify())
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
obj.CreatedBy = me.Username
|
||||
obj.UpdatedBy = me.Username
|
||||
|
||||
ginx.Dangerous(obj.Create(rt.Ctx))
|
||||
ginx.NewRender(c).Data(obj.Id, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiSkillPut(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AISkillGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai skill not found")
|
||||
}
|
||||
|
||||
var ref models.AISkill
|
||||
ginx.BindJSON(c, &ref)
|
||||
ginx.Dangerous(ref.Verify())
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ref.UpdatedBy = me.Username
|
||||
|
||||
ginx.NewRender(c).Message(obj.Update(rt.Ctx, ref))
|
||||
}
|
||||
|
||||
func (rt *Router) aiSkillDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AISkillGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai skill not found")
|
||||
}
|
||||
|
||||
// Cascade delete skill files
|
||||
ginx.Dangerous(models.AISkillFileDeleteBySkillId(rt.Ctx, id))
|
||||
ginx.NewRender(c).Message(obj.Delete(rt.Ctx))
|
||||
}
|
||||
|
||||
func (rt *Router) aiSkillImport(c *gin.Context) {
|
||||
file, header, err := c.Request.FormFile("file")
|
||||
ginx.Dangerous(err)
|
||||
defer file.Close()
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(header.Filename))
|
||||
if ext != ".md" {
|
||||
ginx.Bomb(http.StatusBadRequest, "only .md files are supported")
|
||||
}
|
||||
|
||||
content, err := io.ReadAll(file)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
meta, instructions := parseSkillMarkdown(string(content), header.Filename, ext)
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
skill := models.AISkill{
|
||||
Name: meta.Name,
|
||||
Description: meta.Description,
|
||||
Instructions: instructions,
|
||||
License: meta.License,
|
||||
Compatibility: meta.Compatibility,
|
||||
Metadata: meta.Metadata,
|
||||
AllowedTools: meta.AllowedTools,
|
||||
CreatedBy: me.Username,
|
||||
UpdatedBy: me.Username,
|
||||
}
|
||||
ginx.Dangerous(skill.Create(rt.Ctx))
|
||||
ginx.NewRender(c).Data(skill.Id, nil)
|
||||
}
|
||||
|
||||
// parseSkillMarkdown parses a SKILL.md file with optional YAML frontmatter.
|
||||
// Frontmatter format:
|
||||
//
|
||||
// ---
|
||||
// name: my-skill
|
||||
// description: what this skill does
|
||||
// ---
|
||||
// # Actual instructions content...
|
||||
// skillFrontmatter mirrors the YAML frontmatter block accepted at the top of
// an imported SKILL.md file (the section between the opening and closing
// "---" delimiters). Unknown keys in the frontmatter are ignored by the
// YAML decoder.
type skillFrontmatter struct {
	Name          string            `yaml:"name"`
	Description   string            `yaml:"description"`
	License       string            `yaml:"license"`
	Compatibility string            `yaml:"compatibility"`
	Metadata      map[string]string `yaml:"metadata"`
	AllowedTools  string            `yaml:"allowed-tools"`
}
|
||||
|
||||
func parseSkillMarkdown(content, filename, ext string) (meta skillFrontmatter, instructions string) {
|
||||
text := strings.TrimSpace(content)
|
||||
|
||||
// Try to parse YAML frontmatter (between --- delimiters)
|
||||
if strings.HasPrefix(text, "---") {
|
||||
endIdx := strings.Index(text[3:], "\n---")
|
||||
if endIdx >= 0 {
|
||||
frontmatter := text[3 : 3+endIdx]
|
||||
body := strings.TrimSpace(text[3+endIdx+4:]) // skip past closing ---
|
||||
|
||||
if yaml.Unmarshal([]byte(frontmatter), &meta) == nil && meta.Name != "" {
|
||||
return meta, body
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No valid frontmatter, fallback: filename as name, entire content as instructions
|
||||
meta.Name = strings.TrimSuffix(filename, ext)
|
||||
return meta, content
|
||||
}
|
||||
|
||||
// ========================
|
||||
// AI Skill File handlers
|
||||
// ========================
|
||||
|
||||
func (rt *Router) aiSkillFileAdd(c *gin.Context) {
|
||||
skillId := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
// Verify skill exists
|
||||
skill, err := models.AISkillGetById(rt.Ctx, skillId)
|
||||
ginx.Dangerous(err)
|
||||
if skill == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai skill not found")
|
||||
}
|
||||
|
||||
file, header, err := c.Request.FormFile("file")
|
||||
ginx.Dangerous(err)
|
||||
defer file.Close()
|
||||
|
||||
// Validate file extension
|
||||
ext := strings.ToLower(filepath.Ext(header.Filename))
|
||||
allowed := map[string]bool{".md": true, ".txt": true, ".json": true, ".yaml": true, ".yml": true, ".csv": true}
|
||||
if !allowed[ext] {
|
||||
ginx.Bomb(http.StatusBadRequest, "file type not allowed, only .md/.txt/.json/.yaml/.csv")
|
||||
}
|
||||
|
||||
// Validate file size (2MB max)
|
||||
if header.Size > 2*1024*1024 {
|
||||
ginx.Bomb(http.StatusBadRequest, "file size exceeds 2MB limit")
|
||||
}
|
||||
|
||||
content, err := io.ReadAll(file)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
skillFile := models.AISkillFile{
|
||||
SkillId: skillId,
|
||||
Name: header.Filename,
|
||||
Content: string(content),
|
||||
CreatedBy: me.Username,
|
||||
}
|
||||
ginx.Dangerous(skillFile.Create(rt.Ctx))
|
||||
ginx.NewRender(c).Data(skillFile.Id, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiSkillFileGet(c *gin.Context) {
|
||||
fileId := ginx.UrlParamInt64(c, "fileId")
|
||||
obj, err := models.AISkillFileGetById(rt.Ctx, fileId)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "file not found")
|
||||
}
|
||||
ginx.NewRender(c).Data(obj, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiSkillFileDel(c *gin.Context) {
|
||||
fileId := ginx.UrlParamInt64(c, "fileId")
|
||||
obj, err := models.AISkillFileGetById(rt.Ctx, fileId)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "file not found")
|
||||
}
|
||||
ginx.NewRender(c).Message(obj.Delete(rt.Ctx))
|
||||
}
|
||||
|
||||
// ========================
|
||||
// MCP Server handlers
|
||||
// ========================
|
||||
|
||||
func (rt *Router) mcpServerGets(c *gin.Context) {
|
||||
lst, err := models.MCPServerGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) mcpServerGet(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.MCPServerGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "mcp server not found")
|
||||
}
|
||||
ginx.NewRender(c).Data(obj, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) mcpServerAdd(c *gin.Context) {
|
||||
var obj models.MCPServer
|
||||
ginx.BindJSON(c, &obj)
|
||||
ginx.Dangerous(obj.Verify())
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
obj.CreatedBy = me.Username
|
||||
obj.UpdatedBy = me.Username
|
||||
|
||||
ginx.Dangerous(obj.Create(rt.Ctx))
|
||||
ginx.NewRender(c).Data(obj.Id, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) mcpServerPut(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.MCPServerGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "mcp server not found")
|
||||
}
|
||||
|
||||
var ref models.MCPServer
|
||||
ginx.BindJSON(c, &ref)
|
||||
ginx.Dangerous(ref.Verify())
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ref.UpdatedBy = me.Username
|
||||
|
||||
ginx.NewRender(c).Message(obj.Update(rt.Ctx, ref))
|
||||
}
|
||||
|
||||
func (rt *Router) mcpServerDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.MCPServerGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "mcp server not found")
|
||||
}
|
||||
ginx.NewRender(c).Message(obj.Delete(rt.Ctx))
|
||||
}
|
||||
|
||||
// ========================
|
||||
// AI LLM Config handlers
|
||||
// ========================
|
||||
|
||||
func (rt *Router) aiLLMConfigGets(c *gin.Context) {
|
||||
lst, err := models.AILLMConfigGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiLLMConfigGet(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AILLMConfigGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai llm config not found")
|
||||
}
|
||||
ginx.NewRender(c).Data(obj, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiLLMConfigAdd(c *gin.Context) {
|
||||
var obj models.AILLMConfig
|
||||
ginx.BindJSON(c, &obj)
|
||||
ginx.Dangerous(obj.Verify())
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
ginx.Dangerous(obj.Create(rt.Ctx, me.Username))
|
||||
ginx.NewRender(c).Data(obj.Id, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiLLMConfigPut(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AILLMConfigGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai llm config not found")
|
||||
}
|
||||
|
||||
var ref models.AILLMConfig
|
||||
ginx.BindJSON(c, &ref)
|
||||
ginx.Dangerous(ref.Verify())
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
ginx.NewRender(c).Message(obj.Update(rt.Ctx, me.Username, ref))
|
||||
}
|
||||
|
||||
func (rt *Router) aiLLMConfigDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AILLMConfigGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai llm config not found")
|
||||
}
|
||||
ginx.NewRender(c).Message(obj.Delete(rt.Ctx))
|
||||
}
|
||||
|
||||
func (rt *Router) aiLLMConfigTest(c *gin.Context) {
|
||||
var body struct {
|
||||
APIType string `json:"api_type"`
|
||||
APIURL string `json:"api_url"`
|
||||
APIKey string `json:"api_key"`
|
||||
Model string `json:"model"`
|
||||
ExtraConfig models.LLMExtraConfig `json:"extra_config"`
|
||||
}
|
||||
ginx.BindJSON(c, &body)
|
||||
|
||||
if body.APIType == "" || body.APIURL == "" || body.APIKey == "" || body.Model == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "api_type, api_url, api_key, model are required")
|
||||
}
|
||||
|
||||
obj := &models.AILLMConfig{
|
||||
APIType: body.APIType,
|
||||
APIURL: body.APIURL,
|
||||
APIKey: body.APIKey,
|
||||
Model: body.Model,
|
||||
ExtraConfig: body.ExtraConfig,
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
testErr := testAIAgent(obj)
|
||||
durationMs := time.Since(start).Milliseconds()
|
||||
|
||||
result := gin.H{
|
||||
"success": testErr == nil,
|
||||
"duration_ms": durationMs,
|
||||
}
|
||||
ginx.NewRender(c).Data(result, testErr)
|
||||
}
|
||||
|
||||
// ========================
|
||||
// AI Agent test
|
||||
// ========================
|
||||
|
||||
func (rt *Router) aiAgentTest(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
agent, err := models.AIAgentGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if agent == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "ai agent not found")
|
||||
}
|
||||
|
||||
llmCfg, err := models.AILLMConfigGetById(rt.Ctx, agent.LLMConfigId)
|
||||
ginx.Dangerous(err)
|
||||
if llmCfg == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "referenced LLM config not found")
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
testErr := testAIAgent(llmCfg)
|
||||
durationMs := time.Since(start).Milliseconds()
|
||||
|
||||
result := gin.H{
|
||||
"success": testErr == nil,
|
||||
"duration_ms": durationMs,
|
||||
}
|
||||
if testErr != nil {
|
||||
result["error"] = testErr.Error()
|
||||
}
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
// testAIAgent fires a minimal one-message chat request at the configured LLM
// endpoint to validate URL, credentials and network settings. It returns nil
// when the provider responds with an HTTP status below 400.
//
// Supported p.APIType values: "openai", "claude", "gemini"; anything else is
// rejected without making a request.
func testAIAgent(p *models.AILLMConfig) error {
	extra := p.ExtraConfig

	// Build HTTP client with ExtraConfig settings
	timeout := 30 * time.Second
	if extra.TimeoutSeconds > 0 {
		timeout = time.Duration(extra.TimeoutSeconds) * time.Second
	}

	transport := &http.Transport{}
	if extra.SkipTLSVerify {
		transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
	}
	if extra.Proxy != "" {
		// A proxy URL that fails to parse is silently ignored and the request
		// goes out direct.
		if proxyURL, err := url.Parse(extra.Proxy); err == nil {
			transport.Proxy = http.ProxyURL(proxyURL)
		}
	}

	client := &http.Client{Timeout: timeout, Transport: transport}

	var reqURL string
	var reqBody []byte
	hdrs := map[string]string{"Content-Type": "application/json"}

	switch p.APIType {
	case "openai":
		// Accept both a bare base URL and one already ending in /chat/completions.
		base := strings.TrimRight(p.APIURL, "/")
		if strings.HasSuffix(base, "/chat/completions") {
			reqURL = base
		} else {
			reqURL = base + "/chat/completions"
		}
		reqBody, _ = json.Marshal(map[string]interface{}{
			"model": p.Model,
			"messages": []map[string]string{{"role": "user", "content": "Hi"}},
			"max_tokens": 5,
		})
		hdrs["Authorization"] = "Bearer " + p.APIKey
	case "claude":
		reqURL = strings.TrimRight(p.APIURL, "/") + "/v1/messages"
		reqBody, _ = json.Marshal(map[string]interface{}{
			"model": p.Model,
			"messages": []map[string]string{{"role": "user", "content": "Hi"}},
			"max_tokens": 5,
		})
		hdrs["x-api-key"] = p.APIKey
		hdrs["anthropic-version"] = "2023-06-01"
	case "gemini":
		// NOTE(review): the API key travels as a URL query parameter here, so
		// it may surface in proxy/server logs — confirm this is acceptable.
		reqURL = strings.TrimRight(p.APIURL, "/") + "/v1beta/models/" + p.Model + ":generateContent?key=" + p.APIKey
		reqBody, _ = json.Marshal(map[string]interface{}{
			"contents": []map[string]interface{}{
				{"parts": []map[string]string{{"text": "Hi"}}},
			},
		})
	default:
		return fmt.Errorf("unsupported api_type: %s", p.APIType)
	}

	req, err := http.NewRequest("POST", reqURL, bytes.NewReader(reqBody))
	if err != nil {
		return err
	}
	for k, v := range hdrs {
		req.Header.Set(k, v)
	}
	// Apply custom headers from ExtraConfig
	// (set after the defaults, so custom entries can override them).
	for k, v := range extra.CustomHeaders {
		req.Header.Set(k, v)
	}

	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		// Truncate the provider's error body so a huge response cannot bloat
		// the returned error message.
		body, _ := io.ReadAll(resp.Body)
		if len(body) > 500 {
			body = body[:500]
		}
		return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
	}
	return nil
}
|
||||
|
||||
// ========================
|
||||
// MCP Server test & tools
|
||||
// ========================
|
||||
|
||||
func (rt *Router) mcpServerTest(c *gin.Context) {
|
||||
var body struct {
|
||||
URL string `json:"url"`
|
||||
Headers map[string]string `json:"headers"`
|
||||
}
|
||||
ginx.BindJSON(c, &body)
|
||||
|
||||
if body.URL == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "url is required")
|
||||
}
|
||||
|
||||
obj := &models.MCPServer{
|
||||
URL: body.URL,
|
||||
Headers: body.Headers,
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
tools, testErr := listMCPTools(obj)
|
||||
durationMs := time.Since(start).Milliseconds()
|
||||
|
||||
result := gin.H{
|
||||
"success": testErr == nil,
|
||||
"duration_ms": durationMs,
|
||||
"tool_count": len(tools),
|
||||
}
|
||||
if testErr != nil {
|
||||
result["error"] = testErr.Error()
|
||||
}
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) mcpServerTools(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.MCPServerGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "mcp server not found")
|
||||
}
|
||||
|
||||
tools, err := listMCPTools(obj)
|
||||
ginx.Dangerous(err)
|
||||
ginx.NewRender(c).Data(tools, nil)
|
||||
}
|
||||
|
||||
// mcpTool is the subset of an MCP tool descriptor surfaced to API callers;
// any extra fields in the server's tools/list response are dropped.
type mcpTool struct {
	Name        string `json:"name"`
	Description string `json:"description"`
}
|
||||
|
||||
// listMCPTools performs the MCP handshake against the given server and
// returns the tools it advertises: initialize, then the initialized
// notification, then tools/list. A server that answers but advertises no
// tools yields an empty (non-nil) slice and no error.
func listMCPTools(s *models.MCPServer) ([]mcpTool, error) {
	client := &http.Client{Timeout: 30 * time.Second}

	hdrs := s.Headers

	// Step 1: Initialize
	initResp, initSessionID, err := sendMCPRPC(client, s.URL, hdrs, "", 1, "initialize", map[string]interface{}{
		"protocolVersion": "2024-11-05",
		"capabilities": map[string]interface{}{},
		"clientInfo": map[string]interface{}{"name": "nightingale", "version": "1.0.0"},
	})
	if err != nil {
		return nil, fmt.Errorf("initialize: %v", err)
	}
	// Only the session id from initialize is needed; the response payload is unused.
	_ = initResp

	// Send initialized notification
	// (best-effort: a failure here is deliberately ignored so a server that
	// rejects the notification can still serve tools/list).
	sendMCPRPC(client, s.URL, hdrs, initSessionID, 0, "notifications/initialized", map[string]interface{}{})

	// Step 2: List tools
	toolsResp, _, err := sendMCPRPC(client, s.URL, hdrs, initSessionID, 2, "tools/list", map[string]interface{}{})
	if err != nil {
		return nil, fmt.Errorf("tools/list: %v", err)
	}

	if toolsResp == nil || toolsResp.Result == nil {
		return []mcpTool{}, nil
	}

	toolsRaw, ok := toolsResp.Result["tools"]
	if !ok {
		return []mcpTool{}, nil
	}

	// Round-trip through JSON to convert the loosely-typed result into
	// []mcpTool; a malformed "tools" value degrades to an empty list rather
	// than an error.
	toolsJSON, _ := json.Marshal(toolsRaw)
	var tools []mcpTool
	json.Unmarshal(toolsJSON, &tools)
	return tools, nil
}
|
||||
|
||||
// jsonRPCResponse is a minimal JSON-RPC 2.0 response envelope: exactly one of
// Result or Error is expected to be populated by a conforming server.
type jsonRPCResponse struct {
	JSONRPC string `json:"jsonrpc"`
	ID interface{} `json:"id"`
	Result map[string]interface{} `json:"result"`
	Error *jsonRPCError `json:"error"`
}

// jsonRPCError is the error member of a JSON-RPC 2.0 response.
type jsonRPCError struct {
	Code int `json:"code"`
	Message string `json:"message"`
}
|
||||
|
||||
// sendMCPRPC posts a single JSON-RPC message to an MCP server and decodes the
// reply, handling both plain JSON and SSE (text/event-stream) response bodies.
//
// id > 0 marks a request (a response is parsed and returned); id <= 0 marks a
// notification (no "id" member is sent and the response body is not read).
// The returned session id is the server's Mcp-Session-Id header when present,
// otherwise the session id that was passed in. Error bodies are truncated
// before being folded into the returned error.
func sendMCPRPC(client *http.Client, serverURL string, hdrs map[string]string, sessionID string, id int, method string, params interface{}) (*jsonRPCResponse, string, error) {
	body := map[string]interface{}{
		"jsonrpc": "2.0",
		"method": method,
		"params": params,
	}
	if id > 0 {
		body["id"] = id
	}

	reqBody, _ := json.Marshal(body)
	req, err := http.NewRequest("POST", serverURL, bytes.NewReader(reqBody))
	if err != nil {
		return nil, "", err
	}

	req.Header.Set("Content-Type", "application/json")
	// Advertise both encodings; some MCP servers answer over SSE.
	req.Header.Set("Accept", "application/json, text/event-stream")
	if sessionID != "" {
		req.Header.Set("Mcp-Session-Id", sessionID)
	}
	// Caller-supplied headers last, so they can override the defaults above.
	for k, v := range hdrs {
		req.Header.Set(k, v)
	}

	resp, err := client.Do(req)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	// Prefer a session id issued by the server; otherwise keep the current one.
	newSessionID := resp.Header.Get("Mcp-Session-Id")
	if newSessionID == "" {
		newSessionID = sessionID
	}

	// Notification (no id) - no response body expected
	if id <= 0 {
		return nil, newSessionID, nil
	}

	if resp.StatusCode >= 400 {
		respBody, _ := io.ReadAll(resp.Body)
		if len(respBody) > 500 {
			respBody = respBody[:500]
		}
		return nil, newSessionID, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
	}

	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, newSessionID, err
	}

	// Handle SSE response
	contentType := resp.Header.Get("Content-Type")
	if strings.Contains(contentType, "text/event-stream") {
		// Scan the stream for the first "data:" line that decodes into a
		// JSON-RPC message carrying a result or an error.
		for _, line := range strings.Split(string(respBody), "\n") {
			if strings.HasPrefix(line, "data: ") {
				data := strings.TrimPrefix(line, "data: ")
				var rpcResp jsonRPCResponse
				if json.Unmarshal([]byte(data), &rpcResp) == nil && (rpcResp.Result != nil || rpcResp.Error != nil) {
					if rpcResp.Error != nil {
						return &rpcResp, newSessionID, fmt.Errorf("RPC error %d: %s", rpcResp.Error.Code, rpcResp.Error.Message)
					}
					return &rpcResp, newSessionID, nil
				}
			}
		}
		return nil, newSessionID, fmt.Errorf("no valid JSON-RPC response in SSE stream")
	}

	// Handle JSON response
	var rpcResp jsonRPCResponse
	if err := json.Unmarshal(respBody, &rpcResp); err != nil {
		if len(respBody) > 200 {
			respBody = respBody[:200]
		}
		return nil, newSessionID, fmt.Errorf("invalid response: %s", string(respBody))
	}

	if rpcResp.Error != nil {
		return &rpcResp, newSessionID, fmt.Errorf("RPC error %d: %s", rpcResp.Error.Code, rpcResp.Error.Message)
	}

	return &rpcResp, newSessionID, nil
}
|
||||
114
center/router/router_ai_conversation.go
Normal file
114
center/router/router_ai_conversation.go
Normal file
@@ -0,0 +1,114 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) aiConversationGets(c *gin.Context) {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
lst, err := models.AIConversationGetsByUserId(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiConversationAdd(c *gin.Context) {
|
||||
var obj models.AIConversation
|
||||
ginx.BindJSON(c, &obj)
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
obj.UserId = me.Id
|
||||
ginx.Dangerous(obj.Verify())
|
||||
ginx.Dangerous(obj.Create(rt.Ctx))
|
||||
ginx.NewRender(c).Data(obj, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiConversationGet(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AIConversationGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "conversation not found")
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if obj.UserId != me.Id {
|
||||
ginx.Bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
messages, err := models.AIConversationMessageGetsByConversationId(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"conversation": obj,
|
||||
"messages": messages,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) aiConversationPut(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AIConversationGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "conversation not found")
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if obj.UserId != me.Id {
|
||||
ginx.Bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
var body struct {
|
||||
Title string `json:"title"`
|
||||
}
|
||||
ginx.BindJSON(c, &body)
|
||||
|
||||
ginx.NewRender(c).Message(obj.Update(rt.Ctx, body.Title))
|
||||
}
|
||||
|
||||
func (rt *Router) aiConversationDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AIConversationGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "conversation not found")
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if obj.UserId != me.Id {
|
||||
ginx.Bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(obj.Delete(rt.Ctx))
|
||||
}
|
||||
|
||||
func (rt *Router) aiConversationMessageAdd(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
obj, err := models.AIConversationGetById(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "conversation not found")
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if obj.UserId != me.Id {
|
||||
ginx.Bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
var msgs []models.AIConversationMessage
|
||||
ginx.BindJSON(c, &msgs)
|
||||
|
||||
for i := range msgs {
|
||||
msgs[i].ConversationId = id
|
||||
ginx.Dangerous(msgs[i].Create(rt.Ctx))
|
||||
}
|
||||
|
||||
// Update conversation timestamp
|
||||
obj.UpdateTime(rt.Ctx)
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
}
|
||||
345
center/router/router_aiagent.go
Normal file
345
center/router/router_aiagent.go
Normal file
@@ -0,0 +1,345 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/aiagent"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// AIChatRequest is the generic chat request dispatched by action_key.
|
||||
type AIChatRequest struct {
|
||||
ActionKey string `json:"action_key"` // e.g. "query_generator"
|
||||
UserInput string `json:"user_input"`
|
||||
History []aiagent.ChatMessage `json:"history,omitempty"`
|
||||
Context map[string]interface{} `json:"context,omitempty"` // action-specific params
|
||||
}
|
||||
|
||||
// actionHandler defines how each action_key is processed.
|
||||
type actionHandler struct {
|
||||
useCase string // maps to AIAgent.UseCase for finding the right agent config
|
||||
validate func(req *AIChatRequest) error
|
||||
selectTools func(req *AIChatRequest) []string
|
||||
buildPrompt func(req *AIChatRequest) string
|
||||
buildInputs func(req *AIChatRequest) map[string]string
|
||||
}
|
||||
|
||||
var actionRegistry = map[string]*actionHandler{
|
||||
"query_generator": {
|
||||
useCase: "chat",
|
||||
validate: validateQueryGenerator,
|
||||
selectTools: selectQueryGeneratorTools,
|
||||
buildPrompt: buildQueryGeneratorPrompt,
|
||||
buildInputs: buildQueryGeneratorInputs,
|
||||
},
|
||||
}
|
||||
|
||||
// --- query_generator action ---
|
||||
|
||||
func ctxStr(ctx map[string]interface{}, key string) string {
|
||||
if v, ok := ctx[key]; ok {
|
||||
if s, ok := v.(string); ok {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func ctxInt64(ctx map[string]interface{}, key string) int64 {
|
||||
if v, ok := ctx[key]; ok {
|
||||
switch n := v.(type) {
|
||||
case float64:
|
||||
return int64(n)
|
||||
case int64:
|
||||
return n
|
||||
case json.Number:
|
||||
i, _ := n.Int64()
|
||||
return i
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func validateQueryGenerator(req *AIChatRequest) error {
|
||||
dsType := ctxStr(req.Context, "datasource_type")
|
||||
dsID := ctxInt64(req.Context, "datasource_id")
|
||||
if dsType == "" {
|
||||
return fmt.Errorf("context.datasource_type is required")
|
||||
}
|
||||
if dsID == 0 {
|
||||
return fmt.Errorf("context.datasource_id is required")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func selectQueryGeneratorTools(req *AIChatRequest) []string {
|
||||
dsType := ctxStr(req.Context, "datasource_type")
|
||||
switch dsType {
|
||||
case "prometheus":
|
||||
return []string{"list_metrics", "get_metric_labels"}
|
||||
case "mysql", "doris", "ck", "clickhouse", "pgsql", "postgresql":
|
||||
return []string{"list_databases", "list_tables", "describe_table"}
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func buildQueryGeneratorPrompt(req *AIChatRequest) string {
|
||||
dsType := ctxStr(req.Context, "datasource_type")
|
||||
dbName := ctxStr(req.Context, "database_name")
|
||||
tableName := ctxStr(req.Context, "table_name")
|
||||
|
||||
switch dsType {
|
||||
case "prometheus":
|
||||
return fmt.Sprintf(`You are a PromQL expert. The user wants to query Prometheus metrics.
|
||||
|
||||
User request: %s
|
||||
|
||||
Please use the available tools to explore the metrics and generate the correct PromQL query.
|
||||
- First use list_metrics to find relevant metrics
|
||||
- Then use get_metric_labels to understand the label structure
|
||||
- Finally provide the PromQL query as your Final Answer
|
||||
|
||||
Your Final Answer MUST be a valid JSON object with these fields:
|
||||
{"query": "<the PromQL query>", "explanation": "<brief explanation in the user's language>"}`, req.UserInput)
|
||||
|
||||
default: // SQL-based datasources
|
||||
dbContext := ""
|
||||
if dbName != "" {
|
||||
dbContext += fmt.Sprintf("\nTarget database: %s", dbName)
|
||||
}
|
||||
if tableName != "" {
|
||||
dbContext += fmt.Sprintf("\nTarget table: %s", tableName)
|
||||
}
|
||||
|
||||
return fmt.Sprintf(`You are a SQL expert for %s databases. The user wants to query data.
|
||||
%s
|
||||
User request: %s
|
||||
|
||||
Please use the available tools to explore the database schema and generate the correct SQL query.
|
||||
- Use list_databases to see available databases
|
||||
- Use list_tables to see tables in the target database
|
||||
- Use describe_table to understand the table structure
|
||||
- Finally provide the SQL query as your Final Answer
|
||||
|
||||
Your Final Answer MUST be a valid JSON object with these fields:
|
||||
{"query": "<the SQL query>", "explanation": "<brief explanation in the user's language>"}`, dsType, dbContext, req.UserInput)
|
||||
}
|
||||
}
|
||||
|
||||
func buildQueryGeneratorInputs(req *AIChatRequest) map[string]string {
|
||||
inputs := map[string]string{
|
||||
"user_input": req.UserInput,
|
||||
}
|
||||
for _, key := range []string{"datasource_type", "datasource_id", "database_name", "table_name"} {
|
||||
if v := ctxStr(req.Context, key); v != "" {
|
||||
inputs[key] = v
|
||||
}
|
||||
}
|
||||
// datasource_id may be a number in JSON
|
||||
if inputs["datasource_id"] == "" {
|
||||
if id := ctxInt64(req.Context, "datasource_id"); id > 0 {
|
||||
inputs["datasource_id"] = fmt.Sprintf("%d", id)
|
||||
}
|
||||
}
|
||||
return inputs
|
||||
}
|
||||
|
||||
// --- generic handler ---
|
||||
|
||||
func (rt *Router) aiChat(c *gin.Context) {
|
||||
if !rt.Center.AIAgent.Enable {
|
||||
ginx.Bomb(http.StatusServiceUnavailable, "AI Agent is not enabled")
|
||||
return
|
||||
}
|
||||
|
||||
var req AIChatRequest
|
||||
ginx.BindJSON(c, &req)
|
||||
|
||||
if req.UserInput == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "user_input is required")
|
||||
return
|
||||
}
|
||||
if req.ActionKey == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "action_key is required")
|
||||
return
|
||||
}
|
||||
if req.Context == nil {
|
||||
req.Context = make(map[string]interface{})
|
||||
}
|
||||
|
||||
handler, ok := actionRegistry[req.ActionKey]
|
||||
if !ok {
|
||||
ginx.Bomb(http.StatusBadRequest, "unsupported action_key: %s", req.ActionKey)
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("[AIChat] action=%s, user_input=%q", req.ActionKey, truncStr(req.UserInput, 100))
|
||||
|
||||
// Action-specific validation
|
||||
if handler.validate != nil {
|
||||
if err := handler.validate(&req); err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Find AI agent by use_case
|
||||
agent, err := models.AIAgentGetByUseCase(rt.Ctx, handler.useCase)
|
||||
if err != nil || agent == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "no AI agent configured for use_case=%s", handler.useCase)
|
||||
return
|
||||
}
|
||||
|
||||
// Resolve LLM config
|
||||
llmCfg, err := models.AILLMConfigGetById(rt.Ctx, agent.LLMConfigId)
|
||||
if err != nil || llmCfg == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "referenced LLM config not found")
|
||||
return
|
||||
}
|
||||
agent.LLMConfig = llmCfg
|
||||
|
||||
// Select tools
|
||||
var tools []aiagent.AgentTool
|
||||
if handler.selectTools != nil {
|
||||
toolNames := handler.selectTools(&req)
|
||||
if toolNames != nil {
|
||||
tools = aiagent.GetBuiltinToolDefs(toolNames)
|
||||
}
|
||||
}
|
||||
|
||||
// Parse extra config
|
||||
extraConfig := llmCfg.ExtraConfig
|
||||
|
||||
timeout := 120000
|
||||
if extraConfig.TimeoutSeconds > 0 {
|
||||
timeout = extraConfig.TimeoutSeconds * 1000
|
||||
}
|
||||
|
||||
// Build prompt
|
||||
userPrompt := ""
|
||||
if handler.buildPrompt != nil {
|
||||
userPrompt = handler.buildPrompt(&req)
|
||||
}
|
||||
|
||||
// Build workflow inputs
|
||||
inputs := map[string]string{"user_input": req.UserInput}
|
||||
if handler.buildInputs != nil {
|
||||
inputs = handler.buildInputs(&req)
|
||||
}
|
||||
|
||||
// Create agent
|
||||
agentCfg := aiagent.NewAgent(&aiagent.AIAgentConfig{
|
||||
Provider: llmCfg.APIType,
|
||||
LLMURL: llmCfg.APIURL,
|
||||
Model: llmCfg.Model,
|
||||
APIKey: llmCfg.APIKey,
|
||||
Headers: extraConfig.CustomHeaders,
|
||||
AgentMode: aiagent.AgentModeReAct,
|
||||
Tools: tools,
|
||||
Timeout: timeout,
|
||||
Stream: true,
|
||||
UserPromptTemplate: userPrompt,
|
||||
SkipSSLVerify: extraConfig.SkipTLSVerify,
|
||||
Proxy: extraConfig.Proxy,
|
||||
Temperature: extraConfig.Temperature,
|
||||
MaxTokens: extraConfig.MaxTokens,
|
||||
})
|
||||
|
||||
// Inject PromClient getter
|
||||
aiagent.SetPromClientGetter(func(dsId int64) prom.API {
|
||||
return rt.PromClients.GetCli(dsId)
|
||||
})
|
||||
|
||||
// Streaming setup
|
||||
streamChan := make(chan *models.StreamChunk, 100)
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Stream: true,
|
||||
StreamChan: streamChan,
|
||||
Inputs: inputs,
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
c.Header("X-Accel-Buffering", "no")
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
go func() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
logger.Errorf("[AIChat] PANIC in agent goroutine: %v", r)
|
||||
streamChan <- &models.StreamChunk{
|
||||
Type: models.StreamTypeError,
|
||||
Content: fmt.Sprintf("internal error: %v", r),
|
||||
Done: true,
|
||||
Timestamp: time.Now().UnixMilli(),
|
||||
}
|
||||
close(streamChan)
|
||||
}
|
||||
}()
|
||||
_, _, err := agentCfg.Process(rt.Ctx, wfCtx)
|
||||
if err != nil {
|
||||
logger.Errorf("[AIChat] agent Process error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Stream SSE events
|
||||
var accumulatedMessage string
|
||||
c.Stream(func(w io.Writer) bool {
|
||||
chunk, ok := <-streamChan
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
data, _ := json.Marshal(chunk)
|
||||
|
||||
if chunk.Type == models.StreamTypeText || chunk.Type == models.StreamTypeThinking {
|
||||
if chunk.Delta != "" {
|
||||
accumulatedMessage += chunk.Delta
|
||||
} else if chunk.Content != "" {
|
||||
accumulatedMessage += chunk.Content
|
||||
}
|
||||
}
|
||||
|
||||
if chunk.Type == models.StreamTypeError {
|
||||
fmt.Fprintf(w, "event: error\ndata: %s\n\n", data)
|
||||
c.Writer.Flush()
|
||||
return false
|
||||
}
|
||||
|
||||
if chunk.Done || chunk.Type == models.StreamTypeDone {
|
||||
doneData := map[string]interface{}{
|
||||
"type": "done",
|
||||
"duration_ms": time.Since(startTime).Milliseconds(),
|
||||
"message": accumulatedMessage,
|
||||
"response": chunk.Content,
|
||||
}
|
||||
finalData, _ := json.Marshal(doneData)
|
||||
fmt.Fprintf(w, "event: done\ndata: %s\n\n", finalData)
|
||||
c.Writer.Flush()
|
||||
return false
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "event: chunk\ndata: %s\n\n", data)
|
||||
c.Writer.Flush()
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func truncStr(s string, maxLen int) string {
|
||||
if len(s) <= maxLen {
|
||||
return s
|
||||
}
|
||||
return s[:maxLen] + "..."
|
||||
}
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// no param
|
||||
|
||||
@@ -10,9 +10,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func getUserGroupIds(ctx *gin.Context, rt *Router, myGroups bool) ([]int64, error) {
|
||||
@@ -263,11 +264,11 @@ func GetCurEventDetail(ctx *ctx.Context, eid int64) (*models.AlertCurEvent, erro
|
||||
event.NotifyVersion, err = GetEventNotifyVersion(ctx, event.RuleId, event.NotifyRuleIds)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
event.NotifyRules, err = GetEventNorifyRuleNames(ctx, event.NotifyRuleIds)
|
||||
event.NotifyRules, err = GetEventNotifyRuleNames(ctx, event.NotifyRuleIds)
|
||||
return event, err
|
||||
}
|
||||
|
||||
func GetEventNorifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
|
||||
func GetEventNotifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
|
||||
notifyRuleNames := make([]*models.EventNotifyRule, 0)
|
||||
notifyRules, err := models.NotifyRulesGet(ctx, "id in ?", notifyRuleIds)
|
||||
if err != nil {
|
||||
@@ -305,3 +306,123 @@ func (rt *Router) alertCurEventDelByHash(c *gin.Context) {
|
||||
hash := ginx.QueryStr(c, "hash")
|
||||
ginx.NewRender(c).Message(models.AlertCurEventDelByHash(rt.Ctx, hash))
|
||||
}
|
||||
|
||||
func (rt *Router) eventTagKeys(c *gin.Context) {
|
||||
// 获取最近1天的活跃告警事件
|
||||
now := time.Now().Unix()
|
||||
stime := now - 24*3600
|
||||
etime := now
|
||||
|
||||
// 获取用户可见的业务组ID列表
|
||||
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get business group ids: %v", err)
|
||||
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 查询活跃告警事件,限制数量以提高性能
|
||||
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 200, 0, []int64{})
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get current alert events: %v", err)
|
||||
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果没有查到事件,返回默认标签
|
||||
if len(events) == 0 {
|
||||
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 收集所有标签键并去重
|
||||
tagKeys := make(map[string]struct{})
|
||||
for _, event := range events {
|
||||
for key := range event.TagsMap {
|
||||
tagKeys[key] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// 转换为字符串切片
|
||||
var result []string
|
||||
for key := range tagKeys {
|
||||
result = append(result, key)
|
||||
}
|
||||
|
||||
// 如果没有收集到任何标签键,返回默认值
|
||||
if len(result) == 0 {
|
||||
result = []string{"ident", "app", "service", "instance"}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) eventTagValues(c *gin.Context) {
|
||||
// 获取标签key
|
||||
tagKey := ginx.QueryStr(c, "key")
|
||||
|
||||
// 获取最近1天的活跃告警事件
|
||||
now := time.Now().Unix()
|
||||
stime := now - 24*3600
|
||||
etime := now
|
||||
|
||||
// 获取用户可见的业务组ID列表
|
||||
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get business group ids: %v", err)
|
||||
ginx.NewRender(c).Data([]string{}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 查询活跃告警事件,获取更多数据以保证统计准确性
|
||||
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 1000, 0, []int64{})
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get current alert events: %v", err)
|
||||
ginx.NewRender(c).Data([]string{}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果没有查到事件,返回空数组
|
||||
if len(events) == 0 {
|
||||
ginx.NewRender(c).Data([]string{}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 统计标签值出现次数
|
||||
valueCount := make(map[string]int)
|
||||
for _, event := range events {
|
||||
// TagsMap已经在AlertCurEventsGet中处理,直接使用
|
||||
if value, exists := event.TagsMap[tagKey]; exists && value != "" {
|
||||
valueCount[value]++
|
||||
}
|
||||
}
|
||||
|
||||
// 转换为切片并按出现次数降序排序
|
||||
type tagValue struct {
|
||||
value string
|
||||
count int
|
||||
}
|
||||
|
||||
tagValues := make([]tagValue, 0, len(valueCount))
|
||||
for value, count := range valueCount {
|
||||
tagValues = append(tagValues, tagValue{value, count})
|
||||
}
|
||||
|
||||
// 按出现次数降序排序
|
||||
sort.Slice(tagValues, func(i, j int) bool {
|
||||
return tagValues[i].count > tagValues[j].count
|
||||
})
|
||||
|
||||
// 只取Top20并转换为字符串数组
|
||||
limit := 20
|
||||
if len(tagValues) < limit {
|
||||
limit = len(tagValues)
|
||||
}
|
||||
|
||||
result := make([]string, 0, limit)
|
||||
for i := 0; i < limit; i++ {
|
||||
result = append(result, tagValues[i].value)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
168
center/router/router_alert_eval_detail.go
Normal file
168
center/router/router_alert_eval_detail.go
Normal file
@@ -0,0 +1,168 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// alertEvalDetailPage renders an HTML log viewer page for alert rule evaluation logs.
|
||||
func (rt *Router) alertEvalDetailPage(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
c.String(http.StatusBadRequest, "invalid rule id format")
|
||||
return
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getAlertEvalLogs(id)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||
err = loggrep.RenderAlertEvalHTML(c.Writer, loggrep.AlertEvalPageData{
|
||||
RuleID: id,
|
||||
Instance: instance,
|
||||
Logs: logs,
|
||||
Total: len(logs),
|
||||
})
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "render error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// alertEvalDetailJSON returns JSON for alert rule evaluation logs.
|
||||
func (rt *Router) alertEvalDetailJSON(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
ginx.Bomb(200, "invalid rule id format")
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getAlertEvalLogs(id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// getAlertEvalLogs resolves the target instance(s) and retrieves alert eval logs.
|
||||
func (rt *Router) getAlertEvalLogs(id string) ([]string, string, error) {
|
||||
ruleId, _ := strconv.ParseInt(id, 10, 64)
|
||||
rule, err := models.AlertRuleGetById(rt.Ctx, ruleId)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if rule == nil {
|
||||
return nil, "", fmt.Errorf("no such alert rule")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
keyword := fmt.Sprintf("alert_eval_%s", id)
|
||||
|
||||
// Get datasource IDs for this rule
|
||||
dsIds := rt.DatasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
|
||||
if len(dsIds) == 0 {
|
||||
// No datasources found (e.g. host rule), try local grep
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// Find unique target nodes via hash ring, with DB fallback
|
||||
nodeSet := make(map[string]struct{})
|
||||
for _, dsId := range dsIds {
|
||||
node, err := rt.getNodeForDatasource(dsId, id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
nodeSet[node] = struct{}{}
|
||||
}
|
||||
|
||||
if len(nodeSet) == 0 {
|
||||
// Hash ring not ready, grep locally
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// Collect logs from all target nodes
|
||||
var allLogs []string
|
||||
var instances []string
|
||||
|
||||
for node := range nodeSet {
|
||||
if node == instance {
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
if err == nil {
|
||||
allLogs = append(allLogs, logs...)
|
||||
instances = append(instances, node)
|
||||
}
|
||||
} else {
|
||||
logs, nodeAddr, err := rt.forwardAlertEvalDetail(node, id)
|
||||
if err == nil {
|
||||
allLogs = append(allLogs, logs...)
|
||||
instances = append(instances, nodeAddr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort logs by timestamp descending
|
||||
sort.Slice(allLogs, func(i, j int) bool {
|
||||
return allLogs[i] > allLogs[j]
|
||||
})
|
||||
|
||||
if len(allLogs) > loggrep.MaxLogLines {
|
||||
allLogs = allLogs[:loggrep.MaxLogLines]
|
||||
}
|
||||
|
||||
return allLogs, strings.Join(instances, ", "), nil
|
||||
}
|
||||
|
||||
func (rt *Router) forwardAlertEvalDetail(node, id string) ([]string, string, error) {
|
||||
url := fmt.Sprintf("http://%s/v1/n9e/alert-eval-detail/%s", node, id)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
for user, pass := range rt.HTTP.APIForService.BasicAuth {
|
||||
req.SetBasicAuth(user, pass)
|
||||
break
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Dat loggrep.EventDetailResp `json:"dat"`
|
||||
Err string `json:"err"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
if result.Err != "" {
|
||||
return nil, node, fmt.Errorf("%s", result.Err)
|
||||
}
|
||||
|
||||
return result.Dat.Logs, result.Dat.Instance, nil
|
||||
}
|
||||
@@ -8,9 +8,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
@@ -62,11 +62,11 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
|
||||
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
|
||||
recovered, dsIds, cates, ruleId, query)
|
||||
recovered, dsIds, cates, ruleId, query, []int64{})
|
||||
ginx.Dangerous(err)
|
||||
|
||||
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
|
||||
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit))
|
||||
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit), []int64{})
|
||||
ginx.Dangerous(err)
|
||||
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
@@ -115,7 +115,18 @@ func (rt *Router) alertHisEventsDelete(c *gin.Context) {
|
||||
time.Sleep(100 * time.Millisecond) // 防止锁表
|
||||
}
|
||||
}()
|
||||
ginx.NewRender(c).Message("Alert history events deletion started")
|
||||
ginx.NewRender(c).Data("Alert history events deletion started", nil)
|
||||
}
|
||||
|
||||
var TransferEventToCur func(*ctx.Context, *models.AlertHisEvent) *models.AlertCurEvent
|
||||
|
||||
func init() {
|
||||
TransferEventToCur = transferEventToCur
|
||||
}
|
||||
|
||||
func transferEventToCur(ctx *ctx.Context, event *models.AlertHisEvent) *models.AlertCurEvent {
|
||||
cur := event.ToCur()
|
||||
return cur
|
||||
}
|
||||
|
||||
func (rt *Router) alertHisEventGet(c *gin.Context) {
|
||||
@@ -141,8 +152,8 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
|
||||
event.NotifyVersion, err = GetEventNotifyVersion(rt.Ctx, event.RuleId, event.NotifyRuleIds)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
event.NotifyRules, err = GetEventNorifyRuleNames(rt.Ctx, event.NotifyRuleIds)
|
||||
ginx.NewRender(c).Data(event, err)
|
||||
event.NotifyRules, err = GetEventNotifyRuleNames(rt.Ctx, event.NotifyRuleIds)
|
||||
ginx.NewRender(c).Data(TransferEventToCur(rt.Ctx, event), err)
|
||||
}
|
||||
|
||||
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, onlySelfGroupView bool, myGroups bool) ([]int64, error) {
|
||||
|
||||
@@ -11,15 +11,17 @@ import (
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/mute"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/pconf"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/jinzhu/copier"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -33,13 +35,13 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
for i := 0; i < len(ars); i++ {
|
||||
ars[i].FillNotifyGroups(rt.Ctx, cache)
|
||||
ars[i].FillSeverities()
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
|
||||
func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
|
||||
func GetAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
|
||||
stime = ginx.QueryInt64(c, "stime", 0)
|
||||
etime = ginx.QueryInt64(c, "etime", 0)
|
||||
if etime == 0 {
|
||||
@@ -75,20 +77,17 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
if err == nil {
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
rids := make([]int64, 0, len(ars))
|
||||
names := make([]string, 0, len(ars))
|
||||
for i := 0; i < len(ars); i++ {
|
||||
ars[i].FillNotifyGroups(rt.Ctx, cache)
|
||||
ars[i].FillSeverities()
|
||||
|
||||
if len(ars[i].DatasourceQueries) != 0 {
|
||||
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
|
||||
}
|
||||
|
||||
rids = append(rids, ars[i].Id)
|
||||
names = append(names, ars[i].UpdateBy)
|
||||
}
|
||||
|
||||
stime, etime := getAlertCueEventTimeRange(c)
|
||||
stime, etime := GetAlertCueEventTimeRange(c)
|
||||
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
|
||||
if cnt != nil {
|
||||
for i := 0; i < len(ars); i++ {
|
||||
@@ -96,14 +95,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
|
||||
if users != nil {
|
||||
for i := 0; i < len(ars); i++ {
|
||||
if user, exist := users[ars[i].UpdateBy]; exist {
|
||||
ars[i].UpdateByNickname = user.Nickname
|
||||
}
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
@@ -135,6 +127,7 @@ func (rt *Router) alertRulesGetByService(c *gin.Context) {
|
||||
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
@@ -157,6 +150,120 @@ func (rt *Router) alertRuleAddByFE(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
type AlertRuleTryRunForm struct {
|
||||
EventId int64 `json:"event_id" binding:"required"`
|
||||
AlertRuleConfig models.AlertRule `json:"config" binding:"required"`
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleNotifyTryRun(c *gin.Context) {
|
||||
// check notify channels of old version
|
||||
var f AlertRuleTryRunForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if hisEvent == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "event not found")
|
||||
}
|
||||
|
||||
curEvent := *hisEvent.ToCur()
|
||||
curEvent.SetTagsMap()
|
||||
|
||||
if f.AlertRuleConfig.NotifyVersion == 1 {
|
||||
for _, id := range f.AlertRuleConfig.NotifyRuleIds {
|
||||
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
for _, notifyConfig := range notifyRule.NotifyConfigs {
|
||||
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data("notification test ok", nil)
|
||||
return
|
||||
}
|
||||
|
||||
if len(f.AlertRuleConfig.NotifyChannelsJSON) == 0 {
|
||||
ginx.Bomb(http.StatusOK, "no notify channels selected")
|
||||
}
|
||||
|
||||
if len(f.AlertRuleConfig.NotifyGroupsJSON) == 0 {
|
||||
ginx.Bomb(http.StatusOK, "no notify groups selected")
|
||||
}
|
||||
|
||||
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
|
||||
ugids := f.AlertRuleConfig.NotifyGroupsJSON
|
||||
ngids := make([]int64, 0)
|
||||
for i := 0; i < len(ugids); i++ {
|
||||
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
|
||||
ngids = append(ngids, gid)
|
||||
}
|
||||
}
|
||||
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
|
||||
uids := make([]int64, 0)
|
||||
for i := range userGroups {
|
||||
uids = append(uids, userGroups[i].UserIds...)
|
||||
}
|
||||
users := rt.UserCache.GetByUserIds(uids)
|
||||
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
|
||||
flag := true
|
||||
// ignore non-default channels
|
||||
switch NotifyChannels {
|
||||
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
|
||||
models.Telegram, models.Email, models.FeishuCard:
|
||||
// do nothing
|
||||
default:
|
||||
continue
|
||||
}
|
||||
// default channels
|
||||
for ui := range users {
|
||||
if _, b := users[ui].ExtractToken(NotifyChannels); b {
|
||||
flag = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if flag {
|
||||
ancs = append(ancs, NotifyChannels)
|
||||
}
|
||||
}
|
||||
if len(ancs) > 0 {
|
||||
ginx.Dangerous(errors.New(fmt.Sprintf("All users are missing notify channel configurations. Please check for missing tokens (each channel should be configured with at least one user). %v", ancs)))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data("notification test ok", nil)
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleEnableTryRun(c *gin.Context) {
|
||||
// check notify channels of old version
|
||||
var f AlertRuleTryRunForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if hisEvent == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "event not found")
|
||||
}
|
||||
|
||||
curEvent := *hisEvent.ToCur()
|
||||
curEvent.SetTagsMap()
|
||||
|
||||
if f.AlertRuleConfig.Disabled == 1 {
|
||||
ginx.Bomb(http.StatusOK, "rule is disabled")
|
||||
}
|
||||
|
||||
if mute.TimeSpanMuteStrategy(&f.AlertRuleConfig, &curEvent) {
|
||||
ginx.Bomb(http.StatusOK, "event is not match for period of time")
|
||||
}
|
||||
|
||||
if mute.BgNotMatchMuteStrategy(&f.AlertRuleConfig, &curEvent, rt.TargetCache) {
|
||||
ginx.Bomb(http.StatusOK, "event target busi group not match rule busi group")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data("event is effective", nil)
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleAddByImport(c *gin.Context) {
|
||||
username := c.MustGet("username").(string)
|
||||
|
||||
@@ -174,6 +281,15 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
|
||||
models.DataSourceQueryAll,
|
||||
}
|
||||
}
|
||||
|
||||
// 将导入的规则统一转为新版本的通知规则配置
|
||||
lst[i].NotifyVersion = 1
|
||||
lst[i].NotifyChannelsJSON = []string{}
|
||||
lst[i].NotifyGroupsJSON = []string{}
|
||||
lst[i].NotifyChannels = ""
|
||||
lst[i].NotifyGroups = ""
|
||||
lst[i].Callbacks = ""
|
||||
lst[i].CallbacksJSON = []string{}
|
||||
}
|
||||
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
@@ -192,19 +308,52 @@ func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
|
||||
var f promRuleForm
|
||||
ginx.Dangerous(c.BindJSON(&f))
|
||||
|
||||
// 首先尝试解析带 groups 的格式
|
||||
var pr struct {
|
||||
Groups []models.PromRuleGroup `yaml:"groups"`
|
||||
}
|
||||
err := yaml.Unmarshal([]byte(f.Payload), &pr)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "invalid yaml format, please use the example format. err: %v", err)
|
||||
|
||||
var groups []models.PromRuleGroup
|
||||
|
||||
if err != nil || len(pr.Groups) == 0 {
|
||||
// 如果解析失败或没有 groups,尝试解析规则数组格式
|
||||
var rules []models.PromRule
|
||||
err = yaml.Unmarshal([]byte(f.Payload), &rules)
|
||||
if err != nil {
|
||||
// 最后尝试解析单个规则格式
|
||||
var singleRule models.PromRule
|
||||
err = yaml.Unmarshal([]byte(f.Payload), &singleRule)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "invalid yaml format. err: %v", err)
|
||||
}
|
||||
|
||||
// 验证单个规则是否有效
|
||||
if singleRule.Alert == "" && singleRule.Record == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "input yaml is empty or invalid")
|
||||
}
|
||||
|
||||
rules = []models.PromRule{singleRule}
|
||||
}
|
||||
|
||||
// 验证规则数组是否为空
|
||||
if len(rules) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "input yaml contains no rules")
|
||||
}
|
||||
|
||||
// 将规则数组包装成 group
|
||||
groups = []models.PromRuleGroup{
|
||||
{
|
||||
Name: "imported_rules",
|
||||
Rules: rules,
|
||||
},
|
||||
}
|
||||
} else {
|
||||
// 使用已解析的 groups
|
||||
groups = pr.Groups
|
||||
}
|
||||
|
||||
if len(pr.Groups) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "input yaml is empty")
|
||||
}
|
||||
|
||||
lst := models.DealPromGroup(pr.Groups, f.DatasourceQueries, f.Disabled)
|
||||
lst := models.DealPromGroup(groups, f.DatasourceQueries, f.Disabled)
|
||||
username := c.MustGet("username").(string)
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
ginx.NewRender(c).Data(rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language")), nil)
|
||||
@@ -349,8 +498,8 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusBadRequest, "fields empty")
|
||||
}
|
||||
|
||||
f.Fields["update_by"] = c.MustGet("username").(string)
|
||||
f.Fields["update_at"] = time.Now().Unix()
|
||||
updateBy := c.MustGet("username").(string)
|
||||
updateAt := time.Now().Unix()
|
||||
|
||||
for i := 0; i < len(f.Ids); i++ {
|
||||
ar, err := models.AlertRuleGetById(rt.Ctx, f.Ids[i])
|
||||
@@ -367,7 +516,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
b, err := json.Marshal(originRule)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"rule_config": string(b)}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -380,7 +528,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
b, err := json.Marshal(ar.AnnotationsJSON)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -393,7 +540,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
b, err := json.Marshal(ar.AnnotationsJSON)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -403,7 +549,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
callback := callbacks.(string)
|
||||
if !strings.Contains(ar.Callbacks, callback) {
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": ar.Callbacks + " " + callback}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -413,7 +558,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
if callbacks, has := f.Fields["callbacks"]; has {
|
||||
callback := callbacks.(string)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": strings.ReplaceAll(ar.Callbacks, callback, "")}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -423,7 +567,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
bytes, err := json.Marshal(datasourceQueries)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"datasource_queries": bytes}))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
@@ -439,6 +582,12 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
|
||||
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
|
||||
}
|
||||
}
|
||||
|
||||
// 统一更新更新时间和更新人,只有更新时间变了,告警规则才会被引擎拉取
|
||||
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{
|
||||
"update_by": updateBy,
|
||||
"update_at": updateAt,
|
||||
}))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
@@ -733,3 +882,28 @@ func (rt *Router) batchAlertRuleClone(c *gin.Context) {
|
||||
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) timezonesGet(c *gin.Context) {
|
||||
// 返回常用时区列表(按时差去重,每个时差只保留一个代表性时区)
|
||||
timezones := []string{
|
||||
"Local",
|
||||
"UTC",
|
||||
"Asia/Shanghai", // UTC+8 (代表 Asia/Hong_Kong, Asia/Singapore 等)
|
||||
"Asia/Tokyo", // UTC+9 (代表 Asia/Seoul 等)
|
||||
"Asia/Dubai", // UTC+4
|
||||
"Asia/Kolkata", // UTC+5:30
|
||||
"Asia/Bangkok", // UTC+7 (代表 Asia/Jakarta 等)
|
||||
"Europe/London", // UTC+0 (代表 UTC)
|
||||
"Europe/Paris", // UTC+1 (代表 Europe/Berlin, Europe/Rome, Europe/Madrid 等)
|
||||
"Europe/Moscow", // UTC+3
|
||||
"America/New_York", // UTC-5 (代表 America/Toronto 等)
|
||||
"America/Chicago", // UTC-6 (代表 America/Mexico_City 等)
|
||||
"America/Denver", // UTC-7
|
||||
"America/Los_Angeles", // UTC-8
|
||||
"America/Sao_Paulo", // UTC-3
|
||||
"Australia/Sydney", // UTC+10 (代表 Australia/Melbourne 等)
|
||||
"Pacific/Auckland", // UTC+12
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(timezones, nil)
|
||||
}
|
||||
|
||||
@@ -2,13 +2,17 @@ package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
// Return all, front-end search and paging
|
||||
@@ -26,6 +30,7 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
|
||||
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(lst[i].DB2FE())
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -62,6 +67,7 @@ func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
|
||||
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(lst[i].DB2FE())
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -104,6 +110,148 @@ func (rt *Router) alertSubscribeAdd(c *gin.Context) {
|
||||
ginx.NewRender(c).Message(f.Add(rt.Ctx))
|
||||
}
|
||||
|
||||
type SubscribeTryRunForm struct {
|
||||
EventId int64 `json:"event_id" binding:"required"`
|
||||
SubscribeConfig models.AlertSubscribe `json:"config" binding:"required"`
|
||||
}
|
||||
|
||||
func (rt *Router) alertSubscribeTryRun(c *gin.Context) {
|
||||
var f SubscribeTryRunForm
|
||||
ginx.BindJSON(c, &f)
|
||||
ginx.Dangerous(f.SubscribeConfig.Verify())
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if hisEvent == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "event not found")
|
||||
}
|
||||
|
||||
curEvent := *hisEvent.ToCur()
|
||||
curEvent.SetTagsMap()
|
||||
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
// 先判断匹配条件
|
||||
if !f.SubscribeConfig.MatchCluster(curEvent.DatasourceId) {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event datasource not match"))
|
||||
}
|
||||
|
||||
if len(f.SubscribeConfig.RuleIds) != 0 {
|
||||
match := false
|
||||
for _, rid := range f.SubscribeConfig.RuleIds {
|
||||
if rid == curEvent.RuleId {
|
||||
match = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !match {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event rule id not match"))
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配 tag
|
||||
f.SubscribeConfig.Parse()
|
||||
if !common.MatchTags(curEvent.TagsMap, f.SubscribeConfig.ITags) {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event tags not match"))
|
||||
}
|
||||
|
||||
// 匹配group name
|
||||
if !common.MatchGroupsName(curEvent.GroupName, f.SubscribeConfig.IBusiGroups) {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event group name not match"))
|
||||
}
|
||||
|
||||
// 检查严重级别(Severity)匹配
|
||||
if len(f.SubscribeConfig.SeveritiesJson) != 0 {
|
||||
match := false
|
||||
for _, s := range f.SubscribeConfig.SeveritiesJson {
|
||||
if s == curEvent.Severity || s == 0 {
|
||||
match = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !match {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event severity not match"))
|
||||
}
|
||||
}
|
||||
|
||||
// 新版本通知规则
|
||||
if f.SubscribeConfig.NotifyVersion == 1 {
|
||||
if len(f.SubscribeConfig.NotifyRuleIds) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify rules selected"))
|
||||
}
|
||||
|
||||
for _, id := range f.SubscribeConfig.NotifyRuleIds {
|
||||
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, i18n.Sprintf(lang, "subscribe notify rule not found: %v", err))
|
||||
}
|
||||
|
||||
for _, notifyConfig := range notifyRule.NotifyConfigs {
|
||||
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "notify rule send error: %v", err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notification test ok"), nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 旧版通知方式
|
||||
f.SubscribeConfig.ModifyEvent(&curEvent)
|
||||
if len(curEvent.NotifyChannelsJSON) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify channels selected"))
|
||||
}
|
||||
|
||||
if len(curEvent.NotifyGroupsJSON) == 0 {
|
||||
ginx.Bomb(http.StatusOK, i18n.Sprintf(lang, "no notify groups selected"))
|
||||
}
|
||||
|
||||
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
|
||||
ugids := strings.Fields(f.SubscribeConfig.UserGroupIds)
|
||||
ngids := make([]int64, 0)
|
||||
for i := 0; i < len(ugids); i++ {
|
||||
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
|
||||
ngids = append(ngids, gid)
|
||||
}
|
||||
}
|
||||
|
||||
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
|
||||
uids := make([]int64, 0)
|
||||
for i := range userGroups {
|
||||
uids = append(uids, userGroups[i].UserIds...)
|
||||
}
|
||||
users := rt.UserCache.GetByUserIds(uids)
|
||||
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
|
||||
flag := true
|
||||
// ignore non-default channels
|
||||
switch NotifyChannels {
|
||||
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
|
||||
models.Telegram, models.Email, models.FeishuCard:
|
||||
// do nothing
|
||||
default:
|
||||
continue
|
||||
}
|
||||
// default channels
|
||||
for ui := range users {
|
||||
if _, b := users[ui].ExtractToken(NotifyChannels); b {
|
||||
flag = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if flag {
|
||||
ancs = append(ancs, NotifyChannels)
|
||||
}
|
||||
}
|
||||
if len(ancs) > 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "all users missing notify channel configurations: %v", ancs))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notify settings ok"), nil)
|
||||
}
|
||||
|
||||
func (rt *Router) alertSubscribePut(c *gin.Context) {
|
||||
var fs []models.AlertSubscribe
|
||||
ginx.BindJSON(c, &fs)
|
||||
@@ -142,6 +290,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
|
||||
"busi_groups",
|
||||
"note",
|
||||
"notify_rule_ids",
|
||||
"notify_version",
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -17,6 +17,7 @@ type boardForm struct {
|
||||
Name string `json:"name"`
|
||||
Ident string `json:"ident"`
|
||||
Tags string `json:"tags"`
|
||||
Note string `json:"note"`
|
||||
Configs string `json:"configs"`
|
||||
Public int `json:"public"`
|
||||
PublicCate int `json:"public_cate"`
|
||||
@@ -34,6 +35,7 @@ func (rt *Router) boardAdd(c *gin.Context) {
|
||||
Name: f.Name,
|
||||
Ident: f.Ident,
|
||||
Tags: f.Tags,
|
||||
Note: f.Note,
|
||||
Configs: f.Configs,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
@@ -115,6 +117,10 @@ func (rt *Router) boardPureGet(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusNotFound, "No such dashboard")
|
||||
}
|
||||
|
||||
// 清除创建者和更新者信息
|
||||
board.CreateBy = ""
|
||||
board.UpdateBy = ""
|
||||
|
||||
ginx.NewRender(c).Data(board, nil)
|
||||
}
|
||||
|
||||
@@ -180,10 +186,11 @@ func (rt *Router) boardPut(c *gin.Context) {
|
||||
bo.Name = f.Name
|
||||
bo.Ident = f.Ident
|
||||
bo.Tags = f.Tags
|
||||
bo.Note = f.Note
|
||||
bo.UpdateBy = me.Username
|
||||
bo.UpdateAt = time.Now().Unix()
|
||||
|
||||
err = bo.Update(rt.Ctx, "name", "ident", "tags", "update_by", "update_at")
|
||||
err = bo.Update(rt.Ctx, "name", "ident", "tags", "note", "update_by", "update_at")
|
||||
ginx.NewRender(c).Data(bo, err)
|
||||
}
|
||||
|
||||
@@ -253,6 +260,9 @@ func (rt *Router) boardGets(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
boards, err := models.BoardGetsByGroupId(rt.Ctx, bgid, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
}
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -266,6 +276,9 @@ func (rt *Router) publicBoardGets(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
|
||||
boards, err := models.BoardGets(rt.Ctx, "", "public=1 and (public_cate in (?) or id in (?))", []int64{0, 1}, boardIds)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
}
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -305,6 +318,7 @@ func (rt *Router) boardGetsByGids(c *gin.Context) {
|
||||
boards[i].Bgids = ids
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -8,10 +8,10 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -3,8 +3,8 @@ package router
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) metricFilterGets(c *gin.Context) {
|
||||
@@ -27,6 +27,8 @@ func (rt *Router) metricFilterGets(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
models.FillUpdateByNicknames(rt.Ctx, arr)
|
||||
|
||||
ginx.NewRender(c).Data(arr, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -2,12 +2,14 @@ package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -29,7 +31,7 @@ func (rt *Router) builtinMetricsAdd(c *gin.Context) {
|
||||
reterr := make(map[string]string)
|
||||
for i := 0; i < count; i++ {
|
||||
lst[i].Lang = lang
|
||||
lst[i].UUID = time.Now().UnixNano()
|
||||
lst[i].UUID = time.Now().UnixMicro()
|
||||
if err := lst[i].Add(rt.Ctx, username); err != nil {
|
||||
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
}
|
||||
@@ -48,11 +50,12 @@ func (rt *Router) builtinMetricsGets(c *gin.Context) {
|
||||
lang = "zh_CN"
|
||||
}
|
||||
|
||||
bm, err := models.BuiltinMetricGets(rt.Ctx, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
|
||||
bmInDB, err := models.BuiltinMetricGets(rt.Ctx, "", collector, typ, query, unit)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
total, err := models.BuiltinMetricCount(rt.Ctx, lang, collector, typ, query, unit)
|
||||
bm, total, err := integration.BuiltinPayloadInFile.BuiltinMetricGets(bmInDB, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": bm,
|
||||
"total": total,
|
||||
@@ -100,8 +103,26 @@ func (rt *Router) builtinMetricsTypes(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
metricTypeList, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
|
||||
ginx.NewRender(c).Data(metricTypeList, err)
|
||||
metricTypeListInDB, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
metricTypeListInFile := integration.BuiltinPayloadInFile.BuiltinMetricTypes(lang, collector, query)
|
||||
|
||||
typeMap := make(map[string]struct{})
|
||||
for _, metricType := range metricTypeListInDB {
|
||||
typeMap[metricType] = struct{}{}
|
||||
}
|
||||
for _, metricType := range metricTypeListInFile {
|
||||
typeMap[metricType] = struct{}{}
|
||||
}
|
||||
|
||||
metricTypeList := make([]string, 0, len(typeMap))
|
||||
for metricType := range typeMap {
|
||||
metricTypeList = append(metricTypeList, metricType)
|
||||
}
|
||||
sort.Strings(metricTypeList)
|
||||
|
||||
ginx.NewRender(c).Data(metricTypeList, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
|
||||
@@ -109,5 +130,24 @@ func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
ginx.NewRender(c).Data(models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query))
|
||||
collectorListInDB, err := models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
collectorListInFile := integration.BuiltinPayloadInFile.BuiltinMetricCollectors(lang, typ, query)
|
||||
|
||||
collectorMap := make(map[string]struct{})
|
||||
for _, collector := range collectorListInDB {
|
||||
collectorMap[collector] = struct{}{}
|
||||
}
|
||||
for _, collector := range collectorListInFile {
|
||||
collectorMap[collector] = struct{}{}
|
||||
}
|
||||
|
||||
collectorList := make([]string, 0, len(collectorMap))
|
||||
for collector := range collectorMap {
|
||||
collectorList = append(collectorList, collector)
|
||||
}
|
||||
sort.Strings(collectorList)
|
||||
|
||||
ginx.NewRender(c).Data(collectorList, nil)
|
||||
}
|
||||
|
||||
@@ -7,9 +7,10 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -18,6 +19,7 @@ type Board struct {
|
||||
Tags string `json:"tags"`
|
||||
Configs interface{} `json:"configs"`
|
||||
UUID int64 `json:"uuid"`
|
||||
Note string `json:"note"`
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
@@ -128,6 +130,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
UUID: dashboard.UUID,
|
||||
Note: dashboard.Note,
|
||||
Content: string(contentBytes),
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
@@ -163,6 +166,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
UUID: dashboard.UUID,
|
||||
Note: dashboard.Note,
|
||||
Content: string(contentBytes),
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
@@ -192,13 +196,26 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
|
||||
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "type", "")
|
||||
if typ == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "type is required")
|
||||
return
|
||||
}
|
||||
ComponentID := ginx.QueryInt64(c, "component_id", 0)
|
||||
|
||||
cate := ginx.QueryStr(c, "cate", "")
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
lst, err := models.BuiltinPayloadGets(rt.Ctx, uint64(ComponentID), typ, cate, query)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
lstInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayload(typ, cate, query, uint64(ComponentID))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if len(lstInFile) > 0 {
|
||||
lst = append(lst, lstInFile...)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
|
||||
@@ -206,21 +223,31 @@ func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
|
||||
ComponentID := ginx.QueryInt64(c, "component_id", 0)
|
||||
|
||||
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, uint64(ComponentID))
|
||||
ginx.NewRender(c).Data(cates, err)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
|
||||
func (rt *Router) builtinPayloadGet(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
catesInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayloadCates(typ, uint64(ComponentID))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", id)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
if bp == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "builtin payload not found")
|
||||
// 使用 map 进行去重
|
||||
cateMap := make(map[string]bool)
|
||||
|
||||
// 添加数据库中的分类
|
||||
for _, cate := range cates {
|
||||
cateMap[cate] = true
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(bp, nil)
|
||||
// 添加文件中的分类
|
||||
for _, cate := range catesInFile {
|
||||
cateMap[cate] = true
|
||||
}
|
||||
|
||||
// 将去重后的结果转换回切片
|
||||
result := make([]string, 0, len(cateMap))
|
||||
for cate := range cateMap {
|
||||
result = append(result, cate)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsPut(c *gin.Context) {
|
||||
@@ -251,6 +278,7 @@ func (rt *Router) builtinPayloadsPut(c *gin.Context) {
|
||||
|
||||
req.Name = dashboard.Name
|
||||
req.Tags = dashboard.Tags
|
||||
req.Note = dashboard.Note
|
||||
} else if req.Type == "collect" {
|
||||
c := make(map[string]interface{})
|
||||
if _, err := toml.Decode(req.Content, &c); err != nil {
|
||||
@@ -273,14 +301,15 @@ func (rt *Router) builtinPayloadsDel(c *gin.Context) {
|
||||
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsGetByUUIDOrID(c *gin.Context) {
|
||||
uuid := ginx.QueryInt64(c, "uuid", 0)
|
||||
// 优先以 uuid 为准
|
||||
if uuid != 0 {
|
||||
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid))
|
||||
return
|
||||
}
|
||||
func (rt *Router) builtinPayloadsGetByUUID(c *gin.Context) {
|
||||
uuid := ginx.QueryInt64(c, "uuid")
|
||||
|
||||
id := ginx.QueryInt64(c, "id", 0)
|
||||
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "id = ?", id))
|
||||
bp, err := models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if bp != nil {
|
||||
ginx.NewRender(c).Data(bp, nil)
|
||||
} else {
|
||||
ginx.NewRender(c).Data(integration.BuiltinPayloadInFile.IndexData[uuid], nil)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -119,6 +119,9 @@ func (rt *Router) busiGroupGets(c *gin.Context) {
|
||||
if len(lst) == 0 {
|
||||
lst = []models.BusiGroup{}
|
||||
}
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
captcha "github.com/mojocn/base64Captcha"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) chartShareGets(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) notifyChannelsGets(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const EMBEDDEDDASHBOARD = "embedded-dashboards"
|
||||
@@ -15,6 +15,9 @@ func (rt *Router) configsGet(c *gin.Context) {
|
||||
prefix := ginx.QueryStr(c, "prefix", "")
|
||||
limit := ginx.QueryInt(c, "limit", 10)
|
||||
configs, err := models.ConfigsGets(rt.Ctx, prefix, limit, ginx.Offset(c, limit))
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, configs)
|
||||
}
|
||||
ginx.NewRender(c).Data(configs, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
type confPropCrypto struct {
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func checkAnnotationPermission(c *gin.Context, ctx *ctx.Context, dashboardId int64) {
|
||||
|
||||
@@ -1,17 +1,23 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/datasource/opensearch"
|
||||
"github.com/ccfos/nightingale/v6/dskit/clickhouse"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -47,9 +53,41 @@ func (rt *Router) datasourceList(c *gin.Context) {
|
||||
func (rt *Router) datasourceGetsByService(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "typ", "")
|
||||
lst, err := models.GetDatasourcesGetsBy(rt.Ctx, typ, "", "", "")
|
||||
|
||||
openRsa := rt.Center.RSA.OpenRSA
|
||||
for _, item := range lst {
|
||||
if err := item.Encrypt(openRsa, rt.HTTP.RSA.RSAPublicKey); err != nil {
|
||||
logger.Errorf("datasource %+v encrypt failed: %v", item, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceRsaConfigGet(c *gin.Context) {
|
||||
if rt.Center.RSA.OpenRSA {
|
||||
publicKey := ""
|
||||
privateKey := ""
|
||||
if len(rt.HTTP.RSA.RSAPublicKey) > 0 {
|
||||
publicKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPublicKey)
|
||||
}
|
||||
if len(rt.HTTP.RSA.RSAPrivateKey) > 0 {
|
||||
privateKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPrivateKey)
|
||||
}
|
||||
logger.Debugf("OpenRSA=%v", rt.Center.RSA.OpenRSA)
|
||||
ginx.NewRender(c).Data(models.RsaConfig{
|
||||
OpenRSA: rt.Center.RSA.OpenRSA,
|
||||
RSAPublicKey: publicKey,
|
||||
RSAPrivateKey: privateKey,
|
||||
RSAPassWord: rt.HTTP.RSA.RSAPassWord,
|
||||
}, nil)
|
||||
} else {
|
||||
ginx.NewRender(c).Data(models.RsaConfig{
|
||||
OpenRSA: rt.Center.RSA.OpenRSA,
|
||||
}, nil)
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceBriefs(c *gin.Context) {
|
||||
var dss []*models.Datasource
|
||||
list, err := models.GetDatasourcesGetsBy(rt.Ctx, "", "", "", "")
|
||||
@@ -100,7 +138,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
|
||||
if !req.ForceSave {
|
||||
if req.PluginType == models.PROMETHEUS || req.PluginType == models.LOKI || req.PluginType == models.TDENGINE {
|
||||
err = DatasourceCheck(req)
|
||||
err = DatasourceCheck(c, req)
|
||||
if err != nil {
|
||||
Dangerous(c, err)
|
||||
return
|
||||
@@ -108,6 +146,121 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
for k, v := range req.SettingsJson {
|
||||
if strings.Contains(k, "cluster_name") {
|
||||
req.ClusterName = v.(string)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if req.PluginType == models.OPENSEARCH {
|
||||
b, err := json.Marshal(req.SettingsJson)
|
||||
if err != nil {
|
||||
logger.Warningf("marshal settings fail: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
var os opensearch.OpenSearch
|
||||
err = json.Unmarshal(b, &os)
|
||||
if err != nil {
|
||||
logger.Warningf("unmarshal settings fail: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(os.Nodes) == 0 {
|
||||
logger.Warningf("nodes empty, %+v", req)
|
||||
return
|
||||
}
|
||||
|
||||
req.HTTPJson = models.HTTP{
|
||||
Timeout: os.Timeout,
|
||||
Url: os.Nodes[0],
|
||||
Headers: os.Headers,
|
||||
TLS: models.TLS{
|
||||
SkipTlsVerify: os.TLS.SkipTlsVerify,
|
||||
},
|
||||
}
|
||||
|
||||
req.AuthJson = models.Auth{
|
||||
BasicAuth: os.Basic.Enable,
|
||||
BasicAuthUser: os.Basic.Username,
|
||||
BasicAuthPassword: os.Basic.Password,
|
||||
}
|
||||
}
|
||||
|
||||
if req.PluginType == models.CLICKHOUSE {
|
||||
b, err := json.Marshal(req.SettingsJson)
|
||||
if err != nil {
|
||||
logger.Warningf("marshal clickhouse settings failed: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
var ckConfig clickhouse.Clickhouse
|
||||
err = json.Unmarshal(b, &ckConfig)
|
||||
if err != nil {
|
||||
logger.Warningf("unmarshal clickhouse settings failed: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
// 检查ckconfig的nodes不应该以http://或https://开头
|
||||
for _, addr := range ckConfig.Nodes {
|
||||
if strings.HasPrefix(addr, "http://") || strings.HasPrefix(addr, "https://") {
|
||||
err = fmt.Errorf("clickhouse node address should not start with http:// or https:// : %s", addr)
|
||||
logger.Warningf("clickhouse node address invalid: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// InitCli 会自动检测并选择 HTTP 或 Native 协议
|
||||
err = ckConfig.InitCli()
|
||||
if err != nil {
|
||||
logger.Warningf("clickhouse connection failed: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 执行 SHOW DATABASES 测试连通性
|
||||
_, err = ckConfig.ShowDatabases(context.Background())
|
||||
if err != nil {
|
||||
logger.Warningf("clickhouse test query failed: %v", err)
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if req.PluginType == models.ELASTICSEARCH {
|
||||
skipAuto := false
|
||||
// 若用户输入了version(version字符串存在且不为空),则不自动获取
|
||||
if req.SettingsJson != nil {
|
||||
if v, ok := req.SettingsJson["version"]; ok {
|
||||
switch vv := v.(type) {
|
||||
case string:
|
||||
if strings.TrimSpace(vv) != "" {
|
||||
skipAuto = true
|
||||
}
|
||||
default:
|
||||
if strings.TrimSpace(fmt.Sprint(vv)) != "" {
|
||||
skipAuto = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !skipAuto {
|
||||
version, err := getElasticsearchVersion(req, 10*time.Second)
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get elasticsearch version: %v", err)
|
||||
} else {
|
||||
if req.SettingsJson == nil {
|
||||
req.SettingsJson = make(map[string]interface{})
|
||||
}
|
||||
req.SettingsJson["version"] = version
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if req.Id == 0 {
|
||||
req.CreatedBy = username
|
||||
req.Status = "enabled"
|
||||
@@ -123,13 +276,13 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
}
|
||||
err = req.Add(rt.Ctx)
|
||||
} else {
|
||||
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
|
||||
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default", "weight")
|
||||
}
|
||||
|
||||
Render(c, nil, err)
|
||||
}
|
||||
|
||||
func DatasourceCheck(ds models.Datasource) error {
|
||||
func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
|
||||
if ds.PluginType == models.PROMETHEUS || ds.PluginType == models.LOKI || ds.PluginType == models.TDENGINE {
|
||||
if ds.HTTPJson.Url == "" {
|
||||
return fmt.Errorf("url is empty")
|
||||
@@ -140,11 +293,15 @@ func DatasourceCheck(ds models.Datasource) error {
|
||||
}
|
||||
}
|
||||
|
||||
// 使用 TLS 配置(支持 mTLS)
|
||||
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create TLS config: %v", err)
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
|
||||
},
|
||||
TLSClientConfig: tlsConfig,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -188,6 +345,10 @@ func DatasourceCheck(ds models.Datasource) error {
|
||||
req, err = http.NewRequest("GET", fullURL, nil)
|
||||
if err != nil {
|
||||
logger.Errorf("Error creating request: %v", err)
|
||||
if !strings.Contains(ds.HTTPJson.Url, "/loki") {
|
||||
lang := c.GetHeader("X-Language")
|
||||
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
|
||||
}
|
||||
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
|
||||
}
|
||||
}
|
||||
@@ -209,6 +370,10 @@ func DatasourceCheck(ds models.Datasource) error {
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
logger.Errorf("Error making request: %v\n", resp.StatusCode)
|
||||
if resp.StatusCode == 404 && ds.PluginType == models.LOKI && !strings.Contains(ds.HTTPJson.Url, "/loki") {
|
||||
lang := c.GetHeader("X-Language")
|
||||
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
|
||||
}
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("request url:%s failed code:%d body:%s", fullURL, resp.StatusCode, string(body))
|
||||
}
|
||||
@@ -294,3 +459,82 @@ func (rt *Router) datasourceQuery(c *gin.Context) {
|
||||
}
|
||||
ginx.NewRender(c).Data(req, err)
|
||||
}
|
||||
|
||||
// getElasticsearchVersion 该函数尝试从提供的Elasticsearch数据源中获取版本号,遍历所有URL,
|
||||
// 直到成功获取版本号或所有URL均尝试失败为止。
|
||||
func getElasticsearchVersion(ds models.Datasource, timeout time.Duration) (string, error) {
|
||||
client := &http.Client{
|
||||
Timeout: timeout,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
urls := make([]string, 0)
|
||||
if len(ds.HTTPJson.Urls) > 0 {
|
||||
urls = append(urls, ds.HTTPJson.Urls...)
|
||||
}
|
||||
if ds.HTTPJson.Url != "" {
|
||||
urls = append(urls, ds.HTTPJson.Url)
|
||||
}
|
||||
if len(urls) == 0 {
|
||||
return "", fmt.Errorf("no url provided")
|
||||
}
|
||||
|
||||
var lastErr error
|
||||
for _, raw := range urls {
|
||||
baseURL := strings.TrimRight(raw, "/") + "/"
|
||||
req, err := http.NewRequest("GET", baseURL, nil)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.AuthJson.BasicAuthUser != "" {
|
||||
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
|
||||
}
|
||||
|
||||
for k, v := range ds.HTTPJson.Headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
lastErr = fmt.Errorf("request to %s failed with status: %d body:%s", baseURL, resp.StatusCode, string(body))
|
||||
continue
|
||||
}
|
||||
|
||||
var result map[string]interface{}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
if version, ok := result["version"].(map[string]interface{}); ok {
|
||||
if number, ok := version["number"].(string); ok && number != "" {
|
||||
return number, nil
|
||||
}
|
||||
}
|
||||
|
||||
lastErr = fmt.Errorf("version not found in response from %s", baseURL)
|
||||
}
|
||||
|
||||
if lastErr != nil {
|
||||
return "", lastErr
|
||||
}
|
||||
return "", fmt.Errorf("failed to get elasticsearch version")
|
||||
}
|
||||
|
||||
@@ -6,10 +6,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/dskit/types"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (rt *Router) ShowDatabases(c *gin.Context) {
|
||||
@@ -18,7 +18,7 @@ func (rt *Router) ShowDatabases(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ func (rt *Router) ShowTables(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -60,8 +60,8 @@ func (rt *Router) ShowTables(c *gin.Context) {
|
||||
}
|
||||
switch plug.(type) {
|
||||
case TableShower:
|
||||
if len(f.Querys) > 0 {
|
||||
database, ok := f.Querys[0].(string)
|
||||
if len(f.Queries) > 0 {
|
||||
database, ok := f.Queries[0].(string)
|
||||
if ok {
|
||||
tables, err = plug.(TableShower).ShowTables(c.Request.Context(), database)
|
||||
}
|
||||
@@ -78,7 +78,7 @@ func (rt *Router) DescribeTable(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
// 只接受一个入参
|
||||
@@ -90,8 +90,8 @@ func (rt *Router) DescribeTable(c *gin.Context) {
|
||||
switch plug.(type) {
|
||||
case TableDescriber:
|
||||
client := plug.(TableDescriber)
|
||||
if len(f.Querys) > 0 {
|
||||
columns, err = client.DescribeTable(c.Request.Context(), f.Querys[0])
|
||||
if len(f.Queries) > 0 {
|
||||
columns, err = client.DescribeTable(c.Request.Context(), f.Queries[0])
|
||||
}
|
||||
default:
|
||||
ginx.Bomb(200, "datasource not exists")
|
||||
|
||||
@@ -5,14 +5,15 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) embeddedProductGets(c *gin.Context) {
|
||||
products, err := models.EmbeddedProductGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, products)
|
||||
// 获取当前用户可访问的Group ID 列表
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
|
||||
@@ -3,10 +3,10 @@ package router
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/datasource/es"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type IndexReq struct {
|
||||
@@ -34,7 +34,7 @@ func (rt *Router) QueryIndices(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ func (rt *Router) QueryFields(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ func (rt *Router) QueryESVariable(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// 创建 ES Index Pattern
|
||||
@@ -69,6 +69,10 @@ func (rt *Router) esIndexPatternGetList(c *gin.Context) {
|
||||
lst, err = models.EsIndexPatternGets(rt.Ctx, "")
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
|
||||
149
center/router/router_event_detail.go
Normal file
149
center/router/router_event_detail.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/naming"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// eventDetailPage renders an HTML log viewer page (for pages group).
|
||||
func (rt *Router) eventDetailPage(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
c.String(http.StatusBadRequest, "invalid hash format")
|
||||
return
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getEventLogs(hash)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||
err = loggrep.RenderHTML(c.Writer, loggrep.PageData{
|
||||
Hash: hash,
|
||||
Instance: instance,
|
||||
Logs: logs,
|
||||
Total: len(logs),
|
||||
})
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "render error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// eventDetailJSON returns JSON (for service group).
|
||||
func (rt *Router) eventDetailJSON(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
ginx.Bomb(200, "invalid hash format")
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getEventLogs(hash)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// getNodeForDatasource returns the alert engine instance responsible for the given
|
||||
// datasource and primary key. It first checks the local hashring, and falls back
|
||||
// to querying the database for active instances if the hashring is empty
|
||||
// (e.g. when the datasource belongs to another engine cluster).
|
||||
func (rt *Router) getNodeForDatasource(datasourceId int64, pk string) (string, error) {
|
||||
dsIdStr := strconv.FormatInt(datasourceId, 10)
|
||||
node, err := naming.DatasourceHashRing.GetNode(dsIdStr, pk)
|
||||
if err == nil {
|
||||
return node, nil
|
||||
}
|
||||
|
||||
// Hashring is empty for this datasource (likely belongs to another engine cluster).
|
||||
// Query the DB for active instances.
|
||||
servers, dbErr := models.AlertingEngineGetsInstances(rt.Ctx,
|
||||
"datasource_id = ? and clock > ?",
|
||||
datasourceId, time.Now().Unix()-30)
|
||||
if dbErr != nil {
|
||||
return "", dbErr
|
||||
}
|
||||
if len(servers) == 0 {
|
||||
return "", fmt.Errorf("no active instances for datasource %d", datasourceId)
|
||||
}
|
||||
|
||||
ring := naming.NewConsistentHashRing(int32(naming.NodeReplicas), servers)
|
||||
return ring.Get(pk)
|
||||
}
|
||||
|
||||
// getEventLogs resolves the target instance and retrieves logs.
|
||||
func (rt *Router) getEventLogs(hash string) ([]string, string, error) {
|
||||
event, err := models.AlertHisEventGetByHash(rt.Ctx, hash)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if event == nil {
|
||||
return nil, "", fmt.Errorf("no such alert event")
|
||||
}
|
||||
|
||||
ruleId := strconv.FormatInt(event.RuleId, 10)
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
node, err := rt.getNodeForDatasource(event.DatasourceId, ruleId)
|
||||
if err != nil || node == instance {
|
||||
// hashring not ready or target is self, handle locally
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// forward to the target alert instance
|
||||
return rt.forwardEventDetail(node, hash)
|
||||
}
|
||||
|
||||
func (rt *Router) forwardEventDetail(node, hash string) ([]string, string, error) {
|
||||
url := fmt.Sprintf("http://%s/v1/n9e/event-detail/%s", node, hash)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
for user, pass := range rt.HTTP.APIForService.BasicAuth {
|
||||
req.SetBasicAuth(user, pass)
|
||||
break
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Dat loggrep.EventDetailResp `json:"dat"`
|
||||
Err string `json:"err"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
if result.Err != "" {
|
||||
return nil, node, fmt.Errorf("%s", result.Err)
|
||||
}
|
||||
|
||||
return result.Dat.Logs, result.Dat.Instance, nil
|
||||
}
|
||||
@@ -1,13 +1,18 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -27,18 +32,38 @@ func (rt *Router) eventPipelinesList(c *gin.Context) {
|
||||
for _, tid := range pipeline.TeamIds {
|
||||
pipeline.TeamNames = append(pipeline.TeamNames, ugMap[tid])
|
||||
}
|
||||
// 兼容处理:自动填充工作流字段
|
||||
pipeline.FillWorkflowFields()
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, pipelines)
|
||||
|
||||
gids, err := models.MyGroupIdsMap(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if me.IsAdmin() {
|
||||
for _, pipeline := range pipelines {
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(pipelines, nil)
|
||||
return
|
||||
}
|
||||
|
||||
res := make([]*models.EventPipeline, 0)
|
||||
for _, pipeline := range pipelines {
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
|
||||
for _, tid := range pipeline.TeamIds {
|
||||
if _, ok := gids[tid]; ok {
|
||||
res = append(res, pipeline)
|
||||
@@ -61,6 +86,15 @@ func (rt *Router) getEventPipeline(c *gin.Context) {
|
||||
err = pipeline.FillTeamNames(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
// 兼容处理:自动填充工作流字段
|
||||
pipeline.FillWorkflowFields()
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(pipeline, nil)
|
||||
}
|
||||
|
||||
@@ -131,7 +165,9 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
|
||||
var f struct {
|
||||
EventId int64 `json:"event_id"`
|
||||
PipelineConfig models.EventPipeline `json:"pipeline_config"`
|
||||
InputVariables map[string]string `json:"input_variables,omitempty"`
|
||||
}
|
||||
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
@@ -140,18 +176,35 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
|
||||
}
|
||||
event := hisEvent.ToCur()
|
||||
|
||||
for _, p := range f.PipelineConfig.ProcessorConfigs {
|
||||
processor, err := models.GetProcessorByType(p.Typ, p.Config)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
|
||||
}
|
||||
event = processor.Process(rt.Ctx, event)
|
||||
if event == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "event is dropped")
|
||||
}
|
||||
lang := c.GetHeader("X-Language")
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: me.Username,
|
||||
InputsOverrides: f.InputVariables,
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(event, nil)
|
||||
resultEvent, result, err := workflowEngine.Execute(&f.PipelineConfig, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "pipeline execute error: %v", err)
|
||||
}
|
||||
|
||||
m := map[string]interface{}{
|
||||
"event": resultEvent,
|
||||
"result": i18n.Sprintf(lang, result.Message),
|
||||
"status": result.Status,
|
||||
"node_results": result.NodeResults,
|
||||
}
|
||||
|
||||
if resultEvent == nil {
|
||||
m["result"] = i18n.Sprintf(lang, "event is dropped")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(m, nil)
|
||||
}
|
||||
|
||||
// 测试事件处理器
|
||||
@@ -170,15 +223,22 @@ func (rt *Router) tryRunEventProcessor(c *gin.Context) {
|
||||
|
||||
processor, err := models.GetProcessorByType(f.ProcessorConfig.Typ, f.ProcessorConfig.Config)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor err: %+v", err)
|
||||
ginx.Bomb(200, "get processor err: %+v", err)
|
||||
}
|
||||
event = processor.Process(rt.Ctx, event)
|
||||
logger.Infof("processor %+v result: %+v", f.ProcessorConfig, event)
|
||||
if event == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "event is dropped")
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Vars: make(map[string]interface{}),
|
||||
}
|
||||
wfCtx, res, err := processor.Process(rt.Ctx, wfCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(200, "processor err: %+v", err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(event, nil)
|
||||
lang := c.GetHeader("X-Language")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": wfCtx.Event,
|
||||
"result": i18n.Sprintf(lang, res),
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
@@ -206,23 +266,374 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusBadRequest, "processors not found")
|
||||
}
|
||||
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Vars: make(map[string]interface{}),
|
||||
}
|
||||
for _, pl := range pipelines {
|
||||
for _, p := range pl.ProcessorConfigs {
|
||||
processor, err := models.GetProcessorByType(p.Typ, p.Config)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
|
||||
}
|
||||
event = processor.Process(rt.Ctx, event)
|
||||
if event == nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "event is dropped")
|
||||
|
||||
wfCtx, _, err = processor.Process(rt.Ctx, wfCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
|
||||
}
|
||||
if wfCtx == nil || wfCtx.Event == nil {
|
||||
lang := c.GetHeader("X-Language")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": nil,
|
||||
"result": i18n.Sprintf(lang, "event is dropped"),
|
||||
}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(event, nil)
|
||||
ginx.NewRender(c).Data(wfCtx.Event, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) eventPipelinesListByService(c *gin.Context) {
|
||||
pipelines, err := models.ListEventPipelines(rt.Ctx)
|
||||
ginx.NewRender(c).Data(pipelines, err)
|
||||
}
|
||||
|
||||
type EventPipelineRequest struct {
|
||||
// 事件数据(可选,如果不传则使用空事件)
|
||||
Event *models.AlertCurEvent `json:"event,omitempty"`
|
||||
// 输入参数覆盖
|
||||
InputsOverrides map[string]string `json:"inputs_overrides,omitempty"`
|
||||
|
||||
Username string `json:"username,omitempty"`
|
||||
}
|
||||
|
||||
// executePipelineTrigger 执行 Pipeline 触发的公共逻辑
|
||||
func (rt *Router) executePipelineTrigger(pipeline *models.EventPipeline, req *EventPipelineRequest, triggerBy string) (string, error) {
|
||||
// 准备事件数据
|
||||
var event *models.AlertCurEvent
|
||||
if req.Event != nil {
|
||||
event = req.Event
|
||||
} else {
|
||||
// 创建空事件
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
// 生成执行ID
|
||||
executionID := uuid.New().String()
|
||||
|
||||
// 创建触发上下文
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: triggerBy,
|
||||
InputsOverrides: req.InputsOverrides,
|
||||
RequestID: executionID,
|
||||
}
|
||||
|
||||
// 异步执行工作流
|
||||
go func() {
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, _, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
logger.Errorf("async workflow execute error: pipeline_id=%d execution_id=%s err=%v",
|
||||
pipeline.ID, executionID, err)
|
||||
}
|
||||
}()
|
||||
|
||||
return executionID, nil
|
||||
}
|
||||
|
||||
// triggerEventPipelineByService Service 调用触发工作流执行
|
||||
func (rt *Router) triggerEventPipelineByService(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
// 获取 Pipeline
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
executionID, err := rt.executePipelineTrigger(pipeline, &f, f.Username)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "%v", err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"execution_id": executionID,
|
||||
"message": "workflow execution started",
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// triggerEventPipelineByAPI API 触发工作流执行
|
||||
func (rt *Router) triggerEventPipelineByAPI(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
// 获取 Pipeline
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
// 检查权限
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
|
||||
|
||||
executionID, err := rt.executePipelineTrigger(pipeline, &f, me.Username)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"execution_id": executionID,
|
||||
"message": "workflow execution started",
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) listAllEventPipelineExecutions(c *gin.Context) {
|
||||
pipelineId := ginx.QueryInt64(c, "pipeline_id", 0)
|
||||
pipelineName := ginx.QueryStr(c, "pipeline_name", "")
|
||||
mode := ginx.QueryStr(c, "mode", "")
|
||||
status := ginx.QueryStr(c, "status", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
offset := ginx.QueryInt(c, "p", 1)
|
||||
|
||||
if limit <= 0 || limit > 1000 {
|
||||
limit = 20
|
||||
}
|
||||
if offset <= 0 {
|
||||
offset = 1
|
||||
}
|
||||
|
||||
executions, total, err := models.ListAllEventPipelineExecutions(rt.Ctx, pipelineId, pipelineName, mode, status, limit, (offset-1)*limit)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": executions,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) listEventPipelineExecutions(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
mode := ginx.QueryStr(c, "mode", "")
|
||||
status := ginx.QueryStr(c, "status", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
offset := ginx.QueryInt(c, "p", 1)
|
||||
|
||||
if limit <= 0 || limit > 1000 {
|
||||
limit = 20
|
||||
}
|
||||
if offset <= 0 {
|
||||
offset = 1
|
||||
}
|
||||
|
||||
executions, total, err := models.ListEventPipelineExecutions(rt.Ctx, pipelineID, mode, status, limit, (offset-1)*limit)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": executions,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) getEventPipelineExecution(c *gin.Context) {
|
||||
execID := ginx.UrlParamStr(c, "exec_id")
|
||||
|
||||
detail, err := models.GetEventPipelineExecutionDetail(rt.Ctx, execID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "execution not found: %v", err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(detail, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) getEventPipelineExecutionStats(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
stats, err := models.GetEventPipelineExecutionStatistics(rt.Ctx, pipelineID)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(stats, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) cleanEventPipelineExecutions(c *gin.Context) {
|
||||
var f struct {
|
||||
BeforeDays int `json:"before_days"`
|
||||
}
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if f.BeforeDays <= 0 {
|
||||
f.BeforeDays = 30
|
||||
}
|
||||
|
||||
beforeTime := time.Now().AddDate(0, 0, -f.BeforeDays).Unix()
|
||||
affected, err := models.DeleteEventPipelineExecutions(rt.Ctx, beforeTime)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"deleted": affected,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) streamEventPipeline(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
|
||||
|
||||
var event *models.AlertCurEvent
|
||||
if f.Event != nil {
|
||||
event = f.Event
|
||||
} else {
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: me.Username,
|
||||
InputsOverrides: f.InputsOverrides,
|
||||
RequestID: uuid.New().String(),
|
||||
Stream: true, // 流式端点强制启用流式输出
|
||||
}
|
||||
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
|
||||
}
|
||||
|
||||
if result.Stream && result.StreamChan != nil {
|
||||
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) handleStreamResponse(c *gin.Context, result *models.WorkflowResult, requestID string) {
|
||||
// 设置 SSE 响应头
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
c.Header("X-Accel-Buffering", "no") // 禁用 nginx 缓冲
|
||||
c.Header("X-Request-ID", requestID)
|
||||
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
ginx.Bomb(http.StatusInternalServerError, "streaming not supported")
|
||||
return
|
||||
}
|
||||
|
||||
// 发送初始连接成功消息
|
||||
initData := fmt.Sprintf(`{"type":"connected","request_id":"%s","timestamp":%d}`, requestID, time.Now().UnixMilli())
|
||||
fmt.Fprintf(c.Writer, "data: %s\n\n", initData)
|
||||
flusher.Flush()
|
||||
|
||||
// 从 channel 读取并发送 SSE
|
||||
timeout := time.After(30 * time.Minute) // 最长流式输出时间
|
||||
for {
|
||||
select {
|
||||
case chunk, ok := <-result.StreamChan:
|
||||
if !ok {
|
||||
// channel 关闭,发送结束标记
|
||||
return
|
||||
}
|
||||
|
||||
data, err := json.Marshal(chunk)
|
||||
if err != nil {
|
||||
logger.Errorf("stream: failed to marshal chunk: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Fprintf(c.Writer, "data: %s\n\n", data)
|
||||
flusher.Flush()
|
||||
|
||||
if chunk.Done {
|
||||
return
|
||||
}
|
||||
|
||||
case <-c.Request.Context().Done():
|
||||
// 客户端断开连接
|
||||
logger.Infof("stream: client disconnected, request_id=%s", requestID)
|
||||
return
|
||||
case <-timeout:
|
||||
logger.Errorf("stream: timeout, request_id=%s", requestID)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) streamEventPipelineByService(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
var event *models.AlertCurEvent
|
||||
if f.Event != nil {
|
||||
event = f.Event
|
||||
} else {
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: f.Username,
|
||||
InputsOverrides: f.InputsOverrides,
|
||||
RequestID: uuid.New().String(),
|
||||
Stream: true, // 流式端点强制启用流式输出
|
||||
}
|
||||
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
|
||||
}
|
||||
|
||||
// 检查是否是流式输出
|
||||
if result.Stream && result.StreamChan != nil {
|
||||
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
// eventPipelineExecutionAdd 接收 edge 节点同步的 Pipeline 执行记录
|
||||
func (rt *Router) eventPipelineExecutionAdd(c *gin.Context) {
|
||||
var execution models.EventPipelineExecution
|
||||
ginx.BindJSON(c, &execution)
|
||||
|
||||
if execution.ID == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
if execution.PipelineID <= 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "pipeline_id is required")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(models.DB(rt.Ctx).Create(&execution).Error)
|
||||
}
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const defaultLimit = 300
|
||||
@@ -128,6 +128,12 @@ func UserGroup(ctx *ctx.Context, id int64) *models.UserGroup {
|
||||
ginx.Bomb(http.StatusNotFound, "No such UserGroup")
|
||||
}
|
||||
|
||||
bgids, err := models.BusiGroupIds(ctx, []int64{id})
|
||||
ginx.Dangerous(err)
|
||||
|
||||
obj.BusiGroups, err = models.BusiGroupGetByIds(ctx, bgids)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
|
||||
@@ -15,9 +15,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -2,23 +2,29 @@ package router
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
|
||||
"github.com/ccfos/nightingale/v6/pkg/feishu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/dgrijalva/jwt-go"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/pkg/errors"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
type loginForm struct {
|
||||
@@ -31,7 +37,9 @@ type loginForm struct {
|
||||
func (rt *Router) loginPost(c *gin.Context) {
|
||||
var f loginForm
|
||||
ginx.BindJSON(c, &f)
|
||||
logger.Infof("username:%s login from:%s", f.Username, c.ClientIP())
|
||||
|
||||
rctx := c.Request.Context()
|
||||
logx.Infof(rctx, "username:%s login from:%s", f.Username, c.ClientIP())
|
||||
|
||||
if rt.HTTP.ShowCaptcha.Enable {
|
||||
if !CaptchaVerify(f.Captchaid, f.Verifyvalue) {
|
||||
@@ -44,23 +52,25 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
if rt.HTTP.RSA.OpenRSA {
|
||||
decPassWord, err := secu.Decrypt(f.Password, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
|
||||
if err != nil {
|
||||
logger.Errorf("RSA Decrypt failed: %v username: %s", err, f.Username)
|
||||
logx.Errorf(rctx, "RSA Decrypt failed: %v username: %s", err, f.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
}
|
||||
authPassWord = decPassWord
|
||||
}
|
||||
|
||||
reqCtx := rt.Ctx.WithContext(rctx)
|
||||
|
||||
var user *models.User
|
||||
var err error
|
||||
lc := rt.Sso.LDAP.Copy()
|
||||
if lc.Enable {
|
||||
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
|
||||
user, err = ldapx.LdapLogin(reqCtx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
|
||||
if err != nil {
|
||||
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
|
||||
logx.Debugf(rctx, "ldap login failed: %v username: %s", err, f.Username)
|
||||
var errLoginInN9e error
|
||||
// to use n9e as the minimum guarantee for login
|
||||
if user, errLoginInN9e = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
|
||||
if user, errLoginInN9e = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
|
||||
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
|
||||
return
|
||||
}
|
||||
@@ -68,7 +78,7 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
user.RolesLst = strings.Fields(user.Roles)
|
||||
}
|
||||
} else {
|
||||
user, err = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord)
|
||||
user, err = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
@@ -92,7 +102,8 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) logoutPost(c *gin.Context) {
|
||||
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
|
||||
rctx := c.Request.Context()
|
||||
logx.Infof(rctx, "username:%s logout from:%s", c.GetString("username"), c.ClientIP())
|
||||
metadata, err := rt.extractTokenMetadata(c.Request)
|
||||
if err != nil {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
|
||||
@@ -107,9 +118,20 @@ func (rt *Router) logoutPost(c *gin.Context) {
|
||||
|
||||
var logoutAddr string
|
||||
user := c.MustGet("user").(*models.User)
|
||||
|
||||
// 获取用户的 id_token
|
||||
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
|
||||
if err != nil {
|
||||
logx.Debugf(rctx, "fetch id_token failed: %v, user_id: %d", err, user.Id)
|
||||
idToken = "" // 如果获取失败,使用空字符串
|
||||
}
|
||||
|
||||
// 删除 id_token
|
||||
rt.deleteIdToken(c.Request.Context(), user.Id)
|
||||
|
||||
switch user.Belong {
|
||||
case "oidc":
|
||||
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr()
|
||||
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr(idToken)
|
||||
case "cas":
|
||||
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
|
||||
case "oauth2":
|
||||
@@ -199,6 +221,14 @@ func (rt *Router) refreshPost(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
// 延长 id_token 的过期时间,使其与新的 refresh token 生命周期保持一致
|
||||
// 注意:这里不会获取新的 id_token,只是延长 Redis 中现有 id_token 的 TTL
|
||||
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
|
||||
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
|
||||
logx.Debugf(c.Request.Context(), "refresh id_token ttl failed: %v, user_id: %d", err, userid)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"access_token": ts.AccessToken,
|
||||
"refresh_token": ts.RefreshToken,
|
||||
@@ -246,12 +276,13 @@ type CallbackOutput struct {
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallback(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.OIDC.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
ret, err := rt.Sso.OIDC.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logger.Errorf("sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
logx.Errorf(rctx, "sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
@@ -274,7 +305,7 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
for _, gid := range rt.Sso.OIDC.DefaultTeams {
|
||||
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
|
||||
if err != nil {
|
||||
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
|
||||
logx.Errorf(rctx, "user:%v UserGroupMemberAdd: %s", user, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -284,7 +315,14 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
|
||||
|
||||
// 保存 id_token 到 Redis,用于登出时使用
|
||||
if ret.IdToken != "" {
|
||||
if err := rt.saveIdToken(rctx, user.Id, ret.IdToken); err != nil {
|
||||
logx.Errorf(rctx, "save id_token failed: %v, user_id: %d", err, user.Id)
|
||||
}
|
||||
}
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
@@ -323,7 +361,7 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
|
||||
}
|
||||
|
||||
if !rt.Sso.CAS.Enable {
|
||||
logger.Error("cas is not enable")
|
||||
logx.Errorf(c.Request.Context(), "cas is not enable")
|
||||
ginx.NewRender(c).Data("", nil)
|
||||
return
|
||||
}
|
||||
@@ -338,17 +376,18 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackCas(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
ticket := ginx.QueryStr(c, "ticket", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
ret, err := rt.Sso.CAS.ValidateServiceTicket(c.Request.Context(), ticket, state, rt.Redis)
|
||||
ret, err := rt.Sso.CAS.ValidateServiceTicket(rctx, ticket, state, rt.Redis)
|
||||
if err != nil {
|
||||
logger.Errorf("ValidateServiceTicket: %s", err)
|
||||
logx.Errorf(rctx, "ValidateServiceTicket: %s", err)
|
||||
ginx.NewRender(c).Data("", err)
|
||||
return
|
||||
}
|
||||
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
|
||||
if err != nil {
|
||||
logger.Errorf("UserGet: %s", err)
|
||||
logx.Errorf(rctx, "UserGet: %s", err)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
if user != nil {
|
||||
@@ -367,10 +406,10 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
if err != nil {
|
||||
logger.Errorf("createTokens: %s", err)
|
||||
logx.Errorf(rctx, "createTokens: %s", err)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
@@ -413,13 +452,180 @@ func (rt *Router) loginRedirectOAuth(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(redirect, err)
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
func (rt *Router) loginRedirectDingTalk(c *gin.Context) {
|
||||
redirect := ginx.QueryStr(c, "redirect", "/")
|
||||
|
||||
v, exists := c.Get("userid")
|
||||
if exists {
|
||||
userid := v.(int64)
|
||||
user, err := models.UserGetById(rt.Ctx, userid)
|
||||
ginx.Dangerous(err)
|
||||
if user == nil {
|
||||
ginx.Bomb(200, "user not found")
|
||||
}
|
||||
|
||||
if user.Username != "" { // already login
|
||||
ginx.NewRender(c).Data(redirect, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if !rt.Sso.DingTalk.Enable {
|
||||
ginx.NewRender(c).Data("", nil)
|
||||
return
|
||||
}
|
||||
|
||||
redirect, err := rt.Sso.DingTalk.Authorize(rt.Redis, redirect)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(redirect, err)
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logger.Debugf("sso.callback() get ret %+v error %v", ret, err)
|
||||
logx.Errorf(rctx, "sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
|
||||
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if user != nil {
|
||||
if rt.Sso.DingTalk.DingTalkConfig.CoverAttributes {
|
||||
updatedFields := user.UpdateSsoFields(dingtalk.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
|
||||
}
|
||||
} else {
|
||||
user = new(models.User)
|
||||
user.FullSsoFields(dingtalk.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.DingTalk.DingTalkConfig.DefaultRoles)
|
||||
// create user from dingtalk
|
||||
ginx.Dangerous(user.Add(rt.Ctx))
|
||||
}
|
||||
|
||||
// set user login state
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
redirect = ret.Redirect
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(CallbackOutput{
|
||||
Redirect: redirect,
|
||||
User: user,
|
||||
AccessToken: ts.AccessToken,
|
||||
RefreshToken: ts.RefreshToken,
|
||||
}, nil)
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) loginRedirectFeiShu(c *gin.Context) {
|
||||
redirect := ginx.QueryStr(c, "redirect", "/")
|
||||
|
||||
v, exists := c.Get("userid")
|
||||
if exists {
|
||||
userid := v.(int64)
|
||||
user, err := models.UserGetById(rt.Ctx, userid)
|
||||
ginx.Dangerous(err)
|
||||
if user == nil {
|
||||
ginx.Bomb(200, "user not found")
|
||||
}
|
||||
|
||||
if user.Username != "" { // already login
|
||||
ginx.NewRender(c).Data(redirect, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if rt.Sso.FeiShu == nil || !rt.Sso.FeiShu.Enable {
|
||||
ginx.NewRender(c).Data("", nil)
|
||||
return
|
||||
}
|
||||
|
||||
redirect, err := rt.Sso.FeiShu.Authorize(rt.Redis, redirect)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(redirect, err)
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackFeiShu(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.FeiShu.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "sso_callback FeiShu fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
|
||||
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if user != nil {
|
||||
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil && rt.Sso.FeiShu.FeiShuConfig.CoverAttributes {
|
||||
updatedFields := user.UpdateSsoFields(feishu.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
|
||||
}
|
||||
} else {
|
||||
user = new(models.User)
|
||||
defaultRoles := []string{}
|
||||
defaultUserGroups := []int64{}
|
||||
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil {
|
||||
defaultRoles = rt.Sso.FeiShu.FeiShuConfig.DefaultRoles
|
||||
defaultUserGroups = rt.Sso.FeiShu.FeiShuConfig.DefaultUserGroups
|
||||
}
|
||||
|
||||
user.FullSsoFields(feishu.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, defaultRoles)
|
||||
ginx.Dangerous(user.Add(rt.Ctx))
|
||||
|
||||
if len(defaultUserGroups) > 0 {
|
||||
err = user.AddToUserGroups(rt.Ctx, defaultUserGroups)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "sso feishu add user group error %v %v", ret, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// set user login state
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
redirect = ret.Redirect
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(CallbackOutput{
|
||||
Redirect: redirect,
|
||||
User: user,
|
||||
AccessToken: ts.AccessToken,
|
||||
RefreshToken: ts.RefreshToken,
|
||||
}, nil)
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logx.Debugf(rctx, "sso.callback() get ret %+v error %v", ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
@@ -459,13 +665,15 @@ func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
}
|
||||
|
||||
type SsoConfigOutput struct {
|
||||
OidcDisplayName string `json:"oidcDisplayName"`
|
||||
CasDisplayName string `json:"casDisplayName"`
|
||||
OauthDisplayName string `json:"oauthDisplayName"`
|
||||
OidcDisplayName string `json:"oidcDisplayName"`
|
||||
CasDisplayName string `json:"casDisplayName"`
|
||||
OauthDisplayName string `json:"oauthDisplayName"`
|
||||
DingTalkDisplayName string `json:"dingTalkDisplayName"`
|
||||
FeiShuDisplayName string `json:"feishuDisplayName"`
|
||||
}
|
||||
|
||||
func (rt *Router) ssoConfigNameGet(c *gin.Context) {
|
||||
var oidcDisplayName, casDisplayName, oauthDisplayName string
|
||||
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName, feiShuDisplayName string
|
||||
if rt.Sso.OIDC != nil {
|
||||
oidcDisplayName = rt.Sso.OIDC.GetDisplayName()
|
||||
}
|
||||
@@ -478,23 +686,117 @@ func (rt *Router) ssoConfigNameGet(c *gin.Context) {
|
||||
oauthDisplayName = rt.Sso.OAuth2.GetDisplayName()
|
||||
}
|
||||
|
||||
if rt.Sso.DingTalk != nil {
|
||||
dingTalkDisplayName = rt.Sso.DingTalk.GetDisplayName()
|
||||
}
|
||||
|
||||
if rt.Sso.FeiShu != nil {
|
||||
feiShuDisplayName = rt.Sso.FeiShu.GetDisplayName()
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(SsoConfigOutput{
|
||||
OidcDisplayName: oidcDisplayName,
|
||||
CasDisplayName: casDisplayName,
|
||||
OauthDisplayName: oauthDisplayName,
|
||||
OidcDisplayName: oidcDisplayName,
|
||||
CasDisplayName: casDisplayName,
|
||||
OauthDisplayName: oauthDisplayName,
|
||||
DingTalkDisplayName: dingTalkDisplayName,
|
||||
FeiShuDisplayName: feiShuDisplayName,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) ssoConfigGets(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(models.SsoConfigGets(rt.Ctx))
|
||||
var ssoConfigs []models.SsoConfig
|
||||
lst, err := models.SsoConfigGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
if len(lst) == 0 {
|
||||
ginx.NewRender(c).Data(ssoConfigs, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
|
||||
dingTalkExist := false
|
||||
feiShuExist := false
|
||||
for _, config := range lst {
|
||||
var ssoReqConfig models.SsoConfig
|
||||
ssoReqConfig.Id = config.Id
|
||||
ssoReqConfig.Name = config.Name
|
||||
ssoReqConfig.UpdateAt = config.UpdateAt
|
||||
switch config.Name {
|
||||
case dingtalk.SsoTypeName:
|
||||
dingTalkExist = true
|
||||
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
case feishu.SsoTypeName:
|
||||
feiShuExist = true
|
||||
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
ssoReqConfig.Content = config.Content
|
||||
}
|
||||
|
||||
ssoConfigs = append(ssoConfigs, ssoReqConfig)
|
||||
}
|
||||
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
|
||||
if !dingTalkExist {
|
||||
var ssoConfig models.SsoConfig
|
||||
ssoConfig.Name = dingtalk.SsoTypeName
|
||||
ssoConfigs = append(ssoConfigs, ssoConfig)
|
||||
}
|
||||
if !feiShuExist {
|
||||
var ssoConfig models.SsoConfig
|
||||
ssoConfig.Name = feishu.SsoTypeName
|
||||
ssoConfigs = append(ssoConfigs, ssoConfig)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(ssoConfigs, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) ssoConfigUpdate(c *gin.Context) {
|
||||
var f models.SsoConfig
|
||||
ginx.BindJSON(c, &f)
|
||||
var ssoConfig models.SsoConfig
|
||||
ginx.BindJSON(c, &ssoConfig)
|
||||
|
||||
err := f.Update(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
switch ssoConfig.Name {
|
||||
case dingtalk.SsoTypeName:
|
||||
f.Name = ssoConfig.Name
|
||||
setting, err := json.Marshal(ssoConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
f.Content = string(setting)
|
||||
f.UpdateAt = time.Now().Unix()
|
||||
sso, err := f.Query(rt.Ctx)
|
||||
if !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
err = f.Create(rt.Ctx)
|
||||
} else {
|
||||
f.Id = sso.Id
|
||||
err = f.Update(rt.Ctx)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
case feishu.SsoTypeName:
|
||||
f.Name = ssoConfig.Name
|
||||
setting, err := json.Marshal(ssoConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
f.Content = string(setting)
|
||||
f.UpdateAt = time.Now().Unix()
|
||||
sso, err := f.Query(rt.Ctx)
|
||||
if !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
err = f.Create(rt.Ctx)
|
||||
} else {
|
||||
f.Id = sso.Id
|
||||
err = f.Update(rt.Ctx)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
f.Id = ssoConfig.Id
|
||||
f.Name = ssoConfig.Name
|
||||
f.Content = ssoConfig.Content
|
||||
err := f.Update(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
switch f.Name {
|
||||
case "LDAP":
|
||||
@@ -518,6 +820,22 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
|
||||
err := toml.Unmarshal([]byte(f.Content), &config)
|
||||
ginx.Dangerous(err)
|
||||
rt.Sso.OAuth2.Reload(config)
|
||||
case dingtalk.SsoTypeName:
|
||||
var config dingtalk.Config
|
||||
err := json.Unmarshal([]byte(f.Content), &config)
|
||||
ginx.Dangerous(err)
|
||||
if rt.Sso.DingTalk == nil {
|
||||
rt.Sso.DingTalk = dingtalk.New(config)
|
||||
}
|
||||
rt.Sso.DingTalk.Reload(config)
|
||||
case feishu.SsoTypeName:
|
||||
var config feishu.Config
|
||||
err := json.Unmarshal([]byte(f.Content), &config)
|
||||
ginx.Dangerous(err)
|
||||
if rt.Sso.FeiShu == nil {
|
||||
rt.Sso.FeiShu = feishu.New(config)
|
||||
}
|
||||
rt.Sso.FeiShu.Reload(config)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
|
||||
@@ -12,10 +12,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/slice"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) messageTemplatesAdd(c *gin.Context) {
|
||||
@@ -154,6 +154,7 @@ func (rt *Router) messageTemplatesGet(c *gin.Context) {
|
||||
|
||||
lst, err := models.MessageTemplatesGetBy(rt.Ctx, notifyChannelIdents)
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
if me.IsAdmin() {
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
@@ -193,10 +194,9 @@ func (rt *Router) eventsMessage(c *gin.Context) {
|
||||
events[i] = he.ToCur()
|
||||
}
|
||||
|
||||
var defs = []string{
|
||||
"{{$events := .}}",
|
||||
"{{$event := index . 0}}",
|
||||
}
|
||||
renderData := make(map[string]interface{})
|
||||
renderData["events"] = events
|
||||
defs := models.GetDefs(renderData)
|
||||
ret := make(map[string]string, len(req.Tpl.Content))
|
||||
for k, v := range req.Tpl.Content {
|
||||
text := strings.Join(append(defs, v), "")
|
||||
@@ -207,7 +207,7 @@ func (rt *Router) eventsMessage(c *gin.Context) {
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
err = tpl.Execute(&buf, events)
|
||||
err = tpl.Execute(&buf, renderData)
|
||||
if err != nil {
|
||||
ret[k] = err.Error()
|
||||
continue
|
||||
|
||||
@@ -2,9 +2,9 @@ package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) metricsDescGetFile(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// no param
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"math"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -10,15 +9,22 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/mute"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
// Return all, front-end search and paging
|
||||
func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
lst, err := models.AlertMuteGetsByBG(rt.Ctx, bgid)
|
||||
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -44,6 +50,9 @@ func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
|
||||
}
|
||||
|
||||
lst, err := models.AlertMuteGetsByBGIds(rt.Ctx, gids)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -53,11 +62,20 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
|
||||
bgid := ginx.QueryInt64(c, "bgid", -1)
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
disabled := ginx.QueryInt(c, "disabled", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) activeAlertMuteGets(c *gin.Context) {
|
||||
lst, err := models.AlertMuteGetsAll(rt.Ctx)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) alertMuteAdd(c *gin.Context) {
|
||||
|
||||
var f models.AlertMute
|
||||
@@ -67,18 +85,21 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
|
||||
f.CreateBy = username
|
||||
f.UpdateBy = username
|
||||
f.GroupId = ginx.UrlParamInt64(c, "id")
|
||||
ginx.NewRender(c).Message(f.Add(rt.Ctx))
|
||||
|
||||
ginx.Dangerous(f.Add(rt.Ctx))
|
||||
ginx.NewRender(c).Data(f.Id, nil)
|
||||
}
|
||||
|
||||
type MuteTestForm struct {
|
||||
EventId int64 `json:"event_id" binding:"required"`
|
||||
AlertMute models.AlertMute `json:"mute_config" binding:"required"`
|
||||
EventId int64 `json:"event_id" binding:"required"`
|
||||
AlertMute models.AlertMute `json:"config" binding:"required"`
|
||||
PassTimeCheck bool `json:"pass_time_check"`
|
||||
}
|
||||
|
||||
func (rt *Router) alertMuteTryRun(c *gin.Context) {
|
||||
|
||||
var f MuteTestForm
|
||||
ginx.BindJSON(c, &f)
|
||||
ginx.Dangerous(f.AlertMute.Verify())
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
ginx.Dangerous(err)
|
||||
@@ -90,18 +111,30 @@ func (rt *Router) alertMuteTryRun(c *gin.Context) {
|
||||
curEvent := *hisEvent.ToCur()
|
||||
curEvent.SetTagsMap()
|
||||
|
||||
// 绕过时间范围检查:设置时间范围为全量(0 到 int64 最大值),仅验证其他匹配条件(如标签、策略类型等)
|
||||
f.AlertMute.MuteTimeType = models.TimeRange
|
||||
f.AlertMute.Btime = 0 // 最小可能值(如 Unix 时间戳起点)
|
||||
f.AlertMute.Etime = math.MaxInt64 // 最大可能值(int64 上限)
|
||||
if f.PassTimeCheck {
|
||||
f.AlertMute.MuteTimeType = models.Periodic
|
||||
f.AlertMute.PeriodicMutesJson = []models.PeriodicMute{
|
||||
{
|
||||
EnableDaysOfWeek: "0 1 2 3 4 5 6",
|
||||
EnableStime: "00:00",
|
||||
EnableEtime: "00:00",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
if !mute.MatchMute(&curEvent, &f.AlertMute) {
|
||||
ginx.NewRender(c).Data("not match", nil)
|
||||
match, err := mute.MatchMute(&curEvent, &f.AlertMute)
|
||||
if err != nil {
|
||||
// 对错误信息进行 i18n 翻译
|
||||
translatedErr := i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
ginx.Bomb(http.StatusBadRequest, translatedErr)
|
||||
}
|
||||
|
||||
if !match {
|
||||
ginx.NewRender(c).Data("event not match mute", nil)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data("mute test match", nil)
|
||||
|
||||
ginx.NewRender(c).Data("event match mute", nil)
|
||||
}
|
||||
|
||||
// Preview events (alert_cur_event) that match the mute strategy based on the following criteria:
|
||||
|
||||
@@ -11,11 +11,11 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cstats"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/golang-jwt/jwt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -453,6 +453,30 @@ func (rt *Router) wrapJwtKey(key string) string {
|
||||
return rt.HTTP.JWTAuth.RedisKeyPrefix + key
|
||||
}
|
||||
|
||||
func (rt *Router) wrapIdTokenKey(userId int64) string {
|
||||
return fmt.Sprintf("n9e_id_token_%d", userId)
|
||||
}
|
||||
|
||||
// saveIdToken 保存用户的 id_token 到 Redis
|
||||
func (rt *Router) saveIdToken(ctx context.Context, userId int64, idToken string) error {
|
||||
if idToken == "" {
|
||||
return nil
|
||||
}
|
||||
// id_token 的过期时间应该与 RefreshToken 保持一致,确保在整个会话期间都可用于登出
|
||||
expiration := time.Minute * time.Duration(rt.HTTP.JWTAuth.RefreshExpired)
|
||||
return rt.Redis.Set(ctx, rt.wrapIdTokenKey(userId), idToken, expiration).Err()
|
||||
}
|
||||
|
||||
// fetchIdToken 从 Redis 获取用户的 id_token
|
||||
func (rt *Router) fetchIdToken(ctx context.Context, userId int64) (string, error) {
|
||||
return rt.Redis.Get(ctx, rt.wrapIdTokenKey(userId)).Result()
|
||||
}
|
||||
|
||||
// deleteIdToken 从 Redis 删除用户的 id_token
|
||||
func (rt *Router) deleteIdToken(ctx context.Context, userId int64) error {
|
||||
return rt.Redis.Del(ctx, rt.wrapIdTokenKey(userId)).Err()
|
||||
}
|
||||
|
||||
type TokenDetails struct {
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -33,7 +33,7 @@ type Record struct {
|
||||
|
||||
// notificationRecordAdd
|
||||
func (rt *Router) notificationRecordAdd(c *gin.Context) {
|
||||
var req []*models.NotificaitonRecord
|
||||
var req []*models.NotificationRecord
|
||||
ginx.BindJSON(c, &req)
|
||||
err := sender.PushNotifyRecords(req)
|
||||
ginx.Dangerous(err, 429)
|
||||
@@ -43,14 +43,14 @@ func (rt *Router) notificationRecordAdd(c *gin.Context) {
|
||||
|
||||
func (rt *Router) notificationRecordList(c *gin.Context) {
|
||||
eid := ginx.UrlParamInt64(c, "eid")
|
||||
lst, err := models.NotificaitonRecordsGetByEventId(rt.Ctx, eid)
|
||||
lst, err := models.NotificationRecordsGetByEventId(rt.Ctx, eid)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
response := buildNotificationResponse(rt.Ctx, lst)
|
||||
ginx.NewRender(c).Data(response, nil)
|
||||
}
|
||||
|
||||
func buildNotificationResponse(ctx *ctx.Context, nl []*models.NotificaitonRecord) NotificationResponse {
|
||||
func buildNotificationResponse(ctx *ctx.Context, nl []*models.NotificationRecord) NotificationResponse {
|
||||
response := NotificationResponse{
|
||||
SubRules: []SubRule{},
|
||||
Notifies: make(map[string][]Record),
|
||||
|
||||
@@ -11,8 +11,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) notifyChannelsAdd(c *gin.Context) {
|
||||
@@ -118,6 +118,9 @@ func (rt *Router) notifyChannelGetBy(c *gin.Context) {
|
||||
|
||||
func (rt *Router) notifyChannelsGet(c *gin.Context) {
|
||||
lst, err := models.NotifyChannelsGet(rt.Ctx, "", nil)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -162,21 +165,6 @@ func (rt *Router) notifyChannelIdentsGet(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
type flushDutyChannelsResponse struct {
|
||||
Error struct {
|
||||
Code string `json:"code"`
|
||||
Message string `json:"message"`
|
||||
} `json:"error"`
|
||||
Data struct {
|
||||
Items []struct {
|
||||
ChannelID int `json:"channel_id"`
|
||||
ChannelName string `json:"channel_name"`
|
||||
Status string `json:"status"`
|
||||
} `json:"items"`
|
||||
Total int `json:"total"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (rt *Router) flashDutyNotifyChannelsGet(c *gin.Context) {
|
||||
cid := ginx.UrlParamInt64(c, "id")
|
||||
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
|
||||
@@ -196,18 +184,31 @@ func (rt *Router) flashDutyNotifyChannelsGet(c *gin.Context) {
|
||||
jsonData = []byte(fmt.Sprintf(`{"member_name":"%s","email":"%s","phone":"%s"}`, me.Username, me.Email, me.Phone))
|
||||
}
|
||||
|
||||
items, err := getFlashDutyChannels(nc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, jsonData)
|
||||
items, err := getFlashDutyChannels(nc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, jsonData, time.Duration(nc.RequestConfig.FlashDutyRequestConfig.Timeout)*time.Millisecond)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(items, nil)
|
||||
}
|
||||
|
||||
// getFlashDutyChannels 从FlashDuty API获取频道列表
|
||||
func getFlashDutyChannels(integrationUrl string, jsonData []byte) ([]struct {
|
||||
type flushDutyChannelsResponse struct {
|
||||
Error struct {
|
||||
Code string `json:"code"`
|
||||
Message string `json:"message"`
|
||||
} `json:"error"`
|
||||
Data struct {
|
||||
Items []FlashDutyChannel `json:"items"`
|
||||
Total int `json:"total"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type FlashDutyChannel struct {
|
||||
ChannelID int `json:"channel_id"`
|
||||
ChannelName string `json:"channel_name"`
|
||||
Status string `json:"status"`
|
||||
}, error) {
|
||||
}
|
||||
|
||||
// getFlashDutyChannels 从FlashDuty API获取频道列表
|
||||
func getFlashDutyChannels(integrationUrl string, jsonData []byte, timeout time.Duration) ([]FlashDutyChannel, error) {
|
||||
// 解析URL,提取baseUrl和参数
|
||||
baseUrl, integrationKey, err := parseIntegrationUrl(integrationUrl)
|
||||
if err != nil {
|
||||
@@ -227,7 +228,9 @@ func getFlashDutyChannels(integrationUrl string, jsonData []byte) ([]struct {
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
httpResp, err := (&http.Client{}).Do(req)
|
||||
httpResp, err := (&http.Client{
|
||||
Timeout: timeout,
|
||||
}).Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -266,3 +269,149 @@ func parseIntegrationUrl(urlStr string) (baseUrl string, integrationKey string,
|
||||
|
||||
return host, integrationKey, nil
|
||||
}
|
||||
|
||||
func (rt *Router) pagerDutyNotifyServicesGet(c *gin.Context) {
|
||||
cid := ginx.UrlParamInt64(c, "id")
|
||||
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
|
||||
ginx.Dangerous(err)
|
||||
if err != nil || nc == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "notify channel not found")
|
||||
}
|
||||
|
||||
items, err := getPagerDutyServices(nc.RequestConfig.PagerDutyRequestConfig.ApiKey, time.Duration(nc.RequestConfig.PagerDutyRequestConfig.Timeout)*time.Millisecond)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, fmt.Sprintf("failed to get pagerduty services: %v", err))
|
||||
}
|
||||
// 服务: []集成,扁平化为服务-集成
|
||||
var flattenedItems []map[string]string
|
||||
for _, svc := range items {
|
||||
for _, integ := range svc.Integrations {
|
||||
flattenedItems = append(flattenedItems, map[string]string{
|
||||
"service_id": svc.ID,
|
||||
"service_name": svc.Name,
|
||||
"integration_summary": integ.Summary,
|
||||
"integration_id": integ.ID,
|
||||
"integration_url": integ.Self,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(flattenedItems, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) pagerDutyIntegrationKeyGet(c *gin.Context) {
|
||||
serviceId := ginx.UrlParamStr(c, "service_id")
|
||||
integrationId := ginx.UrlParamStr(c, "integration_id")
|
||||
cid := ginx.UrlParamInt64(c, "id")
|
||||
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
|
||||
ginx.Dangerous(err)
|
||||
if err != nil || nc == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "notify channel not found")
|
||||
}
|
||||
|
||||
integrationUrl := fmt.Sprintf("https://api.pagerduty.com/services/%s/integrations/%s", serviceId, integrationId)
|
||||
integrationKey, err := getPagerDutyIntegrationKey(integrationUrl, nc.RequestConfig.PagerDutyRequestConfig.ApiKey, time.Duration(nc.RequestConfig.PagerDutyRequestConfig.Timeout)*time.Millisecond)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, fmt.Sprintf("failed to get pagerduty integration key: %v", err))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(map[string]string{
|
||||
"integration_key": integrationKey,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
type PagerDutyIntegration struct {
|
||||
ID string `json:"id"`
|
||||
IntegrationKey string `json:"integration_key"`
|
||||
Self string `json:"self"` // integration 的 API URL
|
||||
Summary string `json:"summary"`
|
||||
}
|
||||
|
||||
type PagerDutyService struct {
|
||||
Name string `json:"name"`
|
||||
ID string `json:"id"`
|
||||
Integrations []PagerDutyIntegration `json:"integrations"`
|
||||
}
|
||||
|
||||
// getPagerDutyServices 从 PagerDuty API 分页获取所有服务及其集成信息
|
||||
func getPagerDutyServices(apiKey string, timeout time.Duration) ([]PagerDutyService, error) {
|
||||
const limit = 100 // 每页最大数量
|
||||
var offset uint // 分页偏移量
|
||||
var allServices []PagerDutyService
|
||||
|
||||
for {
|
||||
// 构建带分页参数的 URL
|
||||
url := fmt.Sprintf("https://api.pagerduty.com/services?limit=%d&offset=%d", limit, offset)
|
||||
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Token token=%s", apiKey))
|
||||
req.Header.Set("Accept", "application/vnd.pagerduty+json;version=2")
|
||||
|
||||
httpResp, err := (&http.Client{Timeout: timeout}).Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(httpResp.Body)
|
||||
httpResp.Body.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 定义包含分页信息的响应结构
|
||||
var serviceRes struct {
|
||||
Services []PagerDutyService `json:"services"`
|
||||
More bool `json:"more"` // 是否还有更多数据
|
||||
Limit uint `json:"limit"`
|
||||
Offset uint `json:"offset"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(body, &serviceRes); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
allServices = append(allServices, serviceRes.Services...)
|
||||
// 判断是否还有更多数据
|
||||
if !serviceRes.More || len(serviceRes.Services) < int(limit) {
|
||||
break
|
||||
}
|
||||
offset += limit // 准备请求下一页
|
||||
}
|
||||
|
||||
return allServices, nil
|
||||
}
|
||||
|
||||
// getPagerDutyIntegrationKey 通过 integration 的 API URL 获取 integration key
|
||||
func getPagerDutyIntegrationKey(integrationUrl, apiKey string, timeout time.Duration) (string, error) {
|
||||
req, err := http.NewRequest("GET", integrationUrl, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Token token=%s", apiKey))
|
||||
|
||||
httpResp, err := (&http.Client{
|
||||
Timeout: timeout,
|
||||
}).Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer httpResp.Body.Close()
|
||||
body, err := io.ReadAll(httpResp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var integRes struct {
|
||||
Integration struct {
|
||||
IntegrationKey string `json:"integration_key"`
|
||||
} `json:"integration"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(body, &integRes); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return integRes.Integration.IntegrationKey, nil
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user