Compare commits

...

95 Commits

Author SHA1 Message Date
ning
f8ddce8149 code refactor 2025-06-17 18:44:19 +08:00
ning
45685947dd refactor: event notify 2025-06-17 18:00:45 +08:00
ning
cddf5e7d37 refactor: event list api 2025-06-03 18:59:25 +08:00
ning
f07baa276e docs: update sql 2025-06-03 18:54:04 +08:00
Ulric Qin
2c2d5004f4 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-06-03 11:27:44 +08:00
Ulric Qin
9982666e44 update dashboard 2025-06-03 11:27:36 +08:00
ning
2b448f738c refactor: change role ops 2025-06-02 09:34:20 +08:00
ning
e4c258de8e refactor: change user ops 2025-06-02 09:24:37 +08:00
Ulric Qin
4f128a9b44 rename Null to Others in active events page 2025-05-30 12:19:09 +08:00
Ulric Qin
deb85b9c68 update README img 2025-05-30 08:51:00 +08:00
ning
1b84324147 refactor: rm blockEventNotify 2025-05-29 21:49:57 +08:00
ning
c73b66848e fix: cur event api 2025-05-29 20:30:39 +08:00
ning
cd74442819 refactor: add UpdateBy field assignment in alertMuteAdd function 2025-05-29 17:15:58 +08:00
ning
252a8284f9 refactor: update datasource 2025-05-29 11:02:47 +08:00
ning
7d2e998078 refactor: merge 2025-05-29 10:30:54 +08:00
Yening Qin
69582bacdf feat: add source token api 2025-05-29 10:05:49 +08:00
ning
1bede4eeb8 refactor: build event tags 2025-05-28 16:09:53 +08:00
ning
16ed81020a Merge branch 'main' of github.com:ccfos/nightingale 2025-05-28 14:15:37 +08:00
ning
7b020ae238 refactor: datasource init add recover 2025-05-28 14:15:21 +08:00
ning
05eabcf00d refactor: handle ibex 2025-05-28 14:15:07 +08:00
ning
e316842022 fix: ibex after event relabel 2025-05-28 14:14:51 +08:00
Ulric Qin
8b3c4749aa Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-27 20:17:37 +08:00
Ulric Qin
16be04c3e9 use s3 as default card severity 2025-05-27 20:17:30 +08:00
ning
ccbadba9ff refactor: update send duty 2025-05-27 20:07:28 +08:00
ning
ce5bf2e473 refactor: event processor 2025-05-27 19:44:31 +08:00
Ulric Qin
80cdf9d0bb if eventcard.Severity < 1: set it to 1 2025-05-27 19:34:03 +08:00
ning
7514086ae6 fix: different notify channel use notify script 2025-05-27 14:27:00 +08:00
ning
116f8b1590 Merge branch 'main' of github.com:ccfos/nightingale 2025-05-27 14:14:31 +08:00
ning
0fb4e4b723 refactor: add eval duration 2025-05-27 14:14:16 +08:00
710leo
07fb427eea refactor: update relabel processor 2025-05-26 23:40:27 +08:00
ulricqin
d8f8fed95f Update README.md 2025-05-26 10:21:33 +08:00
ulricqin
f2e0ec10f7 更新 README.md 2025-05-25 13:09:37 +08:00
ulricqin
db467a8811 更新 README.md 2025-05-25 13:05:03 +08:00
Ulric Qin
b839bd3e16 code refactor 2025-05-24 21:45:47 +08:00
Ulric Qin
8033ca590b update README 2025-05-24 21:20:06 +08:00
Ulric Qin
0974f33d16 update README 2025-05-23 19:05:32 +08:00
Ulric Qin
d52a19b1f7 update README 2025-05-23 18:59:18 +08:00
Ulric Qin
f11c4dc87d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-23 18:57:22 +08:00
Ulric Qin
d7f3bc8841 update README 2025-05-23 18:57:05 +08:00
ning
2ae8c35a50 refactor: change notify rule list sort 2025-05-23 16:00:27 +08:00
ning
da0697c5ce refactor: event api and event processors 2025-05-23 15:45:09 +08:00
ning
2eff1159e5 refactor: event add notify rule ids 2025-05-23 13:19:48 +08:00
ning
6c19c0adf4 refactor: update AlertCurEvent 2025-05-22 19:28:35 +08:00
ning
5e5525ef57 refactor: update AlertCurEvent 2025-05-22 19:00:57 +08:00
ning
58c2a3cc71 update event db2fe 2025-05-22 17:11:11 +08:00
ning
cef6d5fe49 refactor: alert_aggr_view delete format 2025-05-22 16:32:15 +08:00
ulricqin
49cda8b58a modify alerting aggr verify rules (#2694) 2025-05-22 15:45:36 +08:00
ning
d6a585ccbd refactor: update cur event api 2025-05-21 20:29:50 +08:00
ning
764c254833 fix: AlertAggrView update 2025-05-21 20:11:13 +08:00
ning
c427abdfa3 fix: AlertAggrView update 2025-05-21 20:04:28 +08:00
shardingHe
3749f62adc docs: add config for ntp (#2690) 2025-05-21 16:25:21 +08:00
Yening Qin
f932f93a94 feat: add new processors (#2688) 2025-05-20 18:27:07 +08:00
smx_Morgan
5bbc432db0 feat : add event_Ids to alert-cur-events/list (#2681) 2025-05-20 15:55:45 +08:00
Yening Qin
0712baa6e1 refactor: change TimeSpanMuteStrategy (#2686) 2025-05-20 15:51:37 +08:00
ning
b4d595d5f5 docs: update ops 2025-05-19 17:40:56 +08:00
Yening Qin
95090055e0 refactor: change redis cli timeout (#2684) 2025-05-19 11:12:46 +08:00
smx_Morgan
880b92bf36 fix: telegram notify channel template (#2683) 2025-05-17 21:42:41 +08:00
Yening Qin
744eb44f19 feat: add event pipelines (#2682) 2025-05-16 14:50:13 +08:00
Ulric Qin
6ddc78ea11 refactor n9e-v8 dashboard 2025-05-15 09:56:47 +08:00
Ulric Qin
823568081b update n9e-v8 dashboard 2025-05-15 08:42:56 +08:00
Ulric Qin
2f8e63f821 add some metrics to observe redis operations 2025-05-15 08:27:39 +08:00
Ulric Qin
bdc9fa4638 update target's update_at one by one 2025-05-15 08:01:11 +08:00
Ulric Qin
9e1d69c8b0 refactor pushgw metrics 2025-05-15 07:52:39 +08:00
Ulric Qin
85d8607be8 add some panel for n9e-v8 dashboard 2025-05-15 07:31:27 +08:00
Ulric Qin
ec6a4f134a update target's timestamp in redis support batch 2025-05-15 06:21:29 +08:00
Ulric Qin
798f9e5536 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-15 04:43:39 +08:00
Ulric Qin
92095ea89c fix categraf-detail-dashboard: add filter for promql 2025-05-15 04:43:33 +08:00
Yening Qin
eb85c9c78b feat: add alert mute test function 2025-05-14 21:08:26 +08:00
Ulric Qin
bd8bf1cf9e use topk in linux-overview dashboard 2025-05-14 15:53:09 +08:00
Ulric Qin
b27ddf45cf Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-14 15:17:10 +08:00
Ulric Qin
c8e004ba51 update n9e_v8 dashboard 2025-05-14 15:16:54 +08:00
Yening Qin
eb330f00b2 feat: embedded product api (#2671) 2025-05-14 14:55:50 +08:00
Yening Qin
49d61bbd5d refactor: merge to main (#2670) 2025-05-14 14:46:05 +08:00
Ulric Qin
407a1b61a5 refactor linux dashboard 2025-05-14 11:58:28 +08:00
Ulric Qin
bc8a6f61be refactor node-exporter dashboard 2025-05-14 11:48:23 +08:00
Ulric Qin
94cd9796bf rename some dashboards of Linux 2025-05-13 20:45:24 +08:00
Ulric Qin
c3ee0143b2 refactor os dashboards 2025-05-13 20:43:14 +08:00
Ulric Qin
10d4faae4e refactor os dashboard 2025-05-13 20:21:38 +08:00
Yening Qin
ffac81a2ef fix: alert rule verify (#2668) 2025-05-13 18:53:08 +08:00
Yening Qin
d8d1a454b3 fix: default ds id update (#2664) 2025-05-13 15:39:42 +08:00
Yening Qin
94f9818fd2 docs: update k8s dashboards and fix alert rule name check (#2663) 2025-05-13 14:59:38 +08:00
Asklv
a5d820ddb3 fix: api panic when gomail dial tcp failed. (#2661) 2025-05-12 20:08:37 +08:00
smx_Morgan
da0224d010 fix: Solved the problem of NaN value of prom not parsing json (#2652) 2025-05-12 18:28:57 +08:00
Yening Qin
4a399a23c0 refactor: change log query api 2025-05-12 15:42:18 +08:00
Ulric Qin
95ecc61834 refactor ops and i18n 2025-05-08 18:43:57 +08:00
Ulric Qin
f72e29677f refactor test case 2025-05-08 17:41:28 +08:00
Ulric Qin
f876eb02e2 fix multi role_operation 2025-04-28 17:20:58 +08:00
Ulric Qin
cdcadefb03 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-04-28 17:19:17 +08:00
Ulric Qin
582a3981fb delete Admon role_operation 2025-04-28 17:19:03 +08:00
smx_Morgan
8081c48450 fix :record rule name change is not synchronized (#2636) 2025-04-28 15:14:37 +08:00
Yening Qin
5e7541215a refactor: es add offset query and add es-index-pattern ops 2025-04-27 18:54:36 +08:00
ning
e95b5428b2 Merge branch 'main' of github.com:ccfos/nightingale 2025-04-25 23:32:39 +08:00
ning
8a47088d97 refactor: update datasource api 2025-04-25 23:32:23 +08:00
Ulric Qin
05ba5caf8a code refactor 2025-04-25 19:21:57 +08:00
Ulric Qin
dc7752c2af code refactor 2025-04-25 19:18:39 +08:00
109 changed files with 9773 additions and 10704 deletions

121
README.md
View File

@@ -3,7 +3,7 @@
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源告警管理专家 一体化的可观测平台</b>
<b>开源告警管理专家</b>
</p>
<p align="center">
@@ -27,77 +27,86 @@
[English](./README_en.md) | [中文](./README.md)
## 夜莺 Nightingale 是什么
## 夜莺是什么
> 夜莺 Nightingale 是什么,解决什么问题?以大家都很熟悉的 Grafana 做个类比:Grafana 擅长对接各种各样的数据源,然后提供灵活、强大、好看的可视化面板。夜莺则擅长对接各种各样的数据源,提供灵活、强大、高效的监控告警管理能力。从发展路径和定位来说,夜莺和 Grafana 很像,可以总结为一句话:可视化就用 Grafana监控告警就找夜莺。
>
> 在可视化领域Grafana 是毫无争议的领导者Grafana 在影响力、装机量、用户群、开发者数量等各个维度的数字上相比夜莺都是追赶的榜样。巨无霸往往都是从一个切入点打开局面的Grafana Labs 有了在可视化领域 Grafana 这个王牌,逐步扩展到整个可观测性方向,比如 Logging 维度有 LokiTracing 维度有 TempoProfiling 维度有收购来的 PyroscopeOn-call 维度有同样是收购来的 Grafana-OnCall 项目,还有时序数据库 Mimir、eBPF 采集器 Beyla、OpenTelemetry 采集器 Alloy、前端监控 SDK Faro最终构成了一个完整的可观测性工具矩阵但整个飞轮都是从 Grafana 项目开始转动起来的。
>
>夜莺,则是从监控告警这个切入点打开局面,也逐步横向做了相应扩展,比如夜莺也自研了可视化面板,如果你想有一个 all-in-one 的监控告警+可视化的工具,那么用夜莺也是正确的选择;比如 OnCall 方向,夜莺可以和 [Flashduty SaaS](https://flashcat.cloud/product/flashcat-duty/) 服务无缝的集成;在采集器方向,夜莺有配套的 [Categraf](https://flashcat.cloud/product/categraf),可以一个采集器中管理所有的 exporter并同时支持指标和日志的采集极大减轻工程师维护的采集器数量和工作量这个点太痛了你可能也遇到过业务团队吐槽采集器数量比业务应用进程数量还多的窘况吧
夜莺监控(Nightingale)是一款侧重告警的监控类开源项目。类似 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重在可视化,夜莺是侧重在告警引擎、告警事件的处理和分发
夜莺 Nightingale 作为一款开源云原生监控工具,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会CCF ODC为 CCF ODC 成立后接受捐赠的第一个开源项目。在 GitHub 上有超过 10000 颗星,是广受关注和使用的开源监控工具。夜莺的核心研发团队,也是 Open-Falcon 项目原核心研发人员,从 2014 年Open-Falcon 是 2014 年开源)算起来,也有 10 年了,只为把监控做到极致。
夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会CCF ODC为 CCF ODC 成立后接受捐赠的第一个开源项目。
## 夜莺的工作逻辑
## 快速开始
- 👉 [文档中心](https://flashcat.cloud/docs/) | [下载中心](https://flashcat.cloud/download/nightingale/)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- 为了提供更快速的访问体验,上述文档和下载站点托管于 [FlashcatCloud](https://flashcat.cloud)
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
很多用户已经自行采集了指标、日志数据此时就把存储库VictoriaMetrics、ElasticSearch等作为数据源接入夜莺即可在夜莺里配置告警规则、通知规则完成告警事件的生成和派发。
## 功能特点
![夜莺产品架构](doc/img/readme/20240221152601.png)
- 对接多种时序库,实现统一监控告警管理:支持对接的时序库包括 Prometheus、VictoriaMetrics、Thanos、Mimir、M3DB、TDengine 等
- 对接日志库,实现针对日志的监控告警:支持对接的日志库包括 ElasticSearch、Loki 等。
- 专业告警能力:内置支持多种告警规则,可以扩展支持常见通知媒介,支持告警屏蔽/抑制/订阅/自愈、告警事件管理。
- 高性能可视化引擎:支持多种图表样式,内置众多 Dashboard 模版,也可导入 Grafana 模版,开箱即用,开源协议商业友好。
- 支持常见采集器:支持 [Categraf](https://flashcat.cloud/product/categraf)、Telegraf、Grafana-agent、Datadog-agent、各种 Exporter 作为采集器,没有什么数据是不能监控的。
- 👀无缝搭配 [Flashduty](https://flashcat.cloud/product/flashcat-duty/)实现告警聚合收敛、认领、升级、排班、IM 集成,确保告警处理不遗漏、减少打扰、高效协同。
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
## 截图演示
你可以在页面的右上角,切换语言和主题,目前我们支持英语、简体中文、繁体中文。
![语言切换](doc/img/readme/n9e-switch-i18n.png)
即时查询,类似 Prometheus 内置的查询分析页面,做 ad-hoc 查询,夜莺做了一些 UI 优化,同时提供了一些内置 promql 指标,让不太了解 promql 的用户也可以快速查询。
![即时查询](doc/img/readme/20240513103305.png)
当然,也可以直接通过指标视图查看,有了指标视图,即时查询基本可以不用了,或者只有高端玩家使用即时查询,普通用户直接通过指标视图查询即可。
![指标视图](doc/img/readme/20240513103530.png)
夜莺内置了常用仪表盘,可以直接导入使用。也可以导入 Grafana 仪表盘,不过只能兼容 Grafana 基本图表,如果已经习惯了 Grafana 建议继续使用 Grafana 看图,把夜莺作为一个告警引擎使用。
![内置仪表盘](doc/img/readme/20240513103628.png)
除了内置的仪表盘,也内置了很多告警规则,开箱即用。
![内置告警规则](doc/img/readme/20240513103825.png)
## 产品架构
社区使用夜莺最多的场景就是使用夜莺做告警引擎,对接多套时序库,统一告警规则管理。绘图仍然使用 Grafana 居多。作为一个告警引擎,夜莺的产品架构如下:
![产品架构](doc/img/readme/20240221152601.png)
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,我们也提供边缘机房告警引擎下沉部署模式,这个模式下,即便网络割裂,告警功能也不受影响。
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
![边缘部署模式](doc/img/readme/20240222102119.png)
> 上图中机房A和中心机房的网络链路很好所以直接由中心端的夜莺进程做告警引擎机房B和中心机房的网络链路不好所以在机房B部署了 `n9e-edge` 做告警引擎对机房B的数据源做告警判定。
## 交流渠道
- 报告Bug优先推荐提交[夜莺GitHub Issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
- 推荐完整浏览[夜莺文档站点](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/),了解更多信息
- 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`
## 告警降噪、升级、协同
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
如果您有更高级的需求,比如:
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
那夜莺是不合适的,您需要的是 [PagerDuty](https://www.pagerduty.com/) 或 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) (产品易用,且有免费套餐)这样的 On-call 产品。
## 相关资料 & 交流渠道
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助PPT链接在文末
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
## 关键特性简介
![夜莺告警规则](doc/img/readme/2025-05-23_18-43-37.png)
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
- 支持业务组概念,引入权限体系,分门别类管理各类规则
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
![夜莺事件大盘](doc/img/readme/2025-05-30_08-49-28.png)
- 夜莺存档了历史告警事件,支持多维度的查询和统计
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
![夜莺集成中心](doc/img/readme/2025-05-23_18-46-06.png)
- 夜莺内置常用操作系统、中间件、数据库的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
![夜莺仪表盘](doc/img/readme/2025-05-23_18-49-02.png)
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
- 如果你已经习惯了 Grafana建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 感谢众多企业的信赖
![夜莺客户](doc/img/readme/logos.png)
## 社区共建
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- ❤️ 夜莺贡献者

View File

@@ -115,7 +115,9 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, targetsOfAlertRulesCache,
busiGroupCache, alertMuteCache, datasourceCache, promClients, naming, ctx, alertStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, alertc.Alerting, ctx, alertStats)
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
notifyRecordComsumer := sender.NewNotifyRecordConsumer(ctx)

View File

@@ -25,6 +25,7 @@ type Stats struct {
CounterHeartbeatErrorTotal *prometheus.CounterVec
CounterSubEventTotal *prometheus.CounterVec
GaugeQuerySeriesCount *prometheus.GaugeVec
GaugeRuleEvalDuration *prometheus.GaugeVec
GaugeNotifyRecordQueueSize prometheus.Gauge
}
@@ -136,6 +137,13 @@ func NewSyncStats() *Stats {
Help: "The size of notify record queue.",
})
GaugeRuleEvalDuration := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "rule_eval_duration_ms",
Help: "Duration of rule eval in milliseconds.",
}, []string{"rule_id", "datasource_id"})
CounterVarFillingQuery := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
@@ -158,6 +166,7 @@ func NewSyncStats() *Stats {
CounterHeartbeatErrorTotal,
CounterSubEventTotal,
GaugeQuerySeriesCount,
GaugeRuleEvalDuration,
GaugeNotifyRecordQueueSize,
CounterVarFillingQuery,
)
@@ -177,6 +186,7 @@ func NewSyncStats() *Stats {
CounterHeartbeatErrorTotal: CounterHeartbeatErrorTotal,
CounterSubEventTotal: CounterSubEventTotal,
GaugeQuerySeriesCount: GaugeQuerySeriesCount,
GaugeRuleEvalDuration: GaugeRuleEvalDuration,
GaugeNotifyRecordQueueSize: GaugeNotifyRecordQueueSize,
CounterVarFillingQuery: CounterVarFillingQuery,
}

View File

@@ -15,6 +15,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
@@ -35,6 +36,7 @@ type Dispatch struct {
notifyRuleCache *memsto.NotifyRuleCacheType
notifyChannelCache *memsto.NotifyChannelCacheType
messageTemplateCache *memsto.MessageTemplateCacheType
eventProcessorCache *memsto.EventProcessorCacheType
alerting aconf.Alerting
@@ -54,7 +56,7 @@ type Dispatch struct {
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
taskTplsCache *memsto.TaskTplCache, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
messageTemplateCache *memsto.MessageTemplateCacheType, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
notify := &Dispatch{
alertRuleCache: alertRuleCache,
userCache: userCache,
@@ -66,6 +68,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
notifyRuleCache: notifyRuleCache,
notifyChannelCache: notifyChannelCache,
messageTemplateCache: messageTemplateCache,
eventProcessorCache: eventProcessorCache,
alerting: alerting,
@@ -77,6 +80,12 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
ctx: ctx,
Astats: astats,
}
pipeline.Init()
// 设置通知记录回调函数
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
return notify
}
@@ -141,11 +150,14 @@ func (e *Dispatch) reloadTpls() error {
return nil
}
func (e *Dispatch) HandleEventWithNotifyRule(event *models.AlertCurEvent, isSubscribe bool) {
func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent) {
if len(event.NotifyRuleIDs) > 0 {
for _, notifyRuleId := range event.NotifyRuleIDs {
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, event)
if len(eventOrigin.NotifyRuleIds) > 0 {
for _, notifyRuleId := range eventOrigin.NotifyRuleIds {
// 深拷贝新的 event避免并发修改 event 冲突
eventCopy := eventOrigin.DeepCopy()
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
if notifyRule == nil {
continue
@@ -155,33 +167,108 @@ func (e *Dispatch) HandleEventWithNotifyRule(event *models.AlertCurEvent, isSubs
continue
}
var processors []models.Processor
for _, pipelineConfig := range notifyRule.PipelineConfigs {
if !pipelineConfig.Enable {
continue
}
eventPipeline := e.eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("notify_id: %d, event:%+v, processor not found", notifyRuleId, eventCopy)
continue
}
if !pipelineApplicable(eventPipeline, eventCopy) {
logger.Debugf("notify_id: %d, event:%+v, pipeline_id: %d, not applicable", notifyRuleId, eventCopy, pipelineConfig.PipelineId)
continue
}
processors = append(processors, e.eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)...)
}
for _, processor := range processors {
logger.Infof("before processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
eventCopy = processor.Process(e.ctx, eventCopy)
logger.Infof("after processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
if eventCopy == nil {
logger.Warningf("notify_id: %d, event:%+v, processor:%+v, event is nil", notifyRuleId, eventCopy, processor)
break
}
}
if eventCopy == nil {
// 如果 eventCopy 为 nil说明 eventCopy 被 processor drop 掉了, 不再发送通知
continue
}
// notify
for i := range notifyRule.NotifyConfigs {
if !NotifyRuleApplicable(&notifyRule.NotifyConfigs[i], event) {
if !NotifyRuleApplicable(&notifyRule.NotifyConfigs[i], eventCopy) {
continue
}
notifyChannel := e.notifyChannelCache.Get(notifyRule.NotifyConfigs[i].ChannelID)
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
if notifyChannel == nil {
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{event}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, event, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
continue
}
if notifyChannel.RequestType != "flashduty" && messageTemplate == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, event, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{event}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
continue
}
// todo go send
// todo 聚合 event
go e.sendV2([]*models.AlertCurEvent{event}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
go e.sendV2([]*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
}
}
}
}
// pipelineApplicable reports whether the given event pipeline should be applied
// to the event. A nil pipeline, or one whose filtering is disabled, is always
// applicable. Otherwise the event must match both the label filters and the
// attribute filters (each group is only checked when non-empty). A parse
// failure in either filter group makes the pipeline non-applicable.
func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
	if pipeline == nil {
		return true
	}

	if !pipeline.FilterEnable {
		return true
	}

	tagMatch := true
	if len(pipeline.LabelFilters) > 0 {
		// Backfill Func from the legacy Op field for filters that predate it.
		// NOTE(review): this mutates the (presumably cached) pipeline object in
		// place — confirm callers do not share it concurrently.
		for i := range pipeline.LabelFilters {
			if pipeline.LabelFilters[i].Func == "" {
				pipeline.LabelFilters[i].Func = pipeline.LabelFilters[i].Op
			}
		}

		tagFilters, err := models.ParseTagFilter(pipeline.LabelFilters)
		if err != nil {
			logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
			return false
		}
		tagMatch = common.MatchTags(event.TagsMap, tagFilters)
	}

	attributesMatch := true
	if len(pipeline.AttrFilters) > 0 {
		tagFilters, err := models.ParseTagFilter(pipeline.AttrFilters)
		if err != nil {
			// Log the parse error itself (not the nil filter slice), matching
			// the label-filter branch above.
			logger.Errorf("pipeline applicable failed to parse attr filter: %v event:%+v pipeline:%+v", err, event, pipeline)
			return false
		}
		attributesMatch = common.MatchTags(event.JsonTagsAndValue(), tagFilters)
	}

	return tagMatch && attributesMatch
}
func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) bool {
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
@@ -359,35 +446,34 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
switch notifyChannel.RequestType {
case "flashduty":
if len(flashDutyChannelIDs) == 0 {
flashDutyChannelIDs = []int64{0} // 如果 flashduty 通道没有配置,则使用 0, 给 SendFlashDuty 判断使用, 不给 flashduty 传 channel_id 参数
}
for i := range flashDutyChannelIDs {
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
}
return
case "http":
if e.notifyChannelCache.HttpConcurrencyAdd(notifyChannel.ID) {
defer e.notifyChannelCache.HttpConcurrencyDone(notifyChannel.ID)
}
if notifyChannel.RequestConfig == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, request config not found", notifyRuleId, notifyChannel.Name, events[0])
// 使用队列模式处理 http 通知
// 创建通知任务
task := &memsto.NotifyTask{
Events: events,
NotifyRuleId: notifyRuleId,
NotifyChannel: notifyChannel,
TplContent: tplContent,
CustomParams: customParams,
Sendtos: sendtos,
}
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, http request config not found", notifyRuleId, notifyChannel.Name, events[0])
}
if NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos, resp, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), resp, err)
} else {
for i := range sendtos {
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, []string{sendtos[i]}), resp, err)
}
// 将任务加入队列
success := e.notifyChannelCache.EnqueueNotifyTask(task)
if !success {
logger.Errorf("failed to enqueue notify task for channel %d, notify_id: %d", notifyChannel.ID, notifyRuleId)
// 如果入队失败,记录错误通知
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), "", errors.New("failed to enqueue notify task, queue is full"))
}
case "smtp":
@@ -416,11 +502,6 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
return
}
if e.blockEventNotify(rule, event) {
logger.Infof("block event notify: rule_id:%d event:%+v", rule.Id, event)
return
}
fillUsers(event, e.userCache, e.userGroupCache)
var (
@@ -448,8 +529,7 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
}
// 处理事件发送,这里用一个goroutine处理一个event的所有发送事件
go e.HandleEventWithNotifyRule(event, isSubscribe)
go e.HandleEventWithNotifyRule(event)
go e.Send(rule, event, notifyTarget, isSubscribe)
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
@@ -458,25 +538,6 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
}
}
// blockEventNotify reports whether notification for the event should be
// suppressed. Currently it only suppresses events for HOST-type rules whose
// target machine no longer exists in the target cache (looked up by the
// event's "ident" tag).
func (e *Dispatch) blockEventNotify(rule *models.AlertRule, event *models.AlertCurEvent) bool {
	ruleType := rule.GetRuleType()
	// For host rules, first check whether the machine has been deleted;
	// a missing target means the notification should be blocked.
	if ruleType == models.HOST {
		host, ok := e.targetCache.Get(event.TagsMap["ident"])
		if !ok || host == nil {
			return true
		}
	}

	// Recovery notifications: check whether the rule config changed.
	// Intentionally disabled for now.
	// if event.IsRecovered && event.RuleHash != rule.Hash() {
	// 	return true
	// }

	return false
}
func (e *Dispatch) handleSubs(event *models.AlertCurEvent) {
// handle alert subscribes
subscribes := make([]*models.AlertSubscribe, 0)
@@ -646,6 +707,11 @@ func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEven
}
json.Unmarshal([]byte(rule.RuleConfig), &ruleConfig)
if event.IsRecovered {
// 恢复事件不需要走故障自愈的逻辑
return
}
for _, t := range ruleConfig.TaskTpls {
if t.TplId == 0 {
continue

View File

@@ -24,7 +24,7 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
location,
event.RuleId,
event.SubRuleId,
event.NotifyRuleIDs,
event.NotifyRuleIds,
event.Cluster,
event.TagsJSON,
event.TriggerValue,

View File

@@ -172,7 +172,7 @@ func (arw *AlertRuleWorker) Eval() {
case models.LOKI:
anomalyPoints, err = arw.GetPromAnomalyPoint(cachedRule.RuleConfig)
default:
anomalyPoints, recoverPoints = arw.GetAnomalyPoint(cachedRule, arw.Processor.DatasourceId())
anomalyPoints, recoverPoints, err = arw.GetAnomalyPoint(cachedRule, arw.Processor.DatasourceId())
}
if err != nil {
@@ -232,6 +232,10 @@ func (arw *AlertRuleWorker) Stop() {
func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.AnomalyPoint, error) {
var lst []models.AnomalyPoint
start := time.Now()
defer func() {
arw.Processor.Stats.GaugeRuleEvalDuration.WithLabelValues(fmt.Sprintf("%v", arw.Rule.Id), fmt.Sprintf("%v", arw.Processor.DatasourceId())).Set(float64(time.Since(start).Milliseconds()))
}()
var rule *models.PromRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
@@ -742,6 +746,10 @@ func combine(paramKeys []string, paraMap map[string][]string, index int, current
func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.AnomalyPoint, error) {
var lst []models.AnomalyPoint
start := time.Now()
defer func() {
arw.Processor.Stats.GaugeRuleEvalDuration.WithLabelValues(fmt.Sprintf("%v", arw.Rule.Id), fmt.Sprintf("%v", arw.Processor.DatasourceId())).Set(float64(time.Since(start).Milliseconds()))
}()
var rule *models.HostRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
@@ -1414,13 +1422,18 @@ func fillVar(curRealQuery string, paramKey string, val string) string {
return curRealQuery
}
func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64) ([]models.AnomalyPoint, []models.AnomalyPoint) {
func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64) ([]models.AnomalyPoint, []models.AnomalyPoint, error) {
// 获取查询和规则判断条件
start := time.Now()
defer func() {
arw.Processor.Stats.GaugeRuleEvalDuration.WithLabelValues(fmt.Sprintf("%v", arw.Rule.Id), fmt.Sprintf("%v", arw.Processor.DatasourceId())).Set(float64(time.Since(start).Milliseconds()))
}()
points := []models.AnomalyPoint{}
recoverPoints := []models.AnomalyPoint{}
ruleConfig := strings.TrimSpace(rule.RuleConfig)
if ruleConfig == "" {
logger.Warningf("rule_eval:%d promql is blank", rule.Id)
logger.Warningf("rule_eval:%d ruleConfig is blank", rule.Id)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1428,7 +1441,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
"",
).Set(0)
return points, recoverPoints
return points, recoverPoints, fmt.Errorf("rule_eval:%d ruleConfig is blank", rule.Id)
}
var ruleQuery models.RuleQuery
@@ -1436,7 +1449,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
if err != nil {
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
return points, recoverPoints
return points, recoverPoints, fmt.Errorf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
}
arw.Inhibit = ruleQuery.Inhibit
@@ -1456,7 +1469,8 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
fmt.Sprintf("%v", i),
).Set(-2)
continue
return points, recoverPoints, fmt.Errorf("rule_eval:%d datasource:%d not exists", rule.Id, dsId)
}
ctx := context.WithValue(context.Background(), "delay", int64(rule.Delay))
@@ -1471,7 +1485,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", i),
).Set(-1)
continue
return points, recoverPoints, fmt.Errorf("rule_eval:%d query data error: %v", rule.Id, err)
}
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -1505,6 +1519,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
for _, query := range ruleQuery.Queries {
ref, unit, err := GetQueryRefAndUnit(query)
if err != nil {
logger.Warningf("rule_eval rid:%d query:%+v get ref and unit error:%s", rule.Id, query, err.Error())
continue
}
unitMap[ref] = unit
@@ -1671,5 +1686,5 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
}
}
return points, recoverPoints
return points, recoverPoints, nil
}

View File

@@ -44,6 +44,12 @@ func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) b
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
if rule.EnableDaysOfWeek == "" {
// 如果规则没有配置生效时间,则默认全天生效
return false
}
enableStime := strings.Fields(rule.EnableStime)
enableEtime := strings.Fields(rule.EnableEtime)
enableDaysOfWeek := strings.Split(rule.EnableDaysOfWeek, ";")
@@ -129,7 +135,7 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
}
for i := 0; i < len(mutes); i++ {
if matchMute(event, mutes[i]) {
if MatchMute(event, mutes[i]) {
return true, mutes[i].Id
}
}
@@ -137,13 +143,12 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
return false, 0
}
// matchMute 如果传入了clock这个可选参数就表示使用这个clock表示的时间否则就从event的字段中取TriggerTime
func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
// MatchMute 如果传入了clock这个可选参数就表示使用这个clock表示的时间否则就从event的字段中取TriggerTime
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
if mute.Disabled == 1 {
return false
}
// 如果不是全局的,判断 匹配的 datasource id
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
@@ -166,7 +171,7 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
if len(clock) > 0 {
ts = clock[0]
}
if !mute.IsWithinPeriodicMute(ts) {
return false
}

View File

@@ -0,0 +1,11 @@
package pipeline
import (
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventdrop"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventupdate"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
)
// Init is intentionally empty: importing this package is what matters.
// The blank imports above pull in each processor package, whose init
// functions register themselves with the models processor registry.
func Init() {
}

View File

@@ -0,0 +1,106 @@
package callback
import (
"crypto/tls"
"encoding/json"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
// HTTPConfig holds the outbound HTTP settings shared by processors that
// call external services (callback, event_update): target URL, optional
// basic auth, extra headers, TLS/proxy behavior, and a lazily-built client.
type HTTPConfig struct {
	URL    string `json:"url"`
	Method string `json:"method,omitempty"` // NOTE(review): not used when sending — Process always POSTs; confirm intent
	Body   string `json:"body,omitempty"`   // NOTE(review): not used when sending — the event JSON is always the body; confirm intent
	// Headers are merged over the default Content-Type: application/json.
	Headers      map[string]string `json:"headers"`
	AuthUsername string            `json:"auth_username"` // basic auth; applied only when both username and password are set
	AuthPassword string            `json:"auth_password"`
	Timeout      int               `json:"timeout"` // request timeout in milliseconds
	SkipSSLVerify bool             `json:"skip_ssl_verify"` // true disables TLS certificate verification
	Proxy        string            `json:"proxy"`           // optional proxy URL; ignored (with a logged error) if unparsable
	Client       *http.Client      `json:"-"`               // built on first Process call, then reused
}
// CallbackConfig is the "callback" pipeline processor: it POSTs the whole
// event as JSON to an external HTTP endpoint and ignores the response
// content (fire-and-forget notification).
type CallbackConfig struct {
	HTTPConfig
}
// Register this processor under the "callback" type name so pipelines can
// instantiate it from stored settings.
func init() {
	models.RegisterProcessor("callback", &CallbackConfig{})
}
// Init decodes the raw settings value into a fresh CallbackConfig
// processor via the shared JSON round-trip helper.
func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
	return common.InitProcessor[*CallbackConfig](settings)
}
// Process POSTs the event as JSON to the configured URL and returns the
// event unchanged. All failures are logged and non-fatal: the event always
// continues through the pipeline. The HTTP client is built lazily on first
// use with the configured TLS/proxy/timeout settings.
// NOTE(review): the lazy c.Client initialization is not synchronized —
// confirm Process is never invoked concurrently for the same config.
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
	if c.Client == nil {
		transport := &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
		}

		if c.Proxy != "" {
			proxyURL, err := url.Parse(c.Proxy)
			if err != nil {
				// Bad proxy config: log and fall back to a direct connection.
				logger.Errorf("failed to parse proxy url: %v", err)
			} else {
				transport.Proxy = http.ProxyURL(proxyURL)
			}
		}

		c.Client = &http.Client{
			Timeout:   time.Duration(c.Timeout) * time.Millisecond,
			Transport: transport,
		}
	}

	headers := make(map[string]string)
	headers["Content-Type"] = "application/json"
	for k, v := range c.Headers {
		headers[k] = v
	}

	body, err := json.Marshal(event)
	if err != nil {
		logger.Errorf("failed to marshal event: %v", err)
		return event
	}

	req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
	if err != nil {
		logger.Errorf("failed to create request: %v event: %v", err, event)
		return event
	}

	for k, v := range headers {
		req.Header.Set(k, v)
	}

	if c.AuthUsername != "" && c.AuthPassword != "" {
		req.SetBasicAuth(c.AuthUsername, c.AuthPassword)
	}

	resp, err := c.Client.Do(req)
	if err != nil {
		logger.Errorf("failed to send request: %v event: %v", err, event)
		return event
	}
	// Fix: the response body was never closed, leaking connections/FDs.
	// Closing lets the transport reuse the TCP/TLS connection.
	defer resp.Body.Close()

	b, err := io.ReadAll(resp.Body)
	if err != nil {
		logger.Errorf("failed to read response body: %v event: %v", err, event)
		return event
	}

	logger.Infof("response body: %s", string(b))
	return event
}

View File

@@ -0,0 +1,24 @@
package common
import (
"encoding/json"
)
// InitProcessor builds a processor configuration of type T from an
// arbitrary settings value by round-tripping it through JSON. Generics
// keep the per-processor Init methods to a single call. T is expected to
// implement models.Processor; on any marshal/unmarshal error the zero
// value of T is returned together with the error.
func InitProcessor[T any](settings interface{}) (T, error) {
	raw, err := json.Marshal(settings)
	if err != nil {
		var zero T
		return zero, err
	}

	var result T
	if err := json.Unmarshal(raw, &result); err != nil {
		var zero T
		return zero, err
	}
	return result, nil
}

View File

@@ -0,0 +1,61 @@
package eventdrop
import (
"bytes"
"strings"
texttemplate "text/template"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/toolkits/pkg/logger"
)
// EventDropConfig is the "event_drop" pipeline processor. Content is a Go
// text/template evaluated against the event; when it renders exactly
// "true" the event is dropped from the pipeline.
type EventDropConfig struct {
	Content string `json:"content"`
}
// Register this processor under the "event_drop" type name so pipelines
// can instantiate it from stored settings.
func init() {
	models.RegisterProcessor("event_drop", &EventDropConfig{})
}
// Init decodes the raw settings value into a fresh EventDropConfig
// processor via the shared JSON round-trip helper.
func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
	return common.InitProcessor[*EventDropConfig](settings)
}
// Process evaluates the configured template against the event. It exists
// for filtering cases that plain tag/attribute matching cannot express:
// when the rendered output is exactly "true" the event is dropped (nil is
// returned); otherwise the event passes through unchanged. Template parse
// or execution errors are logged and the event is kept.
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
	// Prepend convenience bindings so user templates can reference
	// $event, $labels and $value directly.
	text := "{{ $event := . }}" +
		"{{ $labels := .TagsMap }}" +
		"{{ $value := .TriggerValue }}" +
		c.Content

	tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
	if err != nil {
		logger.Errorf("processor failed to parse template: %v event: %v", err, event)
		return event
	}

	var rendered bytes.Buffer
	if err := tpl.Execute(&rendered, event); err != nil {
		logger.Errorf("processor failed to execute template: %v event: %v", err, event)
		return event
	}

	result := strings.TrimSpace(rendered.String())
	logger.Infof("processor eventdrop result: %v", result)

	if result == "true" {
		logger.Infof("processor eventdrop drop event: %v", event)
		return nil
	}
	return event
}

View File

@@ -0,0 +1,95 @@
package eventupdate
import (
"crypto/tls"
"encoding/json"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
// EventUpdateConfig is the "event_update" pipeline processor: it POSTs the
// event to an external HTTP service and unmarshals the JSON response back
// into the event, letting that service rewrite event fields.
type EventUpdateConfig struct {
	callback.HTTPConfig
}
// Register this processor under the "event_update" type name so pipelines
// can instantiate it from stored settings.
func init() {
	models.RegisterProcessor("event_update", &EventUpdateConfig{})
}
// Init decodes the raw settings value into a fresh EventUpdateConfig
// processor via the shared JSON round-trip helper.
func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error) {
	return common.InitProcessor[*EventUpdateConfig](settings)
}
// Process POSTs the event to the configured URL and merges the JSON
// response body back into the event, allowing an external service to
// rewrite event fields. Every failure is logged and non-fatal: on error
// the event is returned as-is. The HTTP client is built lazily on first
// use with the configured TLS/proxy/timeout settings.
// NOTE(review): the lazy c.Client initialization is not synchronized —
// confirm Process is never invoked concurrently for the same config.
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
	if c.Client == nil {
		transport := &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
		}

		if c.Proxy != "" {
			proxyURL, err := url.Parse(c.Proxy)
			if err != nil {
				// Bad proxy config: log and fall back to a direct connection.
				logger.Errorf("failed to parse proxy url: %v", err)
			} else {
				transport.Proxy = http.ProxyURL(proxyURL)
			}
		}

		c.Client = &http.Client{
			Timeout:   time.Duration(c.Timeout) * time.Millisecond,
			Transport: transport,
		}
	}

	headers := make(map[string]string)
	headers["Content-Type"] = "application/json"
	for k, v := range c.Headers {
		headers[k] = v
	}

	body, err := json.Marshal(event)
	if err != nil {
		logger.Errorf("failed to marshal event: %v", err)
		return event
	}

	req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
	if err != nil {
		logger.Errorf("failed to create request: %v event: %v", err, event)
		return event
	}

	for k, v := range headers {
		req.Header.Set(k, v)
	}

	if c.AuthUsername != "" && c.AuthPassword != "" {
		req.SetBasicAuth(c.AuthUsername, c.AuthPassword)
	}

	resp, err := c.Client.Do(req)
	if err != nil {
		logger.Errorf("failed to send request: %v event: %v", err, event)
		return event
	}
	// Fix: the response body was never closed, leaking connections/FDs.
	defer resp.Body.Close()

	b, err := io.ReadAll(resp.Body)
	if err != nil {
		logger.Errorf("failed to read response body: %v event: %v", err, event)
		return event
	}

	logger.Infof("response body: %s", string(b))
	// Fix: the unmarshal error was silently discarded, hiding malformed
	// responses; log it so broken update services are visible.
	if err := json.Unmarshal(b, &event); err != nil {
		logger.Errorf("failed to unmarshal response body: %v event: %v", err, event)
	}
	return event
}

View File

@@ -0,0 +1,107 @@
package relabel
import (
"fmt"
"regexp"
"strings"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/prompb"
)
const (
	// REPLACE_DOT temporarily substitutes "." in tag/label names before
	// they are fed through the relabel engine, and is swapped back after —
	// presumably because dots are not legal in Prometheus label names;
	// confirm against the relabel engine's name validation.
	REPLACE_DOT = "___"
)
// RelabelConfig is the "relabel" pipeline processor. Its fields mirror
// pushgw/pconf.RelabelConfig (Prometheus-style relabeling) and are copied
// into one such config per Process call.
type RelabelConfig struct {
	SourceLabels []string `json:"source_labels"` // tag names; "." is mapped to REPLACE_DOT before matching
	Separator    string   `json:"separator"`
	Regex        string   `json:"regex"`
	RegexCompiled *regexp.Regexp
	If      string `json:"if"`
	IfRegex *regexp.Regexp
	Modulus     uint64 `json:"modulus"`
	TargetLabel string `json:"target_label"`
	Replacement string `json:"replacement"`
	Action      string `json:"action"`
}
// Register this processor under the "relabel" type name so pipelines can
// instantiate it from stored settings.
func init() {
	models.RegisterProcessor("relabel", &RelabelConfig{})
}
// Init decodes the raw settings value into a fresh RelabelConfig
// processor via the shared JSON round-trip helper.
func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
	return common.InitProcessor[*RelabelConfig](settings)
}
// Process applies this single relabel rule to the event's tags by
// translating it into a pconf.RelabelConfig and delegating to
// EventRelabel. Source label names get "." mapped to REPLACE_DOT to match
// how EventRelabel encodes tag names for the relabel engine.
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
	srcLabels := make([]model.LabelName, 0, len(r.SourceLabels))
	for _, s := range r.SourceLabels {
		srcLabels = append(srcLabels, model.LabelName(strings.ReplaceAll(s, ".", REPLACE_DOT)))
	}

	cfg := &pconf.RelabelConfig{
		SourceLabels:  srcLabels,
		Separator:     r.Separator,
		Regex:         r.Regex,
		RegexCompiled: r.RegexCompiled,
		If:            r.If,
		IfRegex:       r.IfRegex,
		Modulus:       r.Modulus,
		TargetLabel:   r.TargetLabel,
		Replacement:   r.Replacement,
		Action:        r.Action,
	}

	EventRelabel(event, []*pconf.RelabelConfig{cfg})
	return event
}
// EventRelabel applies Prometheus-style relabel configs to the event's
// "k=v" tags. Tag names have "." swapped for REPLACE_DOT while passing
// through writer.Process and swapped back afterwards; the pre-relabel tags
// are preserved in event.OriginalTagsJSON. Note: it mutates relabelConfigs
// in place to fill default Replacement/Separator/Regex values.
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {
	// Fix: the old code pre-sized both slices to len(TagsJSON) and skipped
	// malformed tags (no "="), leaving zero-value prompb.Label entries and
	// empty OriginalTagsJSON slots that flowed into the relabel engine.
	// Building with append drops malformed tags cleanly instead.
	labels := make([]prompb.Label, 0, len(event.TagsJSON))
	event.OriginalTagsJSON = make([]string, 0, len(event.TagsJSON))
	for _, tag := range event.TagsJSON {
		kv := strings.SplitN(tag, "=", 2)
		if len(kv) != 2 {
			continue
		}
		event.OriginalTagsJSON = append(event.OriginalTagsJSON, tag)
		labels = append(labels, prompb.Label{
			Name:  strings.ReplaceAll(kv[0], ".", REPLACE_DOT),
			Value: kv[1],
		})
	}

	// Fill the defaults Prometheus assumes for omitted relabel fields.
	for i := 0; i < len(relabelConfigs); i++ {
		if relabelConfigs[i].Replacement == "" {
			relabelConfigs[i].Replacement = "$1"
		}
		if relabelConfigs[i].Separator == "" {
			relabelConfigs[i].Separator = ";"
		}
		if relabelConfigs[i].Regex == "" {
			relabelConfigs[i].Regex = "(.*)"
		}
	}

	gotLabels := writer.Process(labels, relabelConfigs...)

	event.TagsJSON = make([]string, 0, len(gotLabels))
	event.TagsMap = make(map[string]string, len(gotLabels))
	for _, label := range gotLabels {
		name := strings.ReplaceAll(string(label.Name), REPLACE_DOT, ".")
		event.TagsJSON = append(event.TagsJSON, fmt.Sprintf("%s=%s", name, label.Value))
		event.TagsMap[name] = label.Value
	}
	// ",," is the tag separator used for the flattened Tags string.
	event.Tags = strings.Join(event.TagsJSON, ",,")
}

View File

@@ -14,14 +14,13 @@ import (
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/prometheus/prometheus/prompb"
"github.com/robfig/cron/v3"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
@@ -61,11 +60,9 @@ type Processor struct {
pendingsUseByRecover *AlertCurEventMap
inhibit bool
tagsMap map[string]string
tagsArr []string
target string
targetNote string
groupName string
tagsMap map[string]string
tagsArr []string
groupName string
alertRuleCache *memsto.AlertRuleCacheType
TargetCache *memsto.TargetCacheType
@@ -154,7 +151,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
eventsMap := make(map[string][]*models.AlertCurEvent)
for _, anomalyPoint := range anomalyPoints {
event := p.BuildEvent(anomalyPoint, from, now, ruleHash)
event.NotifyRuleIDs = cachedRule.NotifyRuleIds
event.NotifyRuleIds = cachedRule.NotifyRuleIds
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
hash := event.Hash
alertingKeys[hash] = struct{}{}
@@ -196,7 +193,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, now int64, ruleHash string) *models.AlertCurEvent {
p.fillTags(anomalyPoint)
p.mayHandleIdent()
hash := Hash(p.rule.Id, p.datasourceId, anomalyPoint)
ds := p.datasourceCache.GetById(p.datasourceId)
var dsName string
@@ -216,8 +213,6 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
event.DatasourceId = p.datasourceId
event.Cluster = dsName
event.Hash = hash
event.TargetIdent = p.target
event.TargetNote = p.targetNote
event.TriggerValue = anomalyPoint.ReadableValue()
event.TriggerValues = anomalyPoint.Values
event.TriggerValuesJson = models.EventTriggerValues{ValuesWithUnit: anomalyPoint.ValuesUnit}
@@ -249,15 +244,6 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
}
if p.target != "" {
if pt, exist := p.TargetCache.Get(p.target); exist {
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("Target[ident: %s] doesn't exist in cache.", p.target)
}
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
// TriggerValues 有多个变量,将多个变量都放到 TriggerValue 中
event.TriggerValue = event.TriggerValues
@@ -271,6 +257,19 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
// 生成事件之后,立马进程 relabel 处理
Relabel(p.rule, event)
// 放到 Relabel(p.rule, event) 下面,为了处理 relabel 之后,标签里才出现 ident 的情况
p.mayHandleIdent(event)
if event.TargetIdent != "" {
if pt, exist := p.TargetCache.Get(event.TargetIdent); exist {
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
}
}
return event
}
@@ -279,44 +278,15 @@ func Relabel(rule *models.AlertRule, event *models.AlertCurEvent) {
return
}
if len(rule.EventRelabelConfig) == 0 {
return
}
// need to keep the original label
event.OriginalTags = event.Tags
event.OriginalTagsJSON = make([]string, len(event.TagsJSON))
labels := make([]prompb.Label, len(event.TagsJSON))
for i, tag := range event.TagsJSON {
label := strings.SplitN(tag, "=", 2)
event.OriginalTagsJSON[i] = tag
labels[i] = prompb.Label{Name: label[0], Value: label[1]}
if len(rule.EventRelabelConfig) == 0 {
return
}
for i := 0; i < len(rule.EventRelabelConfig); i++ {
if rule.EventRelabelConfig[i].Replacement == "" {
rule.EventRelabelConfig[i].Replacement = "$1"
}
if rule.EventRelabelConfig[i].Separator == "" {
rule.EventRelabelConfig[i].Separator = ";"
}
if rule.EventRelabelConfig[i].Regex == "" {
rule.EventRelabelConfig[i].Regex = "(.*)"
}
}
// relabel process
relabels := writer.Process(labels, rule.EventRelabelConfig...)
event.TagsJSON = make([]string, len(relabels))
event.TagsMap = make(map[string]string, len(relabels))
for i, label := range relabels {
event.TagsJSON[i] = fmt.Sprintf("%s=%s", label.Name, label.Value)
event.TagsMap[label.Name] = label.Value
}
event.Tags = strings.Join(event.TagsJSON, ",,")
relabel.EventRelabel(event, rule.EventRelabelConfig)
}
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64, inhibit bool) {
@@ -567,7 +537,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
if alertRule == nil {
continue
}
event.NotifyRuleIDs = alertRule.NotifyRuleIds
event.NotifyRuleIds = alertRule.NotifyRuleIds
if event.Cate == models.HOST {
target, exists := p.TargetCache.Get(event.TargetIdent)
@@ -641,19 +611,19 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
p.tagsArr = labelMapToArr(tagsMap)
}
func (p *Processor) mayHandleIdent() {
func (p *Processor) mayHandleIdent(event *models.AlertCurEvent) {
// handle ident
if ident, has := p.tagsMap["ident"]; has {
if ident, has := event.TagsMap["ident"]; has {
if target, exists := p.TargetCache.Get(ident); exists {
p.target = target.Ident
p.targetNote = target.Note
event.TargetIdent = target.Ident
event.TargetNote = target.Note
} else {
p.target = ident
p.targetNote = ""
event.TargetIdent = ident
event.TargetNote = ""
}
} else {
p.target = ""
p.targetNote = ""
event.TargetIdent = ""
event.TargetNote = ""
}
}

View File

@@ -56,12 +56,13 @@ func (rrc *RecordRuleContext) Key() string {
}
func (rrc *RecordRuleContext) Hash() string {
return str.MD5(fmt.Sprintf("%d_%s_%s_%d_%s",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d_%s_%s",
rrc.rule.Id,
rrc.rule.CronPattern,
rrc.rule.PromQl,
rrc.datasourceId,
rrc.rule.AppendTags,
rrc.rule.Name,
))
}

View File

@@ -30,12 +30,14 @@ type IbexCallBacker struct {
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
return
}
event := ctx.Events[0]
if event.IsRecovered {
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
return
}
@@ -43,8 +45,9 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
if imodels.DB() == nil && ctx.IsCenter {
logger.Warning("event_callback_ibex: db is nil")
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
return
}
@@ -63,17 +66,23 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s", url)
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
return
}
if host == "" {
// 用户在callback url中没有传入host就从event中解析
host = event.TargetIdent
if host == "" {
if ident, has := event.TagsMap["ident"]; has {
host = ident
}
}
}
if host == "" {
logger.Error("event_callback_ibex: failed to get host")
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
return
}
@@ -83,21 +92,23 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
func CallIbex(ctx *ctx.Context, id int64, host string,
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: id: %d, host: %s, event: %+v", id, host, event)
tpl := taskTplCache.Get(id)
if tpl == nil {
logger.Errorf("event_callback_ibex: no such tpl(%d)", id)
logger.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
return
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := canDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
logger.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
return
}
if !can {
logger.Errorf("event_callback_ibex: user(%s) no permission", tpl.UpdateBy)
logger.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
return
}
@@ -122,7 +133,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
tags, err := json.Marshal(tagsMap)
if err != nil {
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v", tagsMap)
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
return
}
@@ -145,7 +156,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
logger.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
return
}
@@ -167,7 +178,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
}
if err = record.Add(ctx); err != nil {
logger.Errorf("event_callback_ibex: persist task_record fail: %v", err)
logger.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
}
}
@@ -187,7 +198,7 @@ func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *m
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
if storage.Cache == nil {
logger.Warning("event_callback_ibex: redis cache is nil")
logger.Warningf("event_callback_ibex: redis cache is nil, task: %+v", f)
return 0, fmt.Errorf("redis cache is nil")
}

View File

@@ -85,254 +85,221 @@ func MergeOperationConf() error {
const (
builtInOps = `
ops:
- name: dashboards
cname: Dashboards
ops:
- name: "/dashboards"
cname: View Dashboards
- name: "/dashboards/add"
cname: Add Dashboard
- name: "/dashboards/put"
cname: Modify Dashboard
- name: "/dashboards/del"
cname: Delete Dashboard
- name: "/embedded-dashboards/put"
cname: Modify Embedded Dashboard
- name: "/embedded-dashboards"
cname: View Embedded Dashboard
- name: "/public-dashboards"
cname: View Public Dashboard
- name: metric
cname: Time Series Metrics
ops:
- name: "/metric/explorer"
cname: View Metric Data
- name: "/object/explorer"
cname: View Object Data
- name: builtin-metrics
cname: Metric Views
ops:
- name: "/metrics-built-in"
cname: View Built-in Metrics
- name: "/builtin-metrics/add"
cname: Add Built-in Metric
- name: "/builtin-metrics/put"
cname: Modify Built-in Metric
- name: "/builtin-metrics/del"
cname: Delete Built-in Metric
- name: recording-rules
cname: Recording Rule Management
ops:
- name: "/recording-rules"
cname: View Recording Rules
- name: "/recording-rules/add"
cname: Add Recording Rule
- name: "/recording-rules/put"
cname: Modify Recording Rule
- name: "/recording-rules/del"
cname: Delete Recording Rule
- name: log
cname: Log Analysis
ops:
- name: "/log/explorer"
cname: View Logs
- name: "/log/index-patterns"
cname: View Index Patterns
- name: alert
cname: Alert Rules
ops:
- name: "/alert-rules"
cname: View Alert Rules
- name: "/alert-rules/add"
cname: Add Alert Rule
- name: "/alert-rules/put"
cname: Modify Alert Rule
- name: "/alert-rules/del"
cname: Delete Alert Rule
- name: alert-mutes
cname: Alert Silence Management
ops:
- name: "/alert-mutes"
cname: View Alert Silences
- name: "/alert-mutes/add"
cname: Add Alert Silence
- name: "/alert-mutes/put"
cname: Modify Alert Silence
- name: "/alert-mutes/del"
cname: Delete Alert Silence
- name: alert-subscribes
cname: Alert Subscription Management
ops:
- name: "/alert-subscribes"
cname: View Alert Subscriptions
- name: "/alert-subscribes/add"
cname: Add Alert Subscription
- name: "/alert-subscribes/put"
cname: Modify Alert Subscription
- name: "/alert-subscribes/del"
cname: Delete Alert Subscription
- name: alert-events
cname: Alert Event Management
ops:
- name: "/alert-cur-events"
cname: View Current Alerts
- name: "/alert-cur-events/del"
cname: Delete Current Alert
- name: "/alert-his-events"
cname: View Historical Alerts
- name: notification
cname: Alert Notification
ops:
- name: "/help/notification-settings"
cname: View Notification Settings
- name: "/help/notification-tpls"
cname: View Notification Templates
- name: job
cname: Task Management
ops:
- name: "/job-tpls"
cname: View Task Templates
- name: "/job-tpls/add"
cname: Add Task Template
- name: "/job-tpls/put"
cname: Modify Task Template
- name: "/job-tpls/del"
cname: Delete Task Template
- name: "/job-tasks"
cname: View Task Instances
- name: "/job-tasks/add"
cname: Add Task Instance
- name: "/job-tasks/put"
cname: Modify Task Instance
- name: targets
- name: Infrastructure
cname: Infrastructure
ops:
- name: "/targets"
cname: View Objects
- name: "/targets/add"
cname: Add Object
- name: "/targets/put"
cname: Modify Object
- name: "/targets/del"
cname: Delete Object
- name: "/targets/bind"
cname: Bind Object
- name: /targets
cname: Host - View
- name: /targets/put
cname: Host - Modify
- name: /targets/del
cname: Host - Delete
- name: /targets/bind
cname: Host - Bind Uncategorized
- name: user
cname: User Management
- name: Explorer
cname: Explorer
ops:
- name: "/users"
cname: View User List
- name: "/user-groups"
cname: View User Groups
- name: "/user-groups/add"
cname: Add User Group
- name: "/user-groups/put"
cname: Modify User Group
- name: "/user-groups/del"
cname: Delete User Group
- name: /metric/explorer
cname: Metrics Explorer
- name: /object/explorer
cname: Quick View
- name: /metrics-built-in
cname: Built-in Metric - View
- name: /builtin-metrics/add
cname: Built-in Metric - Add
- name: /builtin-metrics/put
cname: Built-in Metric - Modify
- name: /builtin-metrics/del
cname: Built-in Metric - Delete
- name: /recording-rules
cname: Recording Rule - View
- name: /recording-rules/add
cname: Recording Rule - Add
- name: /recording-rules/put
cname: Recording Rule - Modify
- name: /recording-rules/del
cname: Recording Rule - Delete
- name: /log/explorer
cname: Logs Explorer
- name: /log/index-patterns # 前端有个管理索引模式的页面,所以需要一个权限点来控制,后面应该改成侧拉板
cname: Index Pattern - View
- name: /log/index-patterns/add
cname: Index Pattern - Add
- name: /log/index-patterns/put
cname: Index Pattern - Modify
- name: /log/index-patterns/del
cname: Index Pattern - Delete
- name: /dashboards
cname: Dashboard - View
- name: /dashboards/add
cname: Dashboard - Add
- name: /dashboards/put
cname: Dashboard - Modify
- name: /dashboards/del
cname: Dashboard - Delete
- name: /public-dashboards
cname: Dashboard - View Public
- name: busi-groups
cname: Business Group Management
- name: alerting
cname: Alerting
ops:
- name: "/busi-groups"
cname: View Business Groups
- name: "/busi-groups/add"
cname: Add Business Group
- name: "/busi-groups/put"
cname: Modify Business Group
- name: "/busi-groups/del"
cname: Delete Business Group
- name: /alert-rules
cname: Alerting Rule - View
- name: /alert-rules/add
cname: Alerting Rule - Add
- name: /alert-rules/put
cname: Alerting Rule - Modify
- name: /alert-rules/del
cname: Alerting Rule - Delete
- name: /alert-mutes
cname: Mutting Rule - View
- name: /alert-mutes/add
cname: Mutting Rule - Add
- name: /alert-mutes/put
cname: Mutting Rule - Modify
- name: /alert-mutes/del
cname: Mutting Rule - Delete
- name: /alert-subscribes
cname: Subscribing Rule - View
- name: /alert-subscribes/add
cname: Subscribing Rule - Add
- name: /alert-subscribes/put
cname: Subscribing Rule - Modify
- name: /alert-subscribes/del
cname: Subscribing Rule - Delete
- name: /job-tpls
cname: Self-healing-Script - View
- name: /job-tpls/add
cname: Self-healing-Script - Add
- name: /job-tpls/put
cname: Self-healing-Script - Modify
- name: /job-tpls/del
cname: Self-healing-Script - Delete
- name: /job-tasks
cname: Self-healing-Job - View
- name: /job-tasks/add
cname: Self-healing-Job - Add
- name: /job-tasks/put
cname: Self-healing-Job - Modify
- name: /alert-cur-events
cname: Active Event - View
- name: /alert-cur-events/del
cname: Active Event - Delete
- name: /alert-his-events
cname: Historical Event - View
- name: permissions
cname: Permission Management
- name: Notification
cname: Notification
ops:
- name: "/permissions"
cname: View Permission Settings
- name: contacts
cname: User Contact Management
ops:
- name: "/contacts"
cname: User Contact Management
- name: /notification-rules
cname: Notification Rule - View
- name: /notification-rules/add
cname: Notification Rule - Add
- name: /notification-rules/put
cname: Notification Rule - Modify
- name: /notification-rules/del
cname: Notification Rule - Delete
- name: /notification-channels
cname: Media Type - View
- name: /notification-channels/add
cname: Media Type - Add
- name: /notification-channels/put
cname: Media Type - Modify
- name: /notification-channels/del
cname: Media Type - Delete
- name: /notification-templates
cname: Message Template - View
- name: /notification-templates/add
cname: Message Template - Add
- name: /notification-templates/put
cname: Message Template - Modify
- name: /notification-templates/del
cname: Message Template - Delete
- name: /event-pipelines
cname: Event Pipeline - View
- name: /event-pipelines/add
cname: Event Pipeline - Add
- name: /event-pipelines/put
cname: Event Pipeline - Modify
- name: /event-pipelines/del
cname: Event Pipeline - Delete
- name: /help/notification-settings # 用于控制老版本的通知设置菜单是否展示
cname: Notification Settings - View
- name: /help/notification-tpls # 用于控制老版本的通知模板菜单是否展示
cname: Notification Templates - View
- name: built-in-components
cname: Template Center
- name: Integrations
cname: Integrations
ops:
- name: "/built-in-components"
cname: View Built-in Components
- name: "/built-in-components/add"
cname: Add Built-in Component
- name: "/built-in-components/put"
cname: Modify Built-in Component
- name: "/built-in-components/del"
cname: Delete Built-in Component
- name: /datasources # 用于控制能否看到数据源列表页面的菜单。只有 Admin 才能修改、删除数据源
cname: Data Source - View
- name: /components
cname: Component - View
- name: /components/add
cname: Component - Add
- name: /components/put
cname: Component - Modify
- name: /components/del
cname: Component - Delete
- name: /embedded-products
cname: Embedded Product - View
- name: /embedded-product/add
cname: Embedded Product - Add
- name: /embedded-product/put
cname: Embedded Product - Modify
- name: /embedded-product/delete
cname: Embedded Product - Delete
- name: datasource
cname: Data Source Management
- name: Organization
cname: Organization
ops:
- name: "/help/source"
cname: View Data Source Configuration
- name: /users
cname: User - View
- name: /users/add
cname: User - Add
- name: /users/put
cname: User - Modify
- name: /users/del
cname: User - Delete
- name: /user-groups
cname: Team - View
- name: /user-groups/add
cname: Team - Add
- name: /user-groups/put
cname: Team - Modify
- name: /user-groups/del
cname: Team - Delete
- name: /busi-groups
cname: Business Group - View
- name: /busi-groups/add
cname: Business Group - Add
- name: /busi-groups/put
cname: Business Group - Modify
- name: /busi-groups/del
cname: Business Group - Delete
- name: /roles
cname: Role - View
- name: /roles/add
cname: Role - Add
- name: /roles/put
cname: Role - Modify
- name: /roles/del
cname: Role - Delete
- name: system
cname: System Information
- name: System Settings
cname: System Settings
ops:
- name: "/help/variable-configs"
cname: View Variable Configuration
- name: "/help/version"
cname: View Version Information
- name: "/help/servers"
cname: View Server Information
- name: "/help/sso"
cname: View SSO Configuration
- name: "/site-settings"
- name: /system/site-settings # 仅用于控制能否展示菜单,只有 Admin 才能修改、删除
cname: View Site Settings
- name: /system/variable-settings
cname: View Variable Settings
- name: /system/sso-settings
cname: View SSO Settings
- name: /system/alerting-engines
cname: View Alerting Engines
- name: /system/version
cname: View Product Version
- name: message-templates
cname: Message Templates
ops:
- name: "/notification-templates"
cname: View Message Templates
- name: "/notification-templates/add"
cname: Add Message Templates
- name: "/notification-templates/put"
cname: Modify Message Templates
- name: "/notification-templates/del"
cname: Delete Message Templates
- name: notify-rules
cname: Notify Rules
ops:
- name: "/notification-rules"
cname: View Notify Rules
- name: "/notification-rules/add"
cname: Add Notify Rules
- name: "/notification-rules/put"
cname: Modify Notify Rules
- name: "/notification-rules/del"
cname: Delete Notify Rules
- name: notify-channels
cname: Notify Channels
ops:
- name: "/notification-channels"
cname: View Notify Channels
- name: "/notification-channels/add"
cname: Add Notify Channels
- name: "/notification-channels/put"
cname: Modify Notify Channels
- name: "/notification-channels/del"
cname: Delete Notify Channels
`
)

View File

@@ -25,4 +25,10 @@ var Plugins = []Plugin{
Type: "tdengine",
TypeName: "TDengine",
},
{
Id: 5,
Category: "logging",
Type: "ck",
TypeName: "ClickHouse",
},
}

View File

@@ -13,7 +13,6 @@ import (
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/center/cconf/rsa"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/center/metas"
centerrt "github.com/ccfos/nightingale/v6/center/router"
@@ -60,7 +59,6 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
i18nx.Init(configDir)
cstats.Init()
flashduty.Init(config.Center.FlashDuty)
db, err := storage.New(config.DB)
@@ -86,7 +84,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
metas := metas.New(redis)
idents := idents.New(ctx, redis)
idents := idents.New(ctx, redis, config.Pushgw)
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
@@ -94,6 +92,9 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if config.Center.MigrateBusiGroupLabel || models.CanMigrateBg(ctx) {
models.MigrateBg(ctx, config.Pushgw.BusiGroupLabelKey)
}
if models.CanMigrateEP(ctx) {
models.MigrateEP(ctx)
}
configCache := memsto.NewConfigCache(ctx, syncStats, config.HTTP.RSA.RSAPrivateKey, config.HTTP.RSA.RSAPassWord)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)

View File

@@ -6,40 +6,49 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const Service = "n9e-center"
const (
namespace = "n9e"
subsystem = "center"
)
var (
labels = []string{"service", "code", "path", "method"}
uptime = prometheus.NewCounterVec(
uptime = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "uptime",
Help: "HTTP service uptime.",
}, []string{"service"},
)
RequestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "http_request_count_total",
Help: "Total number of HTTP requests made.",
}, labels,
Namespace: namespace,
Subsystem: subsystem,
Name: "uptime",
Help: "HTTP service uptime.",
},
)
RequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Buckets: []float64{.01, .1, 1, 10},
Name: "http_request_duration_seconds",
Help: "HTTP request latencies in seconds.",
}, labels,
Namespace: namespace,
Subsystem: subsystem,
Buckets: prometheus.DefBuckets,
Name: "http_request_duration_seconds",
Help: "HTTP request latencies in seconds.",
}, []string{"code", "path", "method"},
)
RedisOperationLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "redis_operation_latency_seconds",
Help: "Histogram of latencies for Redis operations",
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5},
},
[]string{"operation", "status"},
)
)
func Init() {
func init() {
// Register the summary and the histogram with Prometheus's default registry.
prometheus.MustRegister(
uptime,
RequestCounter,
RequestDuration,
RedisOperationLatency,
)
go recordUptime()
@@ -48,6 +57,6 @@ func Init() {
// recordUptime increases service uptime per second.
func recordUptime() {
for range time.Tick(time.Second) {
uptime.WithLabelValues(Service).Inc()
uptime.Inc()
}
}

View File

@@ -6,6 +6,7 @@ import (
"sync"
"time"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/storage"
@@ -115,15 +116,23 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
}
newMap[models.WrapIdent(ident)] = meta
}
start := time.Now()
err := storage.MSet(context.Background(), s.redis, newMap)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "fail").Observe(time.Since(start).Seconds())
return err
} else {
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "success").Observe(time.Since(start).Seconds())
}
if len(extendMap) > 0 {
err = storage.MSet(context.Background(), s.redis, extendMap)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "fail").Observe(time.Since(start).Seconds())
return err
} else {
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "success").Observe(time.Since(start).Seconds())
}
}

View File

@@ -93,10 +93,9 @@ func stat() gin.HandlerFunc {
code := fmt.Sprintf("%d", c.Writer.Status())
method := c.Request.Method
labels := []string{cstats.Service, code, c.FullPath(), method}
labels := []string{code, c.FullPath(), method}
cstats.RequestCounter.WithLabelValues(labels...).Inc()
cstats.RequestDuration.WithLabelValues(labels...).Observe(float64(time.Since(start).Seconds()))
cstats.RequestDuration.WithLabelValues(labels...).Observe(time.Since(start).Seconds())
}
}
@@ -265,11 +264,11 @@ func (rt *Router) Config(r *gin.Engine) {
pages.DELETE("/self/token/:id", rt.auth(), rt.user(), rt.deleteToken)
pages.GET("/users", rt.auth(), rt.user(), rt.perm("/users"), rt.userGets)
pages.POST("/users", rt.auth(), rt.admin(), rt.userAddPost)
pages.POST("/users", rt.auth(), rt.user(), rt.perm("/users/add"), rt.userAddPost)
pages.GET("/user/:id/profile", rt.auth(), rt.userProfileGet)
pages.PUT("/user/:id/profile", rt.auth(), rt.admin(), rt.userProfilePut)
pages.PUT("/user/:id/password", rt.auth(), rt.admin(), rt.userPasswordPut)
pages.DELETE("/user/:id", rt.auth(), rt.admin(), rt.userDel)
pages.PUT("/user/:id/profile", rt.auth(), rt.user(), rt.perm("/users/put"), rt.userProfilePut)
pages.PUT("/user/:id/password", rt.auth(), rt.user(), rt.perm("/users/put"), rt.userPasswordPut)
pages.DELETE("/user/:id", rt.auth(), rt.user(), rt.perm("/users/del"), rt.userDel)
pages.GET("/metric-views", rt.auth(), rt.metricViewGets)
pages.DELETE("/metric-views", rt.auth(), rt.user(), rt.metricViewDel)
@@ -390,6 +389,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.PUT("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.alertMutePutByFE)
pages.GET("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGet)
pages.PUT("/busi-group/:id/alert-mutes/fields", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.bgrw(), rt.alertMutePutFields)
pages.POST("/alert-mute-tryrun", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.alertMuteTryRun)
pages.GET("/busi-groups/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGetsByGids)
pages.GET("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.bgro(), rt.alertSubscribeGets)
@@ -444,13 +444,13 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/datasource/status/update", rt.auth(), rt.admin(), rt.datasourceUpdataStatus)
pages.DELETE("/datasource/", rt.auth(), rt.admin(), rt.datasourceDel)
pages.GET("/roles", rt.auth(), rt.admin(), rt.roleGets)
pages.POST("/roles", rt.auth(), rt.admin(), rt.roleAdd)
pages.PUT("/roles", rt.auth(), rt.admin(), rt.rolePut)
pages.DELETE("/role/:id", rt.auth(), rt.admin(), rt.roleDel)
pages.GET("/roles", rt.auth(), rt.user(), rt.perm("/roles"), rt.roleGets)
pages.POST("/roles", rt.auth(), rt.user(), rt.perm("/roles/add"), rt.roleAdd)
pages.PUT("/roles", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.rolePut)
pages.DELETE("/role/:id", rt.auth(), rt.user(), rt.perm("/roles/del"), rt.roleDel)
pages.GET("/role/:id/ops", rt.auth(), rt.admin(), rt.operationOfRole)
pages.PUT("/role/:id/ops", rt.auth(), rt.admin(), rt.roleBindOperation)
pages.GET("/role/:id/ops", rt.auth(), rt.user(), rt.perm("/roles"), rt.operationOfRole)
pages.PUT("/role/:id/ops", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.roleBindOperation)
pages.GET("/operation", rt.operations)
pages.GET("/notify-tpls", rt.auth(), rt.user(), rt.notifyTplGets)
@@ -472,7 +472,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-channel", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyChannelGets)
pages.PUT("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelPuts)
pages.GET("/notify-contact", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyContactGets)
pages.GET("/notify-contact", rt.auth(), rt.user(), rt.notifyContactGets)
pages.PUT("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactPuts)
pages.GET("/notify-config", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyConfigGet)
@@ -481,13 +481,20 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/es-index-pattern", rt.auth(), rt.esIndexPatternGet)
pages.GET("/es-index-pattern-list", rt.auth(), rt.esIndexPatternGetList)
pages.POST("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternAdd)
pages.PUT("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternPut)
pages.DELETE("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternDel)
pages.POST("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/add"), rt.esIndexPatternAdd)
pages.PUT("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/put"), rt.esIndexPatternPut)
pages.DELETE("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/del"), rt.esIndexPatternDel)
pages.GET("/embedded-dashboards", rt.auth(), rt.user(), rt.perm("/embedded-dashboards"), rt.embeddedDashboardsGet)
pages.PUT("/embedded-dashboards", rt.auth(), rt.user(), rt.perm("/embedded-dashboards/put"), rt.embeddedDashboardsPut)
// 获取 embedded-product 列表
pages.GET("/embedded-product", rt.auth(), rt.user(), rt.embeddedProductGets)
pages.GET("/embedded-product/:id", rt.auth(), rt.user(), rt.embeddedProductGet)
pages.POST("/embedded-product", rt.auth(), rt.user(), rt.perm("/embedded-product/add"), rt.embeddedProductAdd)
pages.PUT("/embedded-product/:id", rt.auth(), rt.user(), rt.perm("/embedded-product/put"), rt.embeddedProductPut)
pages.DELETE("/embedded-product/:id", rt.auth(), rt.user(), rt.perm("/embedded-product/delete"), rt.embeddedProductDelete)
pages.GET("/user-variable-configs", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigGets)
pages.POST("/user-variable-config", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigAdd)
pages.PUT("/user-variable-config/:id", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigPut)
@@ -497,20 +504,23 @@ func (rt *Router) Config(r *gin.Engine) {
pages.PUT("/config", rt.auth(), rt.admin(), rt.configPutByKey)
pages.GET("/site-info", rt.siteInfo)
// source token 相关路由
pages.POST("/source-token", rt.auth(), rt.user(), rt.sourceTokenAdd)
// for admin api
pages.GET("/user/busi-groups", rt.auth(), rt.admin(), rt.userBusiGroupsGets)
pages.GET("/builtin-components", rt.auth(), rt.user(), rt.builtinComponentsGets)
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinComponentsAdd)
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinComponentsPut)
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinComponentsDel)
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinComponentsAdd)
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinComponentsPut)
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinComponentsDel)
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinPayloadsAdd)
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/built-in-components"), rt.builtinPayloadGet)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinPayloadsDel)
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinPayloadsAdd)
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/components"), rt.builtinPayloadGet)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinPayloadsDel)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUIDOrID)
pages.POST("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/add"), rt.messageTemplatesAdd)
@@ -527,6 +537,16 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-rules", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRulesGet)
pages.POST("/notify-rule/test", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyTest)
pages.GET("/notify-rule/custom-params", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRuleCustomParamsGet)
pages.POST("/notify-rule/event-pipelines-tryrun", rt.auth(), rt.user(), rt.perm("/notification-rules/add"), rt.tryRunEventProcessorByNotifyRule)
// 事件Pipeline相关路由
pages.GET("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.eventPipelinesList)
pages.POST("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/add"), rt.addEventPipeline)
pages.PUT("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/put"), rt.updateEventPipeline)
pages.GET("/event-pipeline/:id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipeline)
pages.DELETE("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines/del"), rt.deleteEventPipelines)
pages.POST("/event-pipeline-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventPipeline)
pages.POST("/event-processor-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventProcessor)
pages.POST("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/add"), rt.notifyChannelsAdd)
pages.DELETE("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/del"), rt.notifyChannelsDel)
@@ -647,6 +667,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/message-templates", rt.messageTemplateGets)
service.GET("/event-pipelines", rt.eventPipelinesListByService)
}
}

View File

@@ -1,50 +1,54 @@
package router
import (
"fmt"
"net/http"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func parseAggrRules(c *gin.Context) []*models.AggrRule {
aggrRules := strings.Split(ginx.QueryStr(c, "rule", ""), "::") // e.g. field:group_name::field:severity::tagkey:ident
if len(aggrRules) == 0 {
ginx.Bomb(http.StatusBadRequest, "rule empty")
func getUserGroupIds(ctx *gin.Context, rt *Router, myGroups bool) ([]int64, error) {
if !myGroups {
return nil, nil
}
rules := make([]*models.AggrRule, len(aggrRules))
for i := 0; i < len(aggrRules); i++ {
pair := strings.Split(aggrRules[i], ":")
if len(pair) != 2 {
ginx.Bomb(http.StatusBadRequest, "rule invalid")
}
if !(pair[0] == "field" || pair[0] == "tagkey") {
ginx.Bomb(http.StatusBadRequest, "rule invalid")
}
rules[i] = &models.AggrRule{
Type: pair[0],
Value: pair[1],
}
}
return rules
me := ctx.MustGet("user").(*models.User)
return models.MyGroupIds(rt.Ctx, me.Id)
}
func (rt *Router) alertCurEventsCard(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
severity := strx.IdsInt64ForAPI(ginx.QueryStr(c, "severity", ""), ",")
query := ginx.QueryStr(c, "query", "")
myGroups := ginx.QueryBool(c, "my_groups", false) // 是否只看自己组默认false
var gids []int64
var err error
if myGroups {
gids, err = getUserGroupIds(c, rt, myGroups)
ginx.Dangerous(err)
if len(gids) == 0 {
gids = append(gids, -1)
}
}
viewId := ginx.QueryInt64(c, "view_id")
alertView, err := models.GetAlertAggrViewByViewID(rt.Ctx, viewId)
ginx.Dangerous(err)
if alertView == nil {
ginx.Bomb(http.StatusNotFound, "alert aggr view not found")
}
dsIds := queryDatasourceIds(c)
rules := parseAggrRules(c)
prod := ginx.QueryStr(c, "prods", "")
if prod == "" {
@@ -61,17 +65,18 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
cates = strings.Split(cate, ",")
}
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, myGroups)
ginx.Dangerous(err)
// 最多获取50000个获取太多也没啥意义
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, 0, query, 50000, 0)
cates, 0, query, 50000, 0, []int64{})
ginx.Dangerous(err)
cardmap := make(map[string]*AlertCard)
for _, event := range list {
title := event.GenCardTitle(rules)
title, err := event.GenCardTitle(alertView.Rule)
ginx.Dangerous(err)
if _, has := cardmap[title]; has {
cardmap[title].Total++
cardmap[title].EventIds = append(cardmap[title].EventIds, event.Id)
@@ -86,6 +91,10 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
Severity: event.Severity,
}
}
if cardmap[title].Severity < 1 {
cardmap[title].Severity = 3
}
}
titles := make([]string, 0, len(cardmap))
@@ -142,11 +151,15 @@ func (rt *Router) alertCurEventsGetByRid(c *gin.Context) {
// 列表方式,拉取活跃告警
func (rt *Router) alertCurEventsList(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
severity := strx.IdsInt64ForAPI(ginx.QueryStr(c, "severity", ""), ",")
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
myGroups := ginx.QueryBool(c, "my_groups", false) // 是否只看自己组默认false
dsIds := queryDatasourceIds(c)
eventIds := strx.IdsInt64ForAPI(ginx.QueryStr(c, "event_ids", ""), ",")
prod := ginx.QueryStr(c, "prods", "")
if prod == "" {
prod = ginx.QueryStr(c, "rule_prods", "")
@@ -165,18 +178,19 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, myGroups)
ginx.Dangerous(err)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query)
cates, ruleId, query, eventIds)
ginx.Dangerous(err)
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query, limit, ginx.Offset(c, limit))
cates, ruleId, query, limit, ginx.Offset(c, limit), eventIds)
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(list); i++ {
list[i].FillNotifyGroups(rt.Ctx, cache)
}
@@ -218,24 +232,60 @@ func (rt *Router) checkCurEventBusiGroupRWPermission(c *gin.Context, ids []int64
func (rt *Router) alertCurEventGet(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
event, err := models.AlertCurEventGetById(rt.Ctx, eid)
ginx.Dangerous(err)
event, err := GetCurEventDetail(rt.Ctx, eid)
ginx.NewRender(c).Data(event, err)
}
func GetCurEventDetail(ctx *ctx.Context, eid int64) (*models.AlertCurEvent, error) {
event, err := models.AlertCurEventGetById(ctx, eid)
if err != nil {
return nil, err
}
if event == nil {
ginx.Bomb(404, "No such active event")
return nil, fmt.Errorf("no such active event")
}
if !rt.Center.AnonymousAccess.AlertDetail && rt.Center.EventHistoryGroupView {
rt.bgroCheck(c, event.GroupId)
}
ruleConfig, needReset := models.FillRuleConfigTplName(rt.Ctx, event.RuleConfig)
ruleConfig, needReset := models.FillRuleConfigTplName(ctx, event.RuleConfig)
if needReset {
event.RuleConfigJson = ruleConfig
}
event.LastEvalTime = event.TriggerTime
ginx.NewRender(c).Data(event, nil)
event.NotifyVersion, err = GetEventNotifyVersion(ctx, event.RuleId, event.NotifyRuleIds)
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNorifyRuleNames(ctx, event.NotifyRuleIds)
return event, err
}
func GetEventNorifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
notifyRuleNames := make([]*models.EventNotifyRule, 0)
notifyRules, err := models.NotifyRulesGet(ctx, "id in ?", notifyRuleIds)
if err != nil {
return nil, err
}
for _, notifyRule := range notifyRules {
notifyRuleNames = append(notifyRuleNames, &models.EventNotifyRule{
Id: notifyRule.ID,
Name: notifyRule.Name,
})
}
return notifyRuleNames, nil
}
func GetEventNotifyVersion(ctx *ctx.Context, ruleId int64, notifyRuleIds []int64) (int, error) {
if len(notifyRuleIds) != 0 {
// 如果存在 notify_rule_ids则认为使用新的告警通知方式
return 1, nil
}
rule, err := models.AlertRuleGetById(ctx, ruleId)
if err != nil {
return 0, err
}
return rule.NotifyVersion, nil
}
func (rt *Router) alertCurEventsStatistics(c *gin.Context) {

View File

@@ -56,7 +56,7 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
ginx.Dangerous(err)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
@@ -96,46 +96,54 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
event.RuleConfigJson = ruleConfig
}
event.NotifyVersion, err = GetEventNotifyVersion(rt.Ctx, event.RuleId, event.NotifyRuleIds)
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNorifyRuleNames(rt.Ctx, event.NotifyRuleIds)
ginx.NewRender(c).Data(event, err)
}
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, eventHistoryGroupView bool) ([]int64, error) {
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, onlySelfGroupView bool, myGroups bool) ([]int64, error) {
bgid := ginx.QueryInt64(c, "bgid", 0)
var bgids []int64
if !eventHistoryGroupView || strings.HasPrefix(c.Request.URL.Path, "/v1") {
if strings.HasPrefix(c.Request.URL.Path, "/v1") {
// 如果请求路径以 /v1 开头,不查询用户信息
if bgid > 0 {
return []int64{bgid}, nil
}
return bgids, nil
}
user := c.MustGet("user").(*models.User)
if user.IsAdmin() {
if myGroups || (onlySelfGroupView && !user.IsAdmin()) {
// 1. 页面上勾选了我的业务组,需要查询用户所属的业务组
// 2. 如果 onlySelfGroupView 为 true表示只允许查询用户所属的业务组
bussGroupIds, err := models.MyBusiGroupIds(ctx, user.Id)
if err != nil {
return nil, err
}
if len(bussGroupIds) == 0 {
// 如果没查到用户属于任何业务组需要返回一个0否则会导致查询到全部告警历史
return []int64{0}, nil
}
if bgid > 0 {
if !slices.Contains(bussGroupIds, bgid) && !user.IsAdmin() {
return nil, fmt.Errorf("business group ID not allowed")
}
return []int64{bgid}, nil
}
return bgids, nil
}
bussGroupIds, err := models.MyBusiGroupIds(ctx, user.Id)
if err != nil {
return nil, err
}
if len(bussGroupIds) == 0 {
// 如果没查到用户属于任何业务组需要返回一个0否则会导致查询到全部告警历史
return []int64{0}, nil
}
if bgid > 0 && !slices.Contains(bussGroupIds, bgid) {
return nil, fmt.Errorf("business group ID not allowed")
return bussGroupIds, nil
}
if bgid > 0 {
// Pass filter parameters, priority to use
return []int64{bgid}, nil
}
return bussGroupIds, nil
return bgids, nil
}

View File

@@ -57,7 +57,7 @@ func (rt *Router) metricFilterDel(c *gin.Context) {
ginx.Dangerous(err)
if !HasPerm(gids, old.GroupsPerm, true) {
ginx.NewRender(c).Message("no permission")
ginx.NewRender(c).Message("forbidden")
return
}
}
@@ -79,7 +79,7 @@ func (rt *Router) metricFilterPut(c *gin.Context) {
ginx.Dangerous(err)
if !HasPerm(gids, old.GroupsPerm, true) {
ginx.NewRender(c).Message("no permission")
ginx.NewRender(c).Message("forbidden")
return
}
}

View File

@@ -86,15 +86,11 @@ func (rt *Router) builtinMetricsDel(c *gin.Context) {
func (rt *Router) builtinMetricsDefaultTypes(c *gin.Context) {
lst := []string{
"Linux",
"Procstat",
"cAdvisor",
"Ping",
"MySQL",
"Redis",
"Kafka",
"Elasticsearch",
"PostgreSQL",
"MongoDB",
"Memcached",
"ClickHouse",
}
ginx.NewRender(c).Data(lst, nil)
}
@@ -102,29 +98,10 @@ func (rt *Router) builtinMetricsDefaultTypes(c *gin.Context) {
func (rt *Router) builtinMetricsTypes(c *gin.Context) {
collector := ginx.QueryStr(c, "collector", "")
query := ginx.QueryStr(c, "query", "")
disabled := ginx.QueryInt(c, "disabled", -1)
lang := c.GetHeader("X-Language")
metricTypeList, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
ginx.Dangerous(err)
componentList, err := models.BuiltinComponentGets(rt.Ctx, "", disabled)
ginx.Dangerous(err)
// 创建一个 map 来存储 componentList 中的类型
componentTypes := make(map[string]struct{})
for _, comp := range componentList {
componentTypes[comp.Ident] = struct{}{}
}
filteredMetricTypeList := make([]string, 0)
for _, metricType := range metricTypeList {
if _, exists := componentTypes[metricType]; exists {
filteredMetricTypeList = append(filteredMetricTypeList, metricType)
}
}
ginx.NewRender(c).Data(filteredMetricTypeList, nil)
ginx.NewRender(c).Data(metricTypeList, err)
}
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {

View File

@@ -123,7 +123,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
err = req.Add(rt.Ctx)
} else {
err = req.Update(rt.Ctx, "name", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
}
Render(c, nil, err)

View File

@@ -0,0 +1,141 @@
package router
import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// embeddedProductGets lists the embedded products visible to the current
// user. Admins see every product; other users see public products plus any
// private product shared with at least one of their user groups.
func (rt *Router) embeddedProductGets(c *gin.Context) {
	products, err := models.EmbeddedProductGets(rt.Ctx)
	ginx.Dangerous(err)

	me := c.MustGet("user").(*models.User)
	if me.IsAdmin() {
		ginx.NewRender(c).Data(products, nil)
		return
	}

	// Collect the ids of the groups the user belongs to.
	gids, err := models.MyGroupIds(rt.Ctx, me.Id)
	// BUG FIX: this error was previously dropped; a failed lookup must not
	// silently fall through and filter against an empty group set.
	ginx.Dangerous(err)

	bgSet := make(map[int64]struct{}, len(gids))
	for _, id := range gids {
		bgSet[id] = struct{}{}
	}

	// Keep public products, and private ones shared with any of the user's groups.
	var result []*models.EmbeddedProduct
	for _, product := range products {
		if !product.IsPrivate {
			result = append(result, product)
			continue
		}
		for _, tid := range product.TeamIDs {
			if _, ok := bgSet[tid]; ok {
				result = append(result, product)
				break
			}
		}
	}

	ginx.NewRender(c).Data(result, nil)
}
// embeddedProductGet returns a single embedded product by id, enforcing the
// caller's access rights on private products via hasEmbeddedProductAccess.
func (rt *Router) embeddedProductGet(c *gin.Context) {
	id := ginx.UrlParamInt64(c, "id")
	if id <= 0 {
		ginx.Bomb(400, "invalid id")
	}

	data, err := models.GetEmbeddedProductByID(rt.Ctx, id)
	ginx.Dangerous(err)

	me := c.MustGet("user").(*models.User)
	allowed, err := hasEmbeddedProductAccess(rt.Ctx, me, data)
	ginx.Dangerous(err)
	if !allowed {
		ginx.Bomb(403, "forbidden")
	}

	ginx.NewRender(c).Data(data, nil)
}
// embeddedProductAdd creates one or more embedded products in a single
// request. The request body is a JSON array of products.
func (rt *Router) embeddedProductAdd(c *gin.Context) {
	var eps []models.EmbeddedProduct
	ginx.BindJSON(c, &eps)

	me := c.MustGet("user").(*models.User)
	for i := range eps {
		// CONSISTENCY FIX: stamp audit fields with Username, matching
		// embeddedProductPut and the event-pipeline handlers, which all
		// record Username rather than Nickname.
		eps[i].CreateBy = me.Username
		eps[i].UpdateBy = me.Username
	}

	err := models.AddEmbeddedProduct(rt.Ctx, eps)
	ginx.NewRender(c).Message(err)
}
// embeddedProductPut updates an existing embedded product by id. The stored
// record is loaded first so fields not carried in the request body (creation
// info) are preserved, then the mutable fields and audit columns are copied in.
func (rt *Router) embeddedProductPut(c *gin.Context) {
	id := ginx.UrlParamInt64(c, "id")

	var ep models.EmbeddedProduct
	ginx.BindJSON(c, &ep)

	if id <= 0 {
		ginx.Bomb(400, "invalid id")
	}

	// NOTE(review): assumes GetEmbeddedProductByID returns a non-nil record
	// or a non-nil error for a missing id — confirm, a nil record would
	// panic on the field assignments below.
	oldProduct, err := models.GetEmbeddedProductByID(rt.Ctx, id)
	ginx.Dangerous(err)

	me := c.MustGet("user").(*models.User)

	// Overwrite the mutable fields and refresh the audit columns.
	oldProduct.Name = ep.Name
	oldProduct.URL = ep.URL
	oldProduct.IsPrivate = ep.IsPrivate
	oldProduct.TeamIDs = ep.TeamIDs
	oldProduct.UpdateBy = me.Username
	oldProduct.UpdateAt = time.Now().Unix()

	ginx.NewRender(c).Message(models.UpdateEmbeddedProduct(rt.Ctx, oldProduct))
}
// embeddedProductDelete removes an embedded product by id.
func (rt *Router) embeddedProductDelete(c *gin.Context) {
	id := ginx.UrlParamInt64(c, "id")
	if id <= 0 {
		ginx.Bomb(400, "invalid id")
	}
	ginx.NewRender(c).Message(models.DeleteEmbeddedProduct(rt.Ctx, id))
}
// hasEmbeddedProductAccess reports whether the user may view the given
// embedded product. Admins and public products always pass; a private
// product requires the user to belong to one of the product's teams.
func hasEmbeddedProductAccess(ctx *ctx.Context, user *models.User, ep *models.EmbeddedProduct) (bool, error) {
	if user.IsAdmin() || !ep.IsPrivate {
		return true, nil
	}

	gids, err := models.MyGroupIds(ctx, user.Id)
	if err != nil {
		return false, err
	}

	member := make(map[int64]struct{}, len(gids))
	for _, gid := range gids {
		member[gid] = struct{}{}
	}

	for _, tid := range ep.TeamIDs {
		if _, ok := member[tid]; ok {
			return true, nil
		}
	}

	return false, nil
}

View File

@@ -0,0 +1,228 @@
package router
import (
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
// eventPipelinesList returns the event pipelines visible to the current
// user. Team ids are resolved to names for display. Admins see every
// pipeline; other users only see pipelines owned by one of their groups.
func (rt *Router) eventPipelinesList(c *gin.Context) {
	me := c.MustGet("user").(*models.User)
	pipelines, err := models.ListEventPipelines(rt.Ctx)
	ginx.Dangerous(err)

	// Resolve every referenced team id to its name with one batch query.
	allTids := make([]int64, 0)
	for _, pipeline := range pipelines {
		allTids = append(allTids, pipeline.TeamIds...)
	}
	ugMap, err := models.UserGroupIdAndNameMap(rt.Ctx, allTids)
	ginx.Dangerous(err)
	for _, pipeline := range pipelines {
		for _, tid := range pipeline.TeamIds {
			pipeline.TeamNames = append(pipeline.TeamNames, ugMap[tid])
		}
	}

	// Admins see everything; return before the group lookup, which was
	// previously executed even though admins never used its result.
	if me.IsAdmin() {
		ginx.NewRender(c).Data(pipelines, nil)
		return
	}

	gids, err := models.MyGroupIdsMap(rt.Ctx, me.Id)
	ginx.Dangerous(err)

	// Keep only the pipelines shared with one of the user's groups.
	res := make([]*models.EventPipeline, 0)
	for _, pipeline := range pipelines {
		for _, tid := range pipeline.TeamIds {
			if _, ok := gids[tid]; ok {
				res = append(res, pipeline)
				break
			}
		}
	}
	ginx.NewRender(c).Data(res, nil)
}
// getEventPipeline returns one event pipeline by id after verifying that the
// caller has permission on the pipeline's teams.
func (rt *Router) getEventPipeline(c *gin.Context) {
	me := c.MustGet("user").(*models.User)
	id := ginx.UrlParamInt64(c, "id")

	pipeline, err := models.GetEventPipeline(rt.Ctx, id)
	ginx.Dangerous(err)
	ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))

	// Fill display names for the owning teams before rendering.
	ginx.Dangerous(pipeline.FillTeamNames(rt.Ctx))
	ginx.NewRender(c).Data(pipeline, nil)
}
// addEventPipeline creates a new event pipeline owned by the caller's teams.
func (rt *Router) addEventPipeline(c *gin.Context) {
	var pipeline models.EventPipeline
	ginx.BindJSON(c, &pipeline)

	// Stamp the audit fields with the current user and time.
	user := c.MustGet("user").(*models.User)
	now := time.Now().Unix()
	pipeline.CreateBy = user.Username
	pipeline.CreateAt = now
	pipeline.UpdateBy = user.Username
	pipeline.UpdateAt = now

	if err := pipeline.Verify(); err != nil {
		ginx.Bomb(http.StatusBadRequest, err.Error())
	}

	// The caller must have permission on the teams the pipeline is assigned to.
	ginx.Dangerous(user.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))

	ginx.NewRender(c).Message(models.CreateEventPipeline(rt.Ctx, &pipeline))
}
// updateEventPipeline updates an existing event pipeline.
//
// Permission is checked against both the stored pipeline's teams (the caller
// must be allowed to touch the existing object) AND the teams submitted in
// the request body — previously only the stored teams were checked, which let
// a caller reassign a pipeline into groups they have no permission on.
func (rt *Router) updateEventPipeline(c *gin.Context) {
	var f models.EventPipeline
	ginx.BindJSON(c, &f)

	me := c.MustGet("user").(*models.User)
	f.UpdateBy = me.Username
	f.UpdateAt = time.Now().Unix()

	pipeline, err := models.GetEventPipeline(rt.Ctx, f.ID)
	if err != nil {
		ginx.Bomb(http.StatusNotFound, "No such event pipeline")
	}

	ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
	// SECURITY FIX: also authorize the teams the pipeline is being moved to.
	ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, f.TeamIds))

	ginx.NewRender(c).Message(pipeline.Update(rt.Ctx, &f))
}
// deleteEventPipelines removes the given pipelines after verifying that the
// caller has permission on every one of them.
func (rt *Router) deleteEventPipelines(c *gin.Context) {
	var f struct {
		Ids []int64 `json:"ids"`
	}
	ginx.BindJSON(c, &f)
	if len(f.Ids) == 0 {
		ginx.Bomb(http.StatusBadRequest, "ids required")
	}

	me := c.MustGet("user").(*models.User)

	// Authorize every target before deleting anything so the operation is
	// all-or-nothing with respect to permissions.
	for _, id := range f.Ids {
		pipeline, err := models.GetEventPipeline(rt.Ctx, id)
		ginx.Dangerous(err)
		ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
	}

	ginx.NewRender(c).Message(models.DeleteEventPipelines(rt.Ctx, f.Ids))
}
// tryRunEventPipeline dry-runs a pipeline configuration against a historical
// alert event (nothing is persisted) and returns the processed event.
func (rt *Router) tryRunEventPipeline(c *gin.Context) {
	var f struct {
		EventId        int64                `json:"event_id"`
		PipelineConfig models.EventPipeline `json:"pipeline_config"`
	}
	ginx.BindJSON(c, &f)

	hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
	if err != nil || hisEvent == nil {
		ginx.Bomb(http.StatusBadRequest, "event not found")
	}

	// Run every processor of the submitted config over a "current event"
	// copy of the historical record.
	event := hisEvent.ToCur()
	for _, pc := range f.PipelineConfig.ProcessorConfigs {
		processor, err := models.GetProcessorByType(pc.Typ, pc.Config)
		if err != nil {
			ginx.Bomb(http.StatusBadRequest, "processor %+v type not found", pc)
		}
		if event = processor.Process(rt.Ctx, event); event == nil {
			// A processor may drop the event entirely.
			ginx.Bomb(http.StatusBadRequest, "event is nil")
		}
	}

	ginx.NewRender(c).Data(event, nil)
}
// tryRunEventProcessor dry-runs a single processor configuration against a
// historical alert event and returns the processed event.
func (rt *Router) tryRunEventProcessor(c *gin.Context) {
	var f struct {
		EventId         int64                  `json:"event_id"`
		ProcessorConfig models.ProcessorConfig `json:"processor_config"`
	}
	ginx.BindJSON(c, &f)

	hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
	if err != nil || hisEvent == nil {
		ginx.Bomb(http.StatusBadRequest, "event not found")
	}

	processor, err := models.GetProcessorByType(f.ProcessorConfig.Typ, f.ProcessorConfig.Config)
	if err != nil {
		ginx.Bomb(http.StatusBadRequest, "processor type not found")
	}

	event := processor.Process(rt.Ctx, hisEvent.ToCur())
	logger.Infof("processor %+v result: %+v", f.ProcessorConfig, event)
	if event == nil {
		// The processor may drop the event entirely.
		ginx.Bomb(http.StatusBadRequest, "event is nil")
	}

	ginx.NewRender(c).Data(event, nil)
}
// tryRunEventProcessorByNotifyRule dry-runs the enabled pipelines of a
// notify-rule configuration against a historical alert event, applying
// their processors in order, and returns the processed event.
func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
	var req struct {
		EventId         int64                   `json:"event_id"`
		PipelineConfigs []models.PipelineConfig `json:"pipeline_configs"`
	}
	ginx.BindJSON(c, &req)

	hisEvent, err := models.AlertHisEventGetById(rt.Ctx, req.EventId)
	if err != nil || hisEvent == nil {
		ginx.Bomb(http.StatusBadRequest, "event not found")
	}
	event := hisEvent.ToCur()

	// Only enabled pipelines take part in the dry run.
	ids := make([]int64, 0, len(req.PipelineConfigs))
	for _, pc := range req.PipelineConfigs {
		if pc.Enable {
			ids = append(ids, pc.PipelineId)
		}
	}

	pipelines, err := models.GetEventPipelinesByIds(rt.Ctx, ids)
	if err != nil {
		ginx.Bomb(http.StatusBadRequest, "processors not found")
	}

	for _, pl := range pipelines {
		for _, pc := range pl.ProcessorConfigs {
			proc, err := models.GetProcessorByType(pc.Typ, pc.Config)
			if err != nil {
				ginx.Bomb(http.StatusBadRequest, "processor %+v type not found", pc)
			}

			// Stop as soon as a processor drops the event.
			if event = proc.Process(rt.Ctx, event); event == nil {
				ginx.Bomb(http.StatusBadRequest, "event is nil")
			}
		}
	}

	ginx.NewRender(c).Data(event, nil)
}
// eventPipelinesListByService returns all event pipelines; no per-team
// filtering is applied in this handler.
func (rt *Router) eventPipelinesListByService(c *gin.Context) {
	lst, err := models.ListEventPipelines(rt.Ctx)
	ginx.NewRender(c).Data(lst, err)
}

View File

@@ -40,6 +40,10 @@ func (rt *Router) statistic(c *gin.Context) {
model = models.NotifyRule{}
case "notify_channel":
model = models.NotifyChannel{}
case "event_pipeline":
statistics, err = models.EventPipelineStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "datasource":
// datasource update_at is different from others
statistics, err = models.DatasourceStatistics(rt.Ctx)

View File

@@ -32,7 +32,7 @@ func (rt *Router) messageTemplatesAdd(c *gin.Context) {
for _, tpl := range lst {
ginx.Dangerous(tpl.Verify())
if !isAdmin && !slice.HaveIntersection(gids, tpl.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
ginx.Bomb(http.StatusForbidden, "forbidden")
}
idents = append(idents, tpl.Ident)
@@ -75,8 +75,8 @@ func (rt *Router) messageTemplatesDel(c *gin.Context) {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
for _, t := range lst {
if !slice.HaveIntersection[int64](gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
@@ -105,8 +105,8 @@ func (rt *Router) messageTemplatePut(c *gin.Context) {
if !me.IsAdmin() {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !slice.HaveIntersection[int64](gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
@@ -125,8 +125,8 @@ func (rt *Router) messageTemplateGet(c *gin.Context) {
if mt == nil {
ginx.Bomb(http.StatusNotFound, "message template not found")
}
if mt.Private == 1 && !slice.HaveIntersection[int64](gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
if mt.Private == 1 && !slice.HaveIntersection(gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
ginx.NewRender(c).Data(mt, nil)

View File

@@ -1,11 +1,13 @@
package router
import (
"math"
"net/http"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
@@ -63,10 +65,45 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
username := c.MustGet("username").(string)
f.CreateBy = username
f.UpdateBy = username
f.GroupId = ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Message(f.Add(rt.Ctx))
}
type MuteTestForm struct {
EventId int64 `json:"event_id" binding:"required"`
AlertMute models.AlertMute `json:"mute_config" binding:"required"`
}
func (rt *Router) alertMuteTryRun(c *gin.Context) {
var f MuteTestForm
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
// 绕过时间范围检查(设置时间范围为全量:0 到 int64 最大值),仅验证其他匹配条件(如标签、策略类型等)
f.AlertMute.MuteTimeType = models.TimeRange
f.AlertMute.Btime = 0 // 最小可能值(如 Unix 时间戳起点)
f.AlertMute.Etime = math.MaxInt64 // 最大可能值int64 上限)
if !mute.MatchMute(&curEvent, &f.AlertMute) {
ginx.NewRender(c).Data("not match", nil)
return
}
ginx.NewRender(c).Data("mute test match", nil)
}
// Preview events (alert_cur_event) that match the mute strategy based on the following criteria:
// business group ID (group_id, group_id), product (prod, rule_prod),
// alert event severity (severities, severity), and event tags (tags, tags).

View File

@@ -9,6 +9,7 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
@@ -387,13 +388,17 @@ func (rt *Router) createAuth(ctx context.Context, userIdentity string, td *Token
now := time.Now()
if err := rt.Redis.Set(ctx, rt.wrapJwtKey(td.AccessUuid), userIdentity, at.Sub(now)).Err(); err != nil {
cstats.RedisOperationLatency.WithLabelValues("set_token", "fail").Observe(time.Since(now).Seconds())
return err
}
if err := rt.Redis.Set(ctx, rt.wrapJwtKey(td.RefreshUuid), userIdentity, rte.Sub(now)).Err(); err != nil {
cstats.RedisOperationLatency.WithLabelValues("set_token", "fail").Observe(time.Since(now).Seconds())
return err
}
cstats.RedisOperationLatency.WithLabelValues("set_token", "success").Observe(time.Since(now).Seconds())
if rt.HTTP.JWTAuth.SingleLogin {
if err := rt.Redis.SAdd(ctx, rt.wrapJwtKey(username), rt.wrapJwtKey(td.AccessUuid), rt.wrapJwtKey(td.RefreshUuid)).Err(); err != nil {
return err
@@ -404,11 +409,25 @@ func (rt *Router) createAuth(ctx context.Context, userIdentity string, td *Token
}
// fetchAuth looks up the user identity stored in redis under the given
// token uuid, observing the redis get latency as success or fail.
//
// Defect fixed: a stray pre-refactor `return` line was left at the top of
// the function, which made the latency instrumentation and the rest of
// the body unreachable dead code.
func (rt *Router) fetchAuth(ctx context.Context, givenUuid string) (string, error) {
	now := time.Now()
	ret, err := rt.Redis.Get(ctx, rt.wrapJwtKey(givenUuid)).Result()
	if err != nil {
		cstats.RedisOperationLatency.WithLabelValues("get_token", "fail").Observe(time.Since(now).Seconds())
	} else {
		cstats.RedisOperationLatency.WithLabelValues("get_token", "success").Observe(time.Since(now).Seconds())
	}
	return ret, err
}
// deleteAuth removes the token stored in redis under the given uuid,
// observing the redis del latency as success or fail.
//
// Defects fixed: (1) a stray pre-refactor `return` line made the body
// unreachable; (2) the latency was computed as time.Since(time.Now()),
// which is always ~0 — the start time must be captured before the Del
// call so the observation reflects the actual operation duration.
func (rt *Router) deleteAuth(ctx context.Context, givenUuid string) error {
	start := time.Now()
	err := rt.Redis.Del(ctx, rt.wrapJwtKey(givenUuid)).Err()
	if err != nil {
		cstats.RedisOperationLatency.WithLabelValues("del_token", "fail").Observe(time.Since(start).Seconds())
	} else {
		cstats.RedisOperationLatency.WithLabelValues("del_token", "success").Observe(time.Since(start).Seconds())
	}
	return err
}
func (rt *Router) deleteTokens(ctx context.Context, authD *AccessDetails) error {

View File

@@ -31,7 +31,7 @@ func (rt *Router) notifyRulesAdd(c *gin.Context) {
for _, nr := range lst {
ginx.Dangerous(nr.Verify())
if !isAdmin && !slice.HaveIntersection(gids, nr.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
ginx.Bomb(http.StatusForbidden, "forbidden")
}
nr.CreateBy = me.Username
@@ -56,8 +56,8 @@ func (rt *Router) notifyRulesDel(c *gin.Context) {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
for _, t := range lst {
if !slice.HaveIntersection[int64](gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
@@ -79,8 +79,8 @@ func (rt *Router) notifyRulePut(c *gin.Context) {
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !slice.HaveIntersection[int64](gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
f.UpdateBy = me.Username
@@ -99,8 +99,8 @@ func (rt *Router) notifyRuleGet(c *gin.Context) {
ginx.Bomb(http.StatusNotFound, "notify rule not found")
}
if !slice.HaveIntersection[int64](gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
ginx.NewRender(c).Data(nr, nil)

View File

@@ -45,7 +45,7 @@ func (rt *Router) notifyTplUpdateContent(c *gin.Context) {
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
f.UpdateAt = time.Now().Unix()
@@ -64,7 +64,7 @@ func (rt *Router) notifyTplUpdate(c *gin.Context) {
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
// get the count of the same channel and name but different id
@@ -188,7 +188,7 @@ func (rt *Router) notifyTplDel(c *gin.Context) {
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
ginx.NewRender(c).Message(f.NotifyTplDelete(rt.Ctx, id))

View File

@@ -3,6 +3,7 @@ package router
import (
"fmt"
"sort"
"sync"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
@@ -38,71 +39,116 @@ type LogResp struct {
List []interface{} `json:"list"`
}
func (rt *Router) QueryLogBatch(c *gin.Context) {
var f QueryFrom
ginx.BindJSON(c, &f)
func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFrom) (LogResp, error) {
var resp LogResp
var errMsg string
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
for _, q := range f.Queries {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, q.Did, q.DsCate, q) {
ginx.Bomb(200, "no permission")
if !anonymousAccess && !CheckDsPerm(ctx, q.Did, q.DsCate, q) {
return LogResp{}, fmt.Errorf("forbidden")
}
plug, exists := dscache.DsCache.Get(q.DsCate, q.Did)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", q.Did, q)
ginx.Bomb(200, "cluster not exists")
return LogResp{}, fmt.Errorf("cluster not exists")
}
data, total, err := plug.QueryLog(c.Request.Context(), q.Query)
if err != nil {
errMsg += fmt.Sprintf("query data error: %v query:%v\n ", err, q)
logger.Warningf("query data error: %v query:%v", err, q)
continue
}
wg.Add(1)
go func(query Query) {
defer wg.Done()
m := make(map[string]interface{})
m["ref"] = q.Ref
m["ds_id"] = q.Did
m["ds_cate"] = q.DsCate
m["data"] = data
resp.List = append(resp.List, m)
resp.Total += total
data, total, err := plug.QueryLog(ctx.Request.Context(), query.Query)
mu.Lock()
defer mu.Unlock()
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logger.Warningf(errMsg)
errs = append(errs, err)
return
}
m := make(map[string]interface{})
m["ref"] = query.Ref
m["ds_id"] = query.Did
m["ds_cate"] = query.DsCate
m["data"] = data
resp.List = append(resp.List, m)
resp.Total += total
}(q)
}
if errMsg != "" || len(resp.List) == 0 {
ginx.Bomb(200, errMsg)
wg.Wait()
if len(errs) > 0 {
return LogResp{}, errs[0]
}
if len(resp.List) == 0 {
return LogResp{}, fmt.Errorf("no data")
}
return resp, nil
}
func (rt *Router) QueryLogBatch(c *gin.Context) {
var f QueryFrom
ginx.BindJSON(c, &f)
resp, err := QueryLogBatchConcurrently(rt.Center.AnonymousAccess.PromQuerier, c, f)
if err != nil {
ginx.Bomb(200, "err:%v", err)
}
ginx.NewRender(c).Data(resp, nil)
}
func (rt *Router) QueryData(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.QueryParam) ([]models.DataResp, error) {
var resp []models.DataResp
var err error
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(403, "no permission")
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
return nil, fmt.Errorf("forbidden")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
return nil, fmt.Errorf("cluster not exists")
}
var datas []models.DataResp
datas, err = plug.QueryData(c.Request.Context(), q)
if err != nil {
logger.Warningf("query data error: req:%+v err:%v", q, err)
ginx.Bomb(200, "err:%v", err)
}
logger.Debugf("query data: req:%+v resp:%+v", q, datas)
resp = append(resp, datas...)
wg.Add(1)
go func(query interface{}) {
defer wg.Done()
datas, err := plug.QueryData(ctx.Request.Context(), query)
if err != nil {
logger.Warningf("query data error: req:%+v err:%v", query, err)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
return
}
logger.Debugf("query data: req:%+v resp:%+v", query, datas)
mu.Lock()
resp = append(resp, datas...)
mu.Unlock()
}(q)
}
wg.Wait()
if len(errs) > 0 {
return nil, errs[0]
}
// 面向API的统一处理
// 按照 .Metric 排序
// 确保仪表盘中相同图例的曲线颜色相同
@@ -115,41 +161,80 @@ func (rt *Router) QueryData(c *gin.Context) {
})
}
ginx.NewRender(c).Data(resp, err)
return resp, nil
}
func (rt *Router) QueryData(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
resp, err := QueryDataConcurrently(rt.Center.AnonymousAccess.PromQuerier, c, f)
if err != nil {
ginx.Bomb(200, "err:%v", err)
}
ginx.NewRender(c).Data(resp, nil)
}
// QueryLogConcurrently 并发查询日志
func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.QueryParam) (LogResp, error) {
var resp LogResp
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
for _, q := range f.Querys {
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
return LogResp{}, fmt.Errorf("forbidden")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
return LogResp{}, fmt.Errorf("cluster not exists")
}
wg.Add(1)
go func(query interface{}) {
defer wg.Done()
data, total, err := plug.QueryLog(ctx.Request.Context(), query)
logger.Debugf("query log: req:%+v resp:%+v", query, data)
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logger.Warningf(errMsg)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
return
}
mu.Lock()
resp.List = append(resp.List, data...)
resp.Total += total
mu.Unlock()
}(q)
}
wg.Wait()
if len(errs) > 0 {
return LogResp{}, errs[0]
}
if len(resp.List) == 0 {
return LogResp{}, fmt.Errorf("no data")
}
return resp, nil
}
func (rt *Router) QueryLogV2(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
var resp LogResp
var errMsg string
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(200, "no permission")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
ginx.Bomb(200, "cluster not exists")
}
data, total, err := plug.QueryLog(c.Request.Context(), q)
if err != nil {
errMsg += fmt.Sprintf("query data error: %v query:%v\n ", err, q)
logger.Warningf("query data error: %v query:%v", err, q)
continue
}
resp.List = append(resp.List, data...)
resp.Total += total
}
if errMsg != "" || len(resp.List) == 0 {
ginx.Bomb(200, errMsg)
}
ginx.NewRender(c).Data(resp, nil)
resp, err := QueryLogConcurrently(rt.Center.AnonymousAccess.PromQuerier, c, f)
ginx.NewRender(c).Data(resp, err)
}
func (rt *Router) QueryLog(c *gin.Context) {
@@ -159,7 +244,7 @@ func (rt *Router) QueryLog(c *gin.Context) {
var resp []interface{}
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(200, "no permission")
ginx.Bomb(200, "forbidden")
}
plug, exists := dscache.DsCache.Get("elasticsearch", f.DatasourceId)

View File

@@ -0,0 +1,36 @@
package router
import (
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// sourceTokenAdd creates a new access token for an event source. The
// generated token (a random uuid) is returned to the caller; expired
// tokens are cleaned up in the background as a side effect.
func (rt *Router) sourceTokenAdd(c *gin.Context) {
	var f models.SourceToken
	ginx.BindJSON(c, &f)

	now := time.Now().Unix()
	if f.ExpireAt > 0 && f.ExpireAt <= now {
		ginx.Bomb(http.StatusBadRequest, "expire time must be in the future")
	}

	f.Token = uuid.New().String()
	f.CreateBy = c.MustGet("username").(string)
	f.CreateAt = now

	ginx.Dangerous(f.Add(rt.Ctx))

	// Best-effort asynchronous cleanup; it does not affect the response.
	go models.CleanupExpiredTokens(rt.Ctx)

	ginx.NewRender(c).Data(f.Token, nil)
}

View File

@@ -57,8 +57,11 @@ func (rt *Router) targetGets(c *gin.Context) {
var err error
if len(bgids) > 0 {
for _, gid := range bgids {
rt.bgroCheck(c, gid)
// 如果用户当前查看的是未归组机器,会传入 bgids = [0],此时是不需要校验的,故而排除这种情况
if !(len(bgids) == 1 && bgids[0] == 0) {
for _, gid := range bgids {
rt.bgroCheck(c, gid)
}
}
} else {
user := c.MustGet("user").(*models.User)
@@ -458,7 +461,7 @@ func (rt *Router) targetBindBgids(c *gin.Context) {
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. You are not admin of BG(%s)", bg.Name)
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
isNeverGrouped, checkErr := haveNeverGroupedIdent(rt.Ctx, f.Idents)
@@ -468,7 +471,7 @@ func (rt *Router) targetBindBgids(c *gin.Context) {
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
@@ -553,7 +556,7 @@ func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {
ginx.Dangerous(err)
if len(nopri) > 0 {
ginx.Bomb(http.StatusForbidden, "No permission to operate the targets: %s", strings.Join(nopri, ", "))
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
@@ -576,11 +579,11 @@ func (rt *Router) targetsOfAlertRule(c *gin.Context) {
}
func (rt *Router) checkTargetsExistByIndent(idents []string) {
existingIdents, err := models.TargetNoExistIdents(rt.Ctx, idents)
notExists, err := models.TargetNoExistIdents(rt.Ctx, idents)
ginx.Dangerous(err)
if len(existingIdents) > 0 {
ginx.Bomb(http.StatusBadRequest, "targets not exist: %s", strings.Join(existingIdents, ","))
if len(notExists) > 0 {
ginx.Bomb(http.StatusBadRequest, "targets not exist: %s", strings.Join(notExists, ", "))
}
}

View File

@@ -1,6 +1,7 @@
package router
import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/sender"
@@ -84,20 +85,6 @@ func (rt *Router) taskGetsByGids(c *gin.Context) {
}, nil)
}
type taskForm struct {
Title string `json:"title" binding:"required"`
Account string `json:"account" binding:"required"`
Batch int `json:"batch"`
Tolerance int `json:"tolerance"`
Timeout int `json:"timeout"`
Pause string `json:"pause"`
Script string `json:"script" binding:"required"`
Args string `json:"args"`
Action string `json:"action" binding:"required"`
Creator string `json:"creator"`
Hosts []string `json:"hosts" binding:"required"`
}
func (rt *Router) taskRecordAdd(c *gin.Context) {
var f *models.TaskRecord
ginx.BindJSON(c, &f)
@@ -112,6 +99,14 @@ func (rt *Router) taskAdd(c *gin.Context) {
var f models.TaskForm
ginx.BindJSON(c, &f)
// 把 f.Hosts 中的空字符串过滤掉
hosts := make([]string, 0, len(f.Hosts))
for i := range f.Hosts {
if strings.TrimSpace(f.Hosts[i]) != "" {
hosts = append(hosts, strings.TrimSpace(f.Hosts[i]))
}
}
f.Hosts = hosts
bgid := ginx.UrlParamInt64(c, "id")
user := c.MustGet("user").(*models.User)

View File

@@ -119,6 +119,18 @@ type taskTplForm struct {
Hosts []string `json:"hosts"`
}
func (f *taskTplForm) Verify() {
// 传入的 f.Hosts 可能是 []string{"", "a", "b"},需要过滤掉空字符串
args := make([]string, 0, len(f.Hosts))
for _, ident := range f.Hosts {
if strings.TrimSpace(ident) != "" {
args = append(args, strings.TrimSpace(ident))
}
}
f.Hosts = args
}
func (rt *Router) taskTplAdd(c *gin.Context) {
if !rt.Ibex.Enable {
ginx.Bomb(400, i18n.Sprintf(c.GetHeader("X-Language"), "This functionality has not been enabled. Please contact the system administrator to activate it."))
@@ -127,6 +139,7 @@ func (rt *Router) taskTplAdd(c *gin.Context) {
var f taskTplForm
ginx.BindJSON(c, &f)
f.Verify()
user := c.MustGet("user").(*models.User)
now := time.Now().Unix()
@@ -170,6 +183,7 @@ func (rt *Router) taskTplPut(c *gin.Context) {
var f taskTplForm
ginx.BindJSON(c, &f)
f.Verify()
rt.checkTargetsExistByIndent(f.Hosts)

View File

@@ -40,7 +40,7 @@ func (rt *Router) userVariableConfigPut(context *gin.Context) {
user := context.MustGet("user").(*models.User)
if !user.IsAdmin() && f.CreateBy != user.Username {
// only admin or creator can update
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
ginx.NewRender(context).Message(models.ConfigsUserVariableUpdate(rt.Ctx, f))
@@ -54,7 +54,7 @@ func (rt *Router) userVariableConfigDel(context *gin.Context) {
user := context.MustGet("user").(*models.User)
if !user.IsAdmin() && configs.CreateBy != user.Username {
// only admin or creator can delete
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
if configs != nil && configs.External == models.ConfigExternal {

View File

@@ -54,7 +54,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
idents := idents.New(ctx, redis)
idents := idents.New(ctx, redis, config.Pushgw)
metas := metas.New(redis)
writers := writer.NewWriters(config.Pushgw)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)

View File

@@ -24,6 +24,7 @@ type Query struct {
Index string `json:"index" mapstructure:"index"`
IndexPatternId int64 `json:"index_pattern" mapstructure:"index_pattern"`
Filter string `json:"filter" mapstructure:"filter"`
Offset int64 `json:"offset" mapstructure:"offset"`
MetricAggr MetricAggr `json:"value" mapstructure:"value"`
GroupBy []GroupBy `json:"group_by" mapstructure:"group_by"`
DateField string `json:"date_field" mapstructure:"date_field"`
@@ -372,6 +373,11 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
start = start - delay
}
if param.Offset > 0 {
end = end - param.Offset
start = start - param.Offset
}
q.Gte(time.Unix(start, 0).UnixMilli())
q.Lte(time.Unix(end, 0).UnixMilli())
q.Format("epoch_millis")

Binary file not shown.

After

Width:  |  Height:  |  Size: 384 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 336 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 497 KiB

BIN
doc/img/readme/logos.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 956 KiB

View File

@@ -903,4 +903,16 @@ CREATE TABLE dash_annotation (
create_by varchar(64) not null default '',
update_at bigint not null default 0,
update_by varchar(64) not null default ''
);
);
CREATE TABLE source_token (
id bigserial PRIMARY KEY,
source_type varchar(64) NOT NULL DEFAULT '',
source_id varchar(255) NOT NULL DEFAULT '',
token varchar(255) NOT NULL DEFAULT '',
expire_at bigint NOT NULL DEFAULT 0,
create_at bigint NOT NULL DEFAULT 0,
create_by varchar(64) NOT NULL DEFAULT ''
);
CREATE INDEX idx_source_token_type_id_token ON source_token (source_type, source_id, token);

View File

@@ -107,12 +107,6 @@ insert into `role_operation`(role_name, operation) values('Standard', '/help/mig
insert into `role_operation`(role_name, operation) values('Standard', '/alert-rules-built-in');
insert into `role_operation`(role_name, operation) values('Standard', '/dashboards-built-in');
insert into `role_operation`(role_name, operation) values('Standard', '/trace/dependencies');
insert into `role_operation`(role_name, operation) values('Admin', '/help/source');
insert into `role_operation`(role_name, operation) values('Admin', '/help/sso');
insert into `role_operation`(role_name, operation) values('Admin', '/help/notification-tpls');
insert into `role_operation`(role_name, operation) values('Admin', '/help/notification-settings');
insert into `role_operation`(role_name, operation) values('Standard', '/users');
insert into `role_operation`(role_name, operation) values('Standard', '/user-groups');
insert into `role_operation`(role_name, operation) values('Standard', '/user-groups/add');
@@ -471,6 +465,7 @@ CREATE TABLE `alert_cur_event` (
`rule_config` text not null comment 'annotations',
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
`original_tags` text comment 'labels key=val,,k2=v2',
`notify_rule_ids` text COMMENT 'notify rule ids',
PRIMARY KEY (`id`),
KEY (`hash`),
KEY (`rule_id`),
@@ -513,6 +508,7 @@ CREATE TABLE `alert_his_event` (
`original_tags` text comment 'labels key=val,,k2=v2',
`annotations` text not null comment 'annotations',
`rule_config` text not null comment 'annotations',
`notify_rule_ids` text COMMENT 'notify rule ids',
PRIMARY KEY (`id`),
INDEX `idx_last_eval_time` (`last_eval_time`),
KEY (`hash`),
@@ -537,7 +533,7 @@ CREATE TABLE `builtin_components` (
`updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT '''updater''',
`disabled` int NOT NULL DEFAULT 0 COMMENT '''is disabled or not''',
PRIMARY KEY (`id`),
UNIQUE KEY `idx_ident` (`ident`)
KEY (`ident`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `builtin_payloads` (
@@ -793,6 +789,7 @@ CREATE TABLE `notify_rule` (
`enable` tinyint(1) not null default 0,
`user_group_ids` varchar(255) not null default '',
`notify_configs` text,
`pipeline_configs` text,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
@@ -833,6 +830,35 @@ CREATE TABLE `message_template` (
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
CREATE TABLE `event_pipeline` (
`id` bigint unsigned not null auto_increment,
`name` varchar(128) not null,
`team_ids` text,
`description` varchar(255) not null default '',
`filter_enable` tinyint(1) not null default 0,
`label_filters` text,
`attribute_filters` text,
`processors` text,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
CREATE TABLE `embedded_product` (
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`url` varchar(255) DEFAULT NULL,
`is_private` boolean DEFAULT NULL,
`team_ids` varchar(255),
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `task_meta`
(
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
@@ -2189,4 +2215,16 @@ CREATE TABLE task_host_99
UNIQUE KEY `idx_id_host` (`id`, `host`),
PRIMARY KEY (`ii`)
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;
DEFAULT CHARSET = utf8mb4;
CREATE TABLE `source_token` (
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
`source_type` varchar(64) NOT NULL DEFAULT '' COMMENT 'source type',
`source_id` varchar(255) NOT NULL DEFAULT '' COMMENT 'source identifier',
`token` varchar(255) NOT NULL DEFAULT '' COMMENT 'access token',
`expire_at` bigint NOT NULL DEFAULT 0 COMMENT 'expire timestamp',
`create_at` bigint NOT NULL DEFAULT 0 COMMENT 'create timestamp',
`create_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'creator',
PRIMARY KEY (`id`),
KEY `idx_source_type_id_token` (`source_type`, `source_id`, `token`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

View File

@@ -226,3 +226,39 @@ ALTER TABLE `notify_channel` ADD COLUMN `weight` int not null default 0;
/* v8.0.0-beta.11 2025-04-10 */
ALTER TABLE `es_index_pattern` ADD COLUMN `note` varchar(1024) not null default '';
ALTER TABLE `datasource` ADD COLUMN `identifier` varchar(255) not null default '';
/* v8.0.0-beta.11 2025-05-15 */
ALTER TABLE `notify_rule` ADD COLUMN `pipeline_configs` text;
CREATE TABLE `event_pipeline` (
`id` bigint unsigned not null auto_increment,
`name` varchar(128) not null,
`team_ids` text,
`description` varchar(255) not null default '',
`filter_enable` tinyint(1) not null default 0,
`label_filters` text,
`attribute_filters` text,
`processors` text,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
/* v8.0.0-next */
CREATE TABLE `source_token` (
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
`source_type` varchar(64) NOT NULL DEFAULT '' COMMENT 'source type',
`source_id` varchar(255) NOT NULL DEFAULT '' COMMENT 'source identifier',
`token` varchar(255) NOT NULL DEFAULT '' COMMENT 'access token',
`expire_at` bigint NOT NULL DEFAULT 0 COMMENT 'expire timestamp',
`create_at` bigint NOT NULL DEFAULT 0 COMMENT 'create timestamp',
`create_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'creator',
PRIMARY KEY (`id`),
KEY `idx_source_type_id_token` (`source_type`, `source_id`, `token`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/* v8.0.0-beta.12 2025-06-03 */
ALTER TABLE `alert_his_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';
ALTER TABLE `alert_cur_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';

View File

@@ -47,6 +47,7 @@ var PromDefaultDatasourceId int64
func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
for {
if !fromAPI {
foundDefaultDatasource := false
items, err := models.GetDatasources(ctx)
if err != nil {
logger.Errorf("get datasource from database fail: %v", err)
@@ -58,6 +59,7 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
for _, item := range items {
if item.PluginType == "prometheus" && item.IsDefault {
atomic.StoreInt64(&PromDefaultDatasourceId, item.Id)
foundDefaultDatasource = true
}
logger.Debugf("get datasource: %+v", item)
@@ -90,6 +92,12 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
}
dss = append(dss, ds)
}
if !foundDefaultDatasource && atomic.LoadInt64(&PromDefaultDatasourceId) != 0 {
logger.Debugf("no default datasource found")
atomic.StoreInt64(&PromDefaultDatasourceId, 0)
}
PutDatasources(dss)
} else {
FromAPIHook()
@@ -183,7 +191,14 @@ func PutDatasources(items []datasource.DatasourceInfo) {
ids = append(ids, item.Id)
// 异步初始化 client 不然数据源同步的会很慢
go DsCache.Put(typ, item.Id, ds)
go func() {
defer func() {
if r := recover(); r != nil {
logger.Errorf("panic in datasource item: %+v panic:%v", item, r)
}
}()
DsCache.Put(typ, item.Id, ds)
}()
}
logger.Debugf("get plugin by type success Ids:%v", ids)

View File

@@ -1,5 +1,5 @@
{
"name": " Kubernetes-Deployment/ Container",
"name": "Kubernetes / Deployment / Container",
"tags": "Categraf",
"configs": {
"panels": [

View File

@@ -1,7 +1,7 @@
{
"id": 0,
"group_id": 0,
"name": "Kubernetes / Container",
"name": "Kubernetes / Pod",
"ident": "",
"tags": "Categraf",
"create_at": 0,
@@ -1748,20 +1748,34 @@
],
"var": [
{
"definition": "prometheus",
"name": "datasource",
"type": "datasource"
"type": "datasource",
"definition": "prometheus",
"defaultValue": 40
},
{
"name": "namespace",
"type": "query",
"hide": false,
"datasource": {
"cate": "prometheus",
"value": "${datasource}"
},
"definition": "label_values(container_cpu_usage_seconds_total, pod)",
"multi": false,
"name": "pod_name",
"definition": "label_values(container_cpu_usage_seconds_total, namespace)",
"reg": "",
"type": "query"
"multi": false
},
{
"name": "pod_name",
"type": "query",
"hide": false,
"datasource": {
"cate": "prometheus",
"value": "${datasource}"
},
"definition": "label_values(container_cpu_usage_seconds_total{namespace=\"$namespace\"}, pod)",
"reg": "",
"multi": false
}
],
"version": "3.0.0"

View File

@@ -1,5 +1,5 @@
{
"name": " Kubernetes-Statefulset / Container ",
"name": "Kubernetes / Statefulset / Container",
"tags": "Categraf",
"configs": {
"panels": [

View File

@@ -0,0 +1,342 @@
[
{
"uuid": 1745735239727485700,
"collector": "Node",
"typ": "Kubernetes",
"name": "TCP当前连接数",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_netstat_Tcp_CurrEstab * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239701096000,
"collector": "Node",
"typ": "Kubernetes",
"name": "文件描述符使用数",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_filefd_allocated * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239704160000,
"collector": "Node",
"typ": "Kubernetes",
"name": "文件描述符最大限制",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_filefd_maximum * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239750006800,
"collector": "Node",
"typ": "Kubernetes",
"name": "文件系统inode使用率",
"unit": "",
"note": "节点指标\n类型: -",
"lang": "zh_CN",
"expression": "100 - (node_filesystem_files_free * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"} / node_filesystem_files * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"} * 100)"
},
{
"uuid": 1745735239746991600,
"collector": "Node",
"typ": "Kubernetes",
"name": "文件系统使用率",
"unit": "",
"note": "节点指标\n类型: -",
"lang": "zh_CN",
"expression": "100 - ((node_filesystem_avail_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"} * 100) / node_filesystem_size_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239753550000,
"collector": "Node",
"typ": "Kubernetes",
"name": "文件系统错误数",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(node_filesystem_device_error * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}) by (mountpoint)"
},
{
"uuid": 1745735239743097300,
"collector": "Node",
"typ": "Kubernetes",
"name": "磁盘IO使用率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "rate(node_disk_io_now[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239740169500,
"collector": "Node",
"typ": "Kubernetes",
"name": "磁盘写入IOPS",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "rate(node_disk_writes_completed_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239734228700,
"collector": "Node",
"typ": "Kubernetes",
"name": "磁盘写入速率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "rate(node_disk_written_bytes_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239737122600,
"collector": "Node",
"typ": "Kubernetes",
"name": "磁盘读取IOPS",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "rate(node_disk_reads_completed_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239730406000,
"collector": "Node",
"typ": "Kubernetes",
"name": "磁盘读取速率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "rate(node_disk_read_bytes_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239694202600,
"collector": "Node",
"typ": "Kubernetes",
"name": "系统上下文切换率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "rate(node_context_switches_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239697167400,
"collector": "Node",
"typ": "Kubernetes",
"name": "系统中断率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "rate(node_intr_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239724650200,
"collector": "Node",
"typ": "Kubernetes",
"name": "网络发送丢包率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(rate(node_network_transmit_drop_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239710266000,
"collector": "Node",
"typ": "Kubernetes",
"name": "网络发送带宽",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(rate(node_network_transmit_bytes_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239716205000,
"collector": "Node",
"typ": "Kubernetes",
"name": "网络发送错误率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(rate(node_network_transmit_errs_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239721688800,
"collector": "Node",
"typ": "Kubernetes",
"name": "网络接收丢包率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(rate(node_network_receive_drop_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239707241500,
"collector": "Node",
"typ": "Kubernetes",
"name": "网络接收带宽",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(rate(node_network_receive_bytes_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239713318000,
"collector": "Node",
"typ": "Kubernetes",
"name": "网络接收错误率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(rate(node_network_receive_errs_total[5m]) * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239783181800,
"collector": "Node",
"typ": "Kubernetes",
"name": "网络连接跟踪条目数",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_nf_conntrack_entries * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239786134000,
"collector": "Node",
"typ": "Kubernetes",
"name": "网络连接跟踪限制",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_nf_conntrack_entries_limit * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239675145700,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点 CPU 使用率",
"unit": "",
"note": "节点指标\n类型: by",
"lang": "zh_CN",
"expression": "sum by (instance) (rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\"}[5m])) * on(instance) group_left(nodename) node_uname_info{nodename=~\"$node_name\"} *100"
},
{
"uuid": 1745735239691192000,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点15分钟负载",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_load15 * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239685264100,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点1分钟负载",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_load1 * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239688232700,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点5分钟负载",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_load5 * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239776256800,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点Swap使用量",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_memory_SwapTotal_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"} - node_memory_SwapFree_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239779806500,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点Swap总量",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_memory_SwapTotal_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239681529300,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点上运行的Pod数量",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(kube_pod_info * on(node) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239678397700,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点内存使用率",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "sum(node_memory_MemTotal_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"} - node_memory_MemAvailable_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}) / sum(node_memory_MemTotal_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"})"
},
{
"uuid": 1745735239760507400,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点内存详细信息 - 可用",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_memory_MemAvailable_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239756641800,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点内存详细信息 - 总量",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_memory_MemTotal_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239772786200,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点内存详细信息 - 空闲",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_memory_MemFree_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239769542000,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点内存详细信息 - 缓冲区",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_memory_Buffers_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
},
{
"uuid": 1745735239764136000,
"collector": "Node",
"typ": "Kubernetes",
"name": "节点内存详细信息 - 缓存",
"unit": "",
"note": "节点指标\n类型: *",
"lang": "zh_CN",
"expression": "node_memory_Cached_bytes * on(instance, cluster) group_left(nodename) node_uname_info{nodename=~\"$node_name\"}"
}
]

View File

@@ -0,0 +1,282 @@
[
{
"uuid": 1745893024149445000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "Inode数量",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(container_fs_inodes_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name)"
},
{
"uuid": 1745893024121015300,
"collector": "Pod",
"typ": "Kubernetes",
"name": "不可中断任务数量",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(container_tasks_state{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\", state=\"uninterruptible\"}) by (name)"
},
{
"uuid": 1745893024130551800,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器cache使用",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "(sum(container_memory_cache{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name))"
},
{
"uuid": 1745893024108569900,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器CPU Limit",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}/container_spec_cpu_period{namespace=\"$namespace\",",
"lang": "zh_CN",
"expression": "(sum(container_spec_cpu_quota{namespace=\"$namespace\", pod=~\"$pod_name\"}/container_spec_cpu_period{namespace=\"$namespace\", pod=~\"$pod_name\"}) by (name))"
},
{
"uuid": 1745893024112672500,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器CPU load 10",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(container_cpu_load_average_10s{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name)"
},
{
"uuid": 1745893024026246700,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器CPU使用率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}[1m])*100) by(name)"
},
{
"uuid": 1745893024029544000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器CPU归一化后使用率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}[1m])*100) by(name)/((sum(container_spec_cpu_quota{namespace=\"$namespace\", pod=~\"$pod_name\"}/container_spec_cpu_period{namespace=\"$namespace\", pod=~\"$pod_name\"}) by (name)))"
},
{
"uuid": 1745893024146207700,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器I/O",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(container_fs_io_current{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name)"
},
{
"uuid": 1745893024136457000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器RSS内存使用",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "(sum(container_memory_rss{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name))"
},
{
"uuid": 1745893024139900200,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器内存 Limit",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(container_spec_memory_limit_bytes{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name)"
},
{
"uuid": 1745893024032984300,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器内存使用",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "(sum(container_memory_usage_bytes{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name))"
},
{
"uuid": 1745893024127585500,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器内存使用率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "((sum(container_memory_usage_bytes{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name)) /(sum(container_spec_memory_limit_bytes{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name)))*100"
},
{
"uuid": 1745893024093620000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器内核态CPU使用率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(rate(container_cpu_system_seconds_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}[1m])*100) by(name)"
},
{
"uuid": 1745893024102879000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器发生CPU throttle的比率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(rate(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}[1m]))by(name) *100"
},
{
"uuid": 1745893024143177000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器发生OOM次数",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(container_oom_events_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name)"
},
{
"uuid": 1745893024083942000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器启动时长(小时)",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum((time()-container_start_time_seconds{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"})) by (name)"
},
{
"uuid": 1745893024152466200,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器已使用的文件系统大小",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(container_fs_usage_bytes{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}) by (name)"
},
{
"uuid": 1745893024097849600,
"collector": "Pod",
"typ": "Kubernetes",
"name": "容器用户态CPU使用率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(rate(container_cpu_user_seconds_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}[1m])*100) by(name)"
},
{
"uuid": 1745893024036896800,
"collector": "Pod",
"typ": "Kubernetes",
"name": "文件系统写入速率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(rate(container_fs_writes_bytes_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}[1m])) by(name)"
},
{
"uuid": 1745893024057722000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "文件系统读取速率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\",",
"lang": "zh_CN",
"expression": "sum(rate(container_fs_reads_bytes_total{namespace=\"$namespace\", pod=~\"$pod_name\", image!~\".*pause.*\"}[1m])) by(name)"
},
{
"uuid": 1745893024166898000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "网络发送丢包数",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}[1m]))",
"lang": "zh_CN",
"expression": "sum(rate(container_network_transmit_packets_dropped_total{namespace=\"$namespace\", pod=~\"$pod_name\"}[1m])) by(name, interface)"
},
{
"uuid": 1745893024160266500,
"collector": "Pod",
"typ": "Kubernetes",
"name": "网络发送数据包",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}[1m]))",
"lang": "zh_CN",
"expression": "sum(rate(container_network_transmit_packets_total{namespace=\"$namespace\", pod=~\"$pod_name\"}[1m])) by(name, interface)"
},
{
"uuid": 1745893024069935000,
"collector": "Pod",
"typ": "Kubernetes",
"name": "网络发送速率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}[1m]))",
"lang": "zh_CN",
"expression": "sum(rate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=~\"$pod_name\"}[1m])) by(name, interface)"
},
{
"uuid": 1745893024163721700,
"collector": "Pod",
"typ": "Kubernetes",
"name": "网络发送错误数",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}[1m]))",
"lang": "zh_CN",
"expression": "sum(rate(container_network_transmit_errors_total{namespace=\"$namespace\", pod=~\"$pod_name\"}[1m])) by(name, interface)"
},
{
"uuid": 1745893024173485600,
"collector": "Pod",
"typ": "Kubernetes",
"name": "网络接收丢包数",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}[1m]))",
"lang": "zh_CN",
"expression": "sum(rate(container_network_receive_packets_dropped_total{namespace=\"$namespace\", pod=~\"$pod_name\"}[1m])) by(name, interface)"
},
{
"uuid": 1745893024156389600,
"collector": "Pod",
"typ": "Kubernetes",
"name": "网络接收数据包数",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}[1m]))",
"lang": "zh_CN",
"expression": "sum(rate(container_network_receive_packets_total{namespace=\"$namespace\", pod=~\"$pod_name\"}[1m])) by(name, interface)"
},
{
"uuid": 1745893024075864800,
"collector": "Pod",
"typ": "Kubernetes",
"name": "网络接收速率",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}[1m]))",
"lang": "zh_CN",
"expression": "sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=~\"$pod_name\"}[1m])) by(name, interface)"
},
{
"uuid": 1745893024170233300,
"collector": "Pod",
"typ": "Kubernetes",
"name": "网络接收错误数",
"unit": "",
"note": "Pod自身指标\n类型: pod=~\"$pod_name\"}[1m]))",
"lang": "zh_CN",
"expression": "sum(rate(container_network_receive_errors_total{namespace=\"$namespace\", pod=~\"$pod_name\"}[1m])) by(name, interface)"
}
]

View File

@@ -0,0 +1,8 @@
# # collect interval
# interval = 15
# # ntp servers
# ntp_servers = ["ntp.aliyun.com"]
# # response timeout in seconds
# timeout = 5

View File

@@ -1,6 +1,6 @@
{
"name": "机器常用指标 - 所有机器",
"tags": "categraf",
"name": "机器常用指标(使用 Categraf 作为采集器;如果只想看当前业务组内的机器,修改大盘变量 ident 的变量类型为机器标识即可)",
"tags": "Categraf",
"ident": "",
"uuid": 1737103014612000,
"configs": {
@@ -33,9 +33,11 @@
"datasourceValue": "${prom}",
"targets": [
{
"expr": "count(last_over_time(system_uptime{ident=~\"$ident\"}[1m]))",
"maxDataPoints": 240,
"refId": "A"
"expr": "count(last_over_time(system_uptime{ident=~\"$ident\"}[$__rate_interval]))",
"maxDataPoints": 480,
"refId": "A",
"step": 15,
"instant": false
}
],
"transformations": [
@@ -48,7 +50,7 @@
"maxPerRow": 4,
"custom": {
"textMode": "value",
"graphMode": "none",
"graphMode": "area",
"colorMode": "background",
"calc": "lastNotNull",
"valueField": "Value",
@@ -333,7 +335,7 @@
{
"expr": "100-cpu_usage_idle{ident=~\"$ident\",cpu=\"cpu-total\"}",
"legend": "{{ident}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
@@ -348,7 +350,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -428,9 +431,9 @@
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(diskio_io_time{ident=~\"$ident\"}[1m])/10",
"expr": "rate(diskio_io_time{ident=~\"$ident\"}[$__rate_interval])/10",
"legend": "{{ident}} {{name}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
@@ -445,7 +448,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -527,7 +531,7 @@
{
"expr": "mem_used_percent{ident=~\"$ident\"}",
"legend": "{{ident}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
@@ -542,7 +546,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -622,9 +627,9 @@
"datasourceValue": "${prom}",
"targets": [
{
"expr": "(1 - mem_swap_free / mem_swap_total)*100",
"expr": "(1 - mem_swap_free{ident=~\"$ident\"} / mem_swap_total{ident=~\"$ident\"})*100 and mem_swap_total{ident=~\"$ident\"} > 0",
"legend": "{{ident}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
@@ -640,7 +645,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -720,9 +726,9 @@
"datasourceValue": "${prom}",
"targets": [
{
"expr": "increase(kernel_vmstat_oom_kill{ident=~\"$ident\"}[5m])",
"expr": "rate(kernel_vmstat_oom_kill{ident=~\"$ident\"}[$__rate_interval])",
"legend": "{{ident}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
@@ -733,11 +739,12 @@
"options": {}
}
],
"name": "5分钟内OOM次数",
"name": "每秒OOM次数",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -762,8 +769,8 @@
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
"value": null,
"type": "base"
}
]
},
@@ -827,9 +834,9 @@
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(net_bytes_recv{ident=~\"$ident\"}[1m])*8",
"expr": "rate(net_bytes_recv{ident=~\"$ident\"}[$__rate_interval])*8",
"legend": "{{ident}} {{interface}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
@@ -844,7 +851,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -914,9 +922,9 @@
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(net_bytes_sent{ident=~\"$ident\"}[1m])*8",
"expr": "rate(net_bytes_sent{ident=~\"$ident\"}[$__rate_interval])*8",
"legend": "{{ident}} {{interface}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "B",
"step": 15
}
@@ -931,7 +939,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -986,21 +995,7 @@
]
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.03,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "",
"type": "timeseries",
"id": "cfb80689-de7b-47fb-9155-052b796dd7f5",
"layout": {
"h": 5,
@@ -1010,43 +1005,13 @@
"i": "cfb80689-de7b-47fb-9155-052b796dd7f5",
"isResizable": true
},
"maxPerRow": 4,
"name": "Time Wait 状态的连接数",
"options": {
"legend": {
"behaviour": "showItem",
"displayMode": "hidden"
},
"standardOptions": {
"decimals": 0
},
"thresholds": {
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
}
]
},
"tooltip": {
"mode": "single"
}
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "netstat_tcp_tw{ident=~\"$ident\"}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "B",
"step": 15
}
@@ -1057,8 +1022,61 @@
"options": {}
}
],
"type": "timeseries",
"version": "3.0.0"
"name": "Time Wait 状态的连接数",
"description": "",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"decimals": 0
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "#634CD9",
"value": null,
"type": "base"
}
]
},
"thresholdsStyle": {
"mode": "dashed"
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.03,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
},
{
"type": "timeseries",
@@ -1076,16 +1094,18 @@
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(net_err_in{ident=~\"$ident\"}[1m])",
"expr": "rate(net_err_in{ident=~\"$ident\"}[$__rate_interval])",
"legend": "{{ident}}-{{interface}}-in",
"maxDataPoints": 240,
"refId": "A"
"maxDataPoints": 480,
"refId": "A",
"step": 15
},
{
"expr": "rate(net_err_out{ident=~\"$ident\"}[1m])",
"expr": "rate(net_err_out{ident=~\"$ident\"}[$__rate_interval])",
"legend": "{{ident}}-{{interface}}-out",
"maxDataPoints": 240,
"refId": "B"
"maxDataPoints": 480,
"refId": "B",
"step": 15
}
],
"transformations": [
@@ -1098,7 +1118,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -1164,16 +1185,18 @@
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(net_drop_in{ident=~\"$ident\"}[1m])",
"expr": "rate(net_drop_in{ident=~\"$ident\"}[$__rate_interval])",
"legend": "{{ident}}-{{interface}}-in",
"maxDataPoints": 240,
"refId": "A"
"maxDataPoints": 480,
"refId": "A",
"step": 15
},
{
"expr": "rate(net_drop_out{ident=~\"$ident\"}[1m])",
"expr": "rate(net_drop_out{ident=~\"$ident\"}[$__rate_interval])",
"legend": "{{ident}}-{{interface}}-out",
"maxDataPoints": 240,
"refId": "B"
"maxDataPoints": 480,
"refId": "B",
"step": 15
}
],
"transformations": [
@@ -1186,7 +1209,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -1269,7 +1293,7 @@
{
"expr": "disk_device_error{ident=~\"$ident\"}",
"legend": "{{ident}} {{path}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
@@ -1284,7 +1308,8 @@
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -1293,7 +1318,7 @@
"selectMode": "single"
},
"standardOptions": {
"decimals": 2
"decimals": 0
},
"thresholds": {
"mode": "absolute",
@@ -1352,7 +1377,7 @@
{
"expr": "100 * conntrack_ip_conntrack_count{ident=~\"$ident\"} / conntrack_ip_conntrack_max{ident=~\"$ident\"}",
"legend": "ip_conntrack {{ident}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "A",
"step": 15
},
@@ -1360,7 +1385,7 @@
"__mode__": "__query__",
"expr": "100 * conntrack_nf_conntrack_count{ident=~\"$ident\"} / conntrack_nf_conntrack_max{ident=~\"$ident\"}",
"legend": "nf_conntrack {{ident}}",
"maxDataPoints": 240,
"maxDataPoints": 480,
"refId": "B",
"step": 15
}
@@ -1372,10 +1397,12 @@
}
],
"name": "Conntrack使用率",
"description": "`dmesg -T` 有时看到 conntrack table full 的报错,大概率就是 conntrack 限制太小了,需要调整内核参数",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "single"
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
@@ -1425,6 +1452,346 @@
}
}
]
},
{
"type": "timeseries",
"id": "7c90380f-5ab6-4aa5-9070-f604985a0389",
"layout": {
"h": 5,
"w": 12,
"x": 0,
"y": 32,
"i": "e7117d7c-b946-49fa-bc49-2afb0d2b3a44",
"isResizable": true
},
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "processes_total{ident=~\"$ident\"}",
"legend": "",
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Process 总量",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"decimals": 0
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(44, 157, 61, 1)",
"value": null,
"type": "base"
}
]
},
"thresholdsStyle": {
"mode": "dashed"
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.03,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
},
{
"type": "timeseries",
"id": "3334c222-dd92-49eb-9744-4ce0f59031e4",
"layout": {
"h": 5,
"w": 12,
"x": 12,
"y": 32,
"i": "0ecb9f26-4c4d-40d7-9934-5116e3ffa51a",
"isResizable": true
},
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "procstat_rlimit_num_fds_hard{ident=~\"$ident\"}",
"legend": "",
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "进程句柄数限制低于4096要注意",
"description": "以现在的硬件配置,通常句柄的 ulimit 应该比较大;如果低于 4096,大概率是忘记修改配置了,需要注意。这个数据是 Categraf 的 procstat 插件采集的。",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"decimals": 0
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(44, 157, 61, 1)",
"value": null,
"type": "base"
}
]
},
"thresholdsStyle": {
"mode": "dashed"
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.03,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
},
{
"type": "timeseries",
"id": "c3ee640f-e654-4fc7-aa2a-0dd8e9de67cb",
"layout": {
"h": 5,
"w": 12,
"x": 0,
"y": 37,
"i": "423adbbf-8c23-45ab-b7d5-9a81b72291f1",
"isResizable": true
},
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "ntp_offset_ms{ident=~\"$ident\"}",
"legend": "",
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "NTP时间偏移",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"util": "milliseconds",
"decimals": 2
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(44, 157, 61, 1)",
"value": null,
"type": "base"
}
]
},
"thresholdsStyle": {
"mode": "dashed"
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.03,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
},
{
"type": "timeseries",
"id": "9bb8d5ef-dc4e-419f-8e95-6dbb97b2afb6",
"layout": {
"h": 5,
"w": 12,
"x": 12,
"y": 37,
"i": "e97f1934-26e8-4bf3-be21-95307443f146",
"isResizable": true
},
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "linux_sysctl_fs_file_nr{ident=~\"$ident\"}/linux_sysctl_fs_file_max{ident=~\"$ident\"} * 100",
"legend": "",
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "操作系统文件句柄使用率",
"description": "",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden",
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"util": "percent",
"decimals": 0
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(44, 157, 61, 1)",
"value": null,
"type": "base"
}
]
},
"thresholdsStyle": {
"mode": "dashed"
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.03,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
}
],
"var": [
@@ -1450,4 +1817,4 @@
],
"version": "3.0.0"
}
}
}

View File

@@ -1,6 +1,8 @@
{
"name": "机器台账表格视图",
"tags": "",
"name": "机器台账表格视图(使用 Categraf 作为采集器)",
"tags": "Categraf",
"ident": "",
"uuid": 1717556327742611000,
"configs": {
"links": [
{
@@ -16,17 +18,7 @@
],
"panels": [
{
"custom": {
"calc": "lastNotNull",
"colorRange": [
"thresholds"
],
"detailUrl": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}",
"textMode": "valueAndName",
"valueField": "Value"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"type": "hexbin",
"id": "21b8b3ab-26aa-47cb-b814-f310f2d143aa",
"layout": {
"h": 5,
@@ -36,18 +28,43 @@
"x": 0,
"y": 0
},
"maxPerRow": 4,
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "topk(100, cpu_usage_active{cpu=\"cpu-total\", ident=~\"$ident\"})",
"instant": true,
"legend": "{{ident}}",
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "CPU利用率",
"maxPerRow": 4,
"custom": {
"textMode": "valueAndName",
"calc": "lastNotNull",
"valueField": "Value",
"colorRange": [
"thresholds"
],
"detailUrl": "/components/dashboard/detail?__uuid__=1737103014612000&ident=${__field.labels.ident}"
},
"options": {
"standardOptions": {
"util": "percent"
},
"thresholds": {
"steps": [
{
"color": "#ef3c3c",
"type": "",
"value": 95
"value": 95,
"type": ""
},
{
"color": "#ff656b",
@@ -65,38 +82,15 @@
"value": null
}
]
},
"standardOptions": {
"util": "percent",
"decimals": 2
}
},
"targets": [
{
"expr": "cpu_usage_active{cpu=\"cpu-total\", ident=~\"$ident\"}",
"instant": true,
"legend": "{{ident}}",
"maxDataPoints": 240,
"refId": "A"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "hexbin",
"version": "3.0.0"
}
},
{
"custom": {
"calc": "lastNotNull",
"colorRange": [
"thresholds"
],
"detailUrl": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}",
"textMode": "valueAndName",
"valueField": "Value"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"type": "hexbin",
"id": "86d4a502-21f7-4981-9b38-ed8e696b6f49",
"layout": {
"h": 5,
@@ -106,18 +100,43 @@
"x": 12,
"y": 0
},
"maxPerRow": 4,
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "topk(100, mem_used_percent{ident=~\"$ident\"})",
"instant": true,
"legend": "{{ident}}",
"maxDataPoints": 480,
"refId": "A",
"step": 15
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "内存利用率",
"maxPerRow": 4,
"custom": {
"textMode": "valueAndName",
"calc": "lastNotNull",
"valueField": "Value",
"colorRange": [
"thresholds"
],
"detailUrl": "/components/dashboard/detail?__uuid__=1737103014612000&ident=${__field.labels.ident}"
},
"options": {
"standardOptions": {
"util": "percent"
},
"thresholds": {
"steps": [
{
"color": "#ef3c3c",
"type": "",
"value": 95
"value": 95,
"type": ""
},
{
"color": "#ff656b",
@@ -135,48 +154,15 @@
"value": null
}
]
},
"standardOptions": {
"util": "percent",
"decimals": 2
}
},
"targets": [
{
"expr": "mem_used_percent{ident=~\"$ident\"}",
"instant": true,
"legend": "{{ident}}",
"maxDataPoints": 240,
"refId": "A"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "hexbin",
"version": "3.0.0"
}
},
{
"custom": {
"aggrDimension": "ident",
"calc": "lastNotNull",
"colorMode": "background",
"displayMode": "labelValuesToRows",
"linkMode": "appendLinkColumn",
"links": [
{
"targetBlank": true,
"title": "详情",
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}"
}
],
"nowrap": false,
"showHeader": true,
"sortColumn": "ident",
"sortOrder": "ascend",
"tableLayout": "fixed"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"type": "table",
"id": "77bf513a-8504-4d33-9efe-75aaf9abc9e4",
"layout": {
"h": 11,
@@ -186,10 +172,71 @@
"x": 0,
"y": 5
},
"maxPerRow": 4,
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "avg(cpu_usage_active{cpu=\"cpu-total\", ident=~\"$ident\"}) by (ident)",
"legend": "CPU使用率",
"maxDataPoints": 240,
"refId": "A"
},
{
"expr": "avg(mem_used_percent{ident=~\"$ident\"}) by (ident)",
"legend": "内存使用率",
"maxDataPoints": 240,
"refId": "B"
},
{
"expr": "avg(mem_total{ident=~\"$ident\"}) by (ident)",
"legend": "总内存",
"maxDataPoints": 240,
"refId": "C"
},
{
"expr": "avg(disk_used_percent{ident=~\"$ident\",path=\"/\"}) by (ident)",
"legend": "根分区使用率",
"maxDataPoints": 240,
"refId": "D"
}
],
"transformations": [
{
"id": "organize",
"options": {
"renameByName": {
"ident": "机器"
}
}
}
],
"name": "机器列表",
"maxPerRow": 4,
"custom": {
"showHeader": true,
"colorMode": "background",
"nowrap": false,
"tableLayout": "fixed",
"calc": "lastNotNull",
"displayMode": "labelValuesToRows",
"aggrDimension": "ident",
"sortColumn": "ident",
"sortOrder": "ascend",
"pageLimit": 500,
"linkMode": "appendLinkColumn",
"links": [
{
"targetBlank": true,
"title": "详情",
"url": "/components/dashboard/detail?__uuid__=1737103014612000&ident=${__field.labels.ident}"
}
]
},
"options": {
"standardOptions": {}
"standardOptions": {
"decimals": 2
}
},
"overrides": [
{
@@ -199,7 +246,8 @@
},
"properties": {
"standardOptions": {
"util": "percent"
"util": "percent",
"decimals": 2
},
"valueMappings": [
{
@@ -239,7 +287,8 @@
},
"properties": {
"standardOptions": {
"util": "percent"
"util": "percent",
"decimals": 2
},
"valueMappings": [
{
@@ -320,66 +369,32 @@
},
"type": "special"
}
],
"targets": [
{
"expr": "avg(cpu_usage_active{cpu=\"cpu-total\", ident=~\"$ident\"}) by (ident)",
"legend": "CPU使用率",
"maxDataPoints": 240,
"refId": "A"
},
{
"expr": "avg(mem_used_percent{ident=~\"$ident\"}) by (ident)",
"legend": "内存使用率",
"maxDataPoints": 240,
"refId": "B"
},
{
"expr": "avg(mem_total{ident=~\"$ident\"}) by (ident)",
"legend": "总内存",
"maxDataPoints": 240,
"refId": "C"
},
{
"expr": "avg(disk_used_percent{ident=~\"$ident\",path=\"/\"}) by (ident)",
"legend": "根分区使用率",
"maxDataPoints": 240,
"refId": "D"
}
],
"transformations": [
{
"id": "organize",
"options": {
"renameByName": {
"ident": "机器"
}
}
}
],
"type": "table",
"version": "3.0.0"
]
}
],
"var": [
{
"definition": "prometheus",
"name": "prom",
"type": "datasource"
"label": "数据源",
"type": "datasource",
"hide": false,
"definition": "prometheus"
},
{
"name": "ident",
"label": "机器",
"type": "query",
"hide": false,
"multi": true,
"allOption": true,
"allValue": ".*",
"datasource": {
"cate": "prometheus",
"value": "${prom}"
},
"definition": "label_values(system_load1,ident)",
"multi": true,
"name": "ident",
"type": "query"
"definition": "label_values(system_load1,ident)"
}
],
"version": "3.0.0"
},
"uuid": 1717556327742611000
}
}

View File

@@ -1,18 +1,12 @@
{
"id": 0,
"group_id": 0,
"name": "Processes by UlricQin",
"name": "机器进程数量统计(使用 Categraf 作为采集器)",
"tags": "Categraf",
"ident": "",
"tags": "Categraf Linux OS",
"create_at": 0,
"create_by": "",
"update_at": 0,
"update_by": "",
"uuid": 1717556327738575000,
"configs": {
"panels": [
{
"custom": {
"baseColor": "#9470FF",
"calc": "lastNotNull",
"serieWidth": 20,
"sortOrder": "desc"
@@ -41,7 +35,17 @@
},
"type": "range"
}
]
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "#9470FF",
"type": "base",
"value": null
}
]
}
},
"targets": [
{
@@ -62,7 +66,6 @@
},
{
"custom": {
"baseColor": "#9470FF",
"calc": "lastNotNull",
"serieWidth": 20,
"sortOrder": "desc"
@@ -91,7 +94,17 @@
},
"type": "range"
}
]
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "#9470FF",
"type": "base",
"value": null
}
]
}
},
"targets": [
{
@@ -112,7 +125,6 @@
},
{
"custom": {
"baseColor": "#9470FF",
"calc": "lastNotNull",
"serieWidth": 20,
"sortOrder": "desc"
@@ -150,7 +162,17 @@
},
"type": "range"
}
]
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "#9470FF",
"type": "base",
"value": null
}
]
}
},
"targets": [
{
@@ -216,30 +238,26 @@
],
"var": [
{
"definition": "prometheus",
"label": "",
"name": "Datasource",
"type": "datasource"
"label": "数据源",
"type": "datasource",
"hide": false,
"definition": "prometheus"
},
{
"allOption": true,
"name": "ident",
"label": "机器",
"type": "query",
"hide": false,
"datasource": {
"cate": "prometheus",
"value": "${Datasource}"
},
"definition": "label_values(processes_running, ident)",
"label": "Host",
"multi": true,
"name": "ident",
"type": "query"
"allOption": true
}
],
"version": "3.0.0"
},
"public": 0,
"public_cate": 0,
"bgids": null,
"built_in": 0,
"hide": 0,
"uuid": 1717556327738575000
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,267 +0,0 @@
{
"id": 0,
"group_id": 0,
"name": "Linux Host by Categraf Overview",
"ident": "",
"tags": "",
"create_at": 0,
"create_by": "",
"update_at": 0,
"update_by": "",
"configs": {
"links": [
{
"targetBlank": true,
"title": "n9e",
"url": "https://n9e.github.io/"
},
{
"targetBlank": true,
"title": "author",
"url": "http://flashcat.cloud/"
}
],
"panels": [
{
"collapsed": true,
"id": "e5d14dd7-4417-42bd-b7ba-560f34d299a2",
"layout": {
"h": 1,
"i": "e5d14dd7-4417-42bd-b7ba-560f34d299a2",
"isResizable": false,
"w": 24,
"x": 0,
"y": 0
},
"name": "整体概况",
"type": "row"
},
{
"custom": {
"calc": "lastNotNull",
"colSpan": 1,
"colorMode": "value",
"textMode": "value",
"textSize": {
"value": 50
}
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "41f37540-e695-492a-9d2f-24bfd2d36805",
"layout": {
"h": 3,
"i": "41f37540-e695-492a-9d2f-24bfd2d36805",
"isResizable": true,
"w": 3,
"x": 0,
"y": 1
},
"name": "监控机器数",
"options": {
"standardOptions": {}
},
"targets": [
{
"expr": "count(system_load1)",
"refId": "A"
}
],
"type": "stat",
"version": "2.0.0"
},
{
"custom": {
"baseColor": "#cd75eb",
"calc": "lastNotNull",
"serieWidth": 20,
"sortOrder": "desc"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "585bfc50-7c92-42b1-88ee-5b725b640418",
"layout": {
"h": 3,
"i": "585bfc50-7c92-42b1-88ee-5b725b640418",
"isResizable": true,
"w": 9,
"x": 3,
"y": 1
},
"name": "内存使用率 top10",
"options": {
"standardOptions": {},
"valueMappings": [
{
"match": {
"from": 60
},
"result": {
"color": "#f8070e"
},
"type": "range"
}
]
},
"targets": [
{
"expr": "topk(10, (mem_used_percent))",
"legend": "{{ident}}",
"refId": "A"
}
],
"type": "barGauge",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "60b1e833-3f03-45bb-9385-a3825904a0ac",
"layout": {
"h": 3,
"i": "60b1e833-3f03-45bb-9385-a3825904a0ac",
"isResizable": true,
"w": 12,
"x": 12,
"y": 1
},
"name": "cpu使用率 top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
{
"expr": "topk(10, (100-cpu_usage_idle{cpu=\"cpu-total\"}))",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"baseColor": "#9470ff",
"calc": "lastNotNull",
"serieWidth": 20,
"sortOrder": "desc"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "69351db9-e646-4e5d-925a-cba29823b00d",
"layout": {
"h": 3,
"i": "69351db9-e646-4e5d-925a-cba29823b00d",
"isResizable": true,
"w": 12,
"x": 0,
"y": 4
},
"name": "磁盘分区使用率 top10",
"options": {
"standardOptions": {},
"valueMappings": [
{
"match": {
"from": 85
},
"result": {
"color": "#f00404"
},
"type": "range"
}
]
},
"targets": [
{
"expr": "topk(10, (disk_used_percent{path!~\"/var.*\"}))",
"legend": "{{ident}} {{path}}",
"refId": "A"
}
],
"type": "barGauge",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "e3675ed9-6d3b-4a41-8d16-d6e82037dce3",
"layout": {
"h": 3,
"i": "e3675ed9-6d3b-4a41-8d16-d6e82037dce3",
"isResizable": true,
"w": 12,
"x": 12,
"y": 4
},
"name": "设备io util top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"targets": [
{
"expr": "topk(10, (rate(diskio_io_time[1m])/10))",
"legend": "",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
}
],
"var": [
{
"definition": "prometheus",
"name": "prom",
"type": "datasource"
},
{
"datasource": {
"cate": "prometheus",
"value": "${prom}"
},
"definition": "label_values(system_load1,ident)",
"name": "ident",
"type": "query"
}
],
"version": "3.0.0"
},
"public": 0,
"public_cate": 0,
"bgids": null,
"built_in": 0,
"hide": 0,
"uuid": 1717556327746983000
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,269 +0,0 @@
{
"id": 0,
"group_id": 0,
"name": "HOST by Node Exporter Overview",
"ident": "",
"tags": "Prometheus Host",
"create_at": 0,
"create_by": "",
"update_at": 0,
"update_by": "",
"configs": {
"links": [
{
"targetBlank": true,
"title": "n9e",
"url": "https://n9e.gitee.io/"
},
{
"targetBlank": true,
"title": "author",
"url": "http://flashcat.cloud/"
}
],
"panels": [
{
"collapsed": true,
"id": "3173366d-01a2-420e-8878-75124b0051b6",
"layout": {
"h": 1,
"i": "3173366d-01a2-420e-8878-75124b0051b6",
"isResizable": false,
"w": 24,
"x": 0,
"y": 0
},
"name": "整体概况",
"type": "row"
},
{
"custom": {
"calc": "lastNotNull",
"colSpan": 1,
"colorMode": "value",
"textMode": "value",
"textSize": {
"value": 40
}
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "9a5e3292-b346-4ccf-a793-b83a2f8ac8c5",
"layout": {
"h": 3,
"i": "9a5e3292-b346-4ccf-a793-b83a2f8ac8c5",
"isResizable": true,
"w": 3,
"x": 0,
"y": 1
},
"name": "监控机器数",
"options": {
"standardOptions": {}
},
"targets": [
{
"expr": "count(node_boot_time_seconds)",
"refId": "A"
}
],
"type": "stat",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "",
"id": "e1925fc8-cb05-467b-ba82-bb5cb6be7595",
"layout": {
"h": 3,
"i": "e1925fc8-cb05-467b-ba82-bb5cb6be7595",
"isResizable": true,
"w": 9,
"x": 3,
"y": 1
},
"links": [],
"name": "cpu使用率 top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"targets": [
{
"expr": "topk(10,100-(avg by (mode, instance)(rate(node_cpu_seconds_total{mode=\"idle\"}[1m])))*100)",
"legend": "{{instance}}",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "327b7e4b-6ec1-47e1-8840-d31cf4b5532b",
"layout": {
"h": 3,
"i": "327b7e4b-6ec1-47e1-8840-d31cf4b5532b",
"isResizable": true,
"w": 12,
"x": 12,
"y": 1
},
"name": "内存使用率 top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"targets": [
{
"expr": "topk(10,(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100)",
"legend": "{{instance}}",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "5a9d4a65-3f73-42cc-859e-fc0b82791b59",
"layout": {
"h": 3,
"i": "5a9d4a65-3f73-42cc-859e-fc0b82791b59",
"isResizable": true,
"w": 12,
"x": 0,
"y": 4
},
"name": "磁盘分区使用率 top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"targets": [
{
"expr": "topk(10,(node_filesystem_avail_bytes{device!~'rootfs', device!~\"tmpfs\",mountpoint!~\"/var/lib.*\"} * 100) / node_filesystem_size_bytes{device!~'rootfs', device!~\"tmpfs\",mountpoint!~\"/var/lib.*\"})",
"legend": "{{instance}}-{{mountpoint}}",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "fa764e4b-5ca9-45d8-b12e-604f8743f9d9",
"layout": {
"h": 3,
"i": "fa764e4b-5ca9-45d8-b12e-604f8743f9d9",
"isResizable": true,
"w": 12,
"x": 12,
"y": 4
},
"name": "设备io util top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"targets": [
{
"expr": "topk(10,rate(node_disk_io_time_seconds_total[5m]) * 100)",
"legend": "{{instance}}-{{device}}",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
}
],
"var": [
{
"definition": "prometheus",
"name": "prom",
"type": "datasource"
},
{
"datasource": {
"cate": "prometheus",
"value": "${prom}"
},
"definition": "label_values(node_uname_info, instance)",
"name": "node",
"selected": "$node",
"type": "query"
}
],
"version": "3.0.0"
},
"public": 0,
"public_cate": 0,
"bgids": null,
"built_in": 0,
"hide": 0,
"uuid": 1717556327752931000
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,264 +0,0 @@
{
"id": 0,
"group_id": 0,
"name": "HOST by Telegraf Overview",
"ident": "",
"tags": "",
"create_at": 0,
"create_by": "",
"update_at": 0,
"update_by": "",
"configs": {
"links": [
{
"targetBlank": true,
"title": "n9e",
"url": "https://n9e.gitee.io/"
},
{
"targetBlank": true,
"title": "author",
"url": "http://flashcat.cloud/"
}
],
"panels": [
{
"collapsed": true,
"id": "0f6a1394-7cf9-4958-bcfe-2fbb59e77c12",
"layout": {
"h": 1,
"i": "0f6a1394-7cf9-4958-bcfe-2fbb59e77c12",
"isResizable": false,
"w": 24,
"x": 0,
"y": 0
},
"name": "整体概况",
"type": "row"
},
{
"custom": {
"calc": "lastNotNull",
"colSpan": 1,
"colorMode": "value",
"textMode": "value",
"textSize": {
"value": 50
}
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "877b6db5-e82c-499a-9ebc-8ad72c2891a8",
"layout": {
"h": 3,
"i": "877b6db5-e82c-499a-9ebc-8ad72c2891a8",
"isResizable": true,
"w": 3,
"x": 0,
"y": 1
},
"name": "监控机器数",
"options": {
"standardOptions": {}
},
"targets": [
{
"expr": "count(system_load1)",
"refId": "A"
}
],
"type": "stat",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "29a3e6ae-d278-49b3-972b-f12a6c7c091c",
"layout": {
"h": 3,
"i": "29a3e6ae-d278-49b3-972b-f12a6c7c091c",
"isResizable": true,
"w": 9,
"x": 3,
"y": 1
},
"name": "内存率 top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"targets": [
{
"expr": "topk(10, mem_used_percent)",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "9f2a24d5-d19f-4651-b76d-add6b9011821",
"layout": {
"h": 3,
"i": "9f2a24d5-d19f-4651-b76d-add6b9011821",
"isResizable": true,
"w": 12,
"x": 12,
"y": 1
},
"name": "cpu使用率 top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
{
"expr": "topk(10, (100-cpu_usage_idle{cpu=\"cpu-total\"}))",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "dcd60296-db84-4562-99f3-2829c2f064a4",
"layout": {
"h": 3,
"i": "dcd60296-db84-4562-99f3-2829c2f064a4",
"isResizable": true,
"w": 12,
"x": 0,
"y": 4
},
"name": "磁盘分区使用率 top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
{
"expr": "topk(10, (disk_used_percent{path!~\"/var.*\"}))",
"legend": "{{ident}}-{{path}}",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "ef7df29d-7dce-4788-ae42-d21d842c67d6",
"layout": {
"h": 3,
"i": "ef7df29d-7dce-4788-ae42-d21d842c67d6",
"isResizable": true,
"w": 12,
"x": 12,
"y": 4
},
"name": "设备io util top10",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"targets": [
{
"expr": "topk(10, (rate(diskio_io_time[1m])/10))",
"legend": "",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
}
],
"var": [
{
"definition": "prometheus",
"name": "prom",
"type": "datasource"
},
{
"datasource": {
"cate": "prometheus",
"value": "${prom}"
},
"definition": "label_values(system_load1,ident)",
"name": "ident",
"type": "query"
}
],
"version": "3.0.0"
},
"public": 0,
"public_cate": 0,
"bgids": null,
"built_in": 0,
"hide": 0,
"uuid": 1717556327757522000
}

View File

@@ -53,4 +53,9 @@ nr_alloc_batch = 0
## arp_package
统计 ARP 包的数量,该插件依赖 cgo如果需要该插件需要下载 `with-cgo` 的 categraf 发布包。
统计 ARP 包的数量,该插件依赖 cgo如果需要该插件需要下载 `with-cgo` 的 categraf 发布包。
## ntp
监控机器时间偏移量,只需要给出 ntp 服务端地址Categraf 就会周期性去请求,对比本机时间,得到偏移量,监控指标是 ntp_offset_ms 顾名思义,单位是毫秒,一般这个值不能超过 1000

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,163 @@
package memsto
import (
"fmt"
"sync"
"time"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/pkg/errors"
"github.com/toolkits/pkg/logger"
)
// EventProcessorCacheType caches event pipelines (with their instantiated
// processors) in memory, refreshed periodically from the database.
type EventProcessorCacheType struct {
	statTotal       int64 // total pipeline count observed at last sync
	statLastUpdated int64 // max update timestamp observed at last sync
	ctx             *ctx.Context
	stats           *Stats
	sync.RWMutex                                    // guards eventPipelines
	eventPipelines map[int64]*models.EventPipeline // key: pipeline id
}
// NewEventProcessorCache builds an event-pipeline cache, performs the initial
// synchronization and starts the background refresh loop.
func NewEventProcessorCache(ctx *ctx.Context, stats *Stats) *EventProcessorCacheType {
	cache := &EventProcessorCacheType{
		statTotal:       -1,
		statLastUpdated: -1,
		ctx:             ctx,
		stats:           stats,
		eventPipelines:  map[int64]*models.EventPipeline{},
	}
	cache.SyncEventProcessors()
	return cache
}
// Reset clears the cached pipelines and statistics so that the next sync
// reloads everything from the database.
func (epc *EventProcessorCacheType) Reset() {
	epc.Lock()
	defer epc.Unlock()

	epc.eventPipelines = map[int64]*models.EventPipeline{}
	epc.statTotal = -1
	epc.statLastUpdated = -1
}
// StatChanged reports whether the database statistics differ from what the
// cache last observed.
func (epc *EventProcessorCacheType) StatChanged(total, lastUpdated int64) bool {
	return epc.statTotal != total || epc.statLastUpdated != lastUpdated
}
// Set replaces the cached pipeline map and records the statistics snapshot
// the map was built from.
func (epc *EventProcessorCacheType) Set(m map[int64]*models.EventPipeline, total, lastUpdated int64) {
	epc.Lock()
	epc.eventPipelines = m
	epc.Unlock()

	// stat fields are only written by the single sync goroutine, so no lock
	epc.statTotal = total
	epc.statLastUpdated = lastUpdated
}
// Get returns the pipeline with the given id, or nil when it is not cached.
func (epc *EventProcessorCacheType) Get(processorId int64) *models.EventPipeline {
	epc.RLock()
	pipeline := epc.eventPipelines[processorId]
	epc.RUnlock()
	return pipeline
}
// GetProcessorsById returns the processors of the pipeline with the given id.
// An empty (non-nil) slice is returned when the pipeline is unknown.
func (epc *EventProcessorCacheType) GetProcessorsById(processorId int64) []models.Processor {
	epc.RLock()
	defer epc.RUnlock()

	if pipeline, ok := epc.eventPipelines[processorId]; ok {
		return pipeline.Processors
	}
	return []models.Processor{}
}
// GetProcessorIds returns the ids of all cached pipelines (in map order,
// i.e. unsorted).
func (epc *EventProcessorCacheType) GetProcessorIds() []int64 {
	epc.RLock()
	defer epc.RUnlock()

	ids := make([]int64, 0, len(epc.eventPipelines))
	for id := range epc.eventPipelines {
		ids = append(ids, id)
	}
	return ids
}
// SyncEventProcessors performs the initial cache load — aborting the process
// on failure — and then launches the periodic refresh goroutine.
func (epc *EventProcessorCacheType) SyncEventProcessors() {
	if err := epc.syncEventProcessors(); err != nil {
		fmt.Println("failed to sync event processors:", err)
		exit(1)
	}

	go epc.loopSyncEventProcessors()
}
// loopSyncEventProcessors refreshes the cache every 9 seconds, forever.
// Failures are logged and retried on the next tick; the loop has no
// cancellation path because the cache lives for the process lifetime.
func (epc *EventProcessorCacheType) loopSyncEventProcessors() {
	// idiomatic duration instead of time.Duration(9000) * time.Millisecond
	const interval = 9 * time.Second
	for {
		time.Sleep(interval)
		if err := epc.syncEventProcessors(); err != nil {
			logger.Warning("failed to sync event processors:", err)
		}
	}
}
// syncEventProcessors reloads the event-pipeline cache from the database when
// the statistics indicate a change, and reports sync metrics either way.
func (epc *EventProcessorCacheType) syncEventProcessors() error {
	start := time.Now()

	stat, err := models.EventPipelineStatistics(epc.ctx)
	if err != nil {
		dumper.PutSyncRecord("event_processors", start.Unix(), -1, -1, "failed to query statistics: "+err.Error())
		return errors.WithMessage(err, "failed to exec StatisticsGet for EventPipeline")
	}

	// nothing changed since the last sync: record zero-cost metrics and bail
	if !epc.StatChanged(stat.Total, stat.LastUpdated) {
		epc.stats.GaugeCronDuration.WithLabelValues("sync_event_processors").Set(0)
		epc.stats.GaugeSyncNumber.WithLabelValues("sync_event_processors").Set(0)
		dumper.PutSyncRecord("event_processors", start.Unix(), -1, -1, "not changed")
		return nil
	}

	lst, err := models.ListEventPipelines(epc.ctx)
	if err != nil {
		dumper.PutSyncRecord("event_processors", start.Unix(), -1, -1, "failed to query records: "+err.Error())
		return errors.WithMessage(err, "failed to exec ListEventPipelines")
	}

	m := make(map[int64]*models.EventPipeline)
	for i := 0; i < len(lst); i++ {
		eventPipeline := lst[i]
		// materialize each processor config into a concrete processor;
		// unknown processor types are logged and skipped rather than
		// failing the whole sync
		for _, p := range eventPipeline.ProcessorConfigs {
			processor, err := models.GetProcessorByType(p.Typ, p.Config)
			if err != nil {
				logger.Warningf("event_pipeline_id: %d, event:%+v, processor:%+v type not found", eventPipeline.ID, eventPipeline, p)
				continue
			}
			eventPipeline.Processors = append(eventPipeline.Processors, processor)
		}
		m[lst[i].ID] = eventPipeline
	}

	// publish the new map and remember the statistics snapshot it came from
	epc.Set(m, stat.Total, stat.LastUpdated)

	ms := time.Since(start).Milliseconds()
	epc.stats.GaugeCronDuration.WithLabelValues("sync_event_processors").Set(float64(ms))
	epc.stats.GaugeSyncNumber.WithLabelValues("sync_event_processors").Set(float64(len(m)))
	logger.Infof("timer: sync event processors done, cost: %dms, number: %d", ms, len(m))
	dumper.PutSyncRecord("event_processors", start.Unix(), ms, len(m), "success")

	return nil
}

View File

@@ -2,8 +2,10 @@ package memsto
import (
"crypto/tls"
"encoding/json"
"fmt"
"net/http"
"strings"
"sync"
"time"
@@ -14,9 +16,23 @@ import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/pkg/errors"
"github.com/toolkits/pkg/container/list"
"github.com/toolkits/pkg/logger"
)
// NotifyTask represents one queued notification delivery job for a channel
// consumer: the events to report, the rule and channel to deliver through,
// the rendered template content and the recipient list.
type NotifyTask struct {
	Events        []*models.AlertCurEvent
	NotifyRuleId  int64
	NotifyChannel *models.NotifyChannelConfig
	TplContent    map[string]interface{}
	CustomParams  map[string]string
	Sendtos       []string
}

// NotifyRecordFunc records the outcome of one notification delivery attempt.
type NotifyRecordFunc func(ctx *ctx.Context, events []*models.AlertCurEvent, notifyRuleId int64, channelName, target, resp string, err error)
type NotifyChannelCacheType struct {
statTotal int64
statLastUpdated int64
@@ -24,13 +40,18 @@ type NotifyChannelCacheType struct {
stats *Stats
sync.RWMutex
channels map[int64]*models.NotifyChannelConfig // key: channel id
httpConcurrency map[int64]chan struct{}
channels map[int64]*models.NotifyChannelConfig // key: channel id
channelsQueue map[int64]*list.SafeListLimited
httpClient map[int64]*http.Client
smtpCh map[int64]chan *models.EmailContext
smtpQuitCh map[int64]chan struct{}
// 队列消费者控制
queueQuitCh map[int64]chan struct{}
// 通知记录回调函数
notifyRecordFunc NotifyRecordFunc
}
func NewNotifyChannelCache(ctx *ctx.Context, stats *Stats) *NotifyChannelCacheType {
@@ -40,18 +61,20 @@ func NewNotifyChannelCache(ctx *ctx.Context, stats *Stats) *NotifyChannelCacheTy
ctx: ctx,
stats: stats,
channels: make(map[int64]*models.NotifyChannelConfig),
channelsQueue: make(map[int64]*list.SafeListLimited),
queueQuitCh: make(map[int64]chan struct{}),
httpClient: make(map[int64]*http.Client),
smtpCh: make(map[int64]chan *models.EmailContext),
smtpQuitCh: make(map[int64]chan struct{}),
}
ncc.SyncNotifyChannels()
return ncc
}
func (ncc *NotifyChannelCacheType) Reset() {
ncc.Lock()
defer ncc.Unlock()
ncc.statTotal = -1
ncc.statLastUpdated = -1
ncc.channels = make(map[int64]*models.NotifyChannelConfig)
// SetNotifyRecordFunc installs the callback used to persist notification
// delivery records.
func (ncc *NotifyChannelCacheType) SetNotifyRecordFunc(fn NotifyRecordFunc) {
	ncc.notifyRecordFunc = fn
}
func (ncc *NotifyChannelCacheType) StatChanged(total, lastUpdated int64) bool {
@@ -62,30 +85,253 @@ func (ncc *NotifyChannelCacheType) StatChanged(total, lastUpdated int64) bool {
return true
}
func (ncc *NotifyChannelCacheType) Set(m map[int64]*models.NotifyChannelConfig, httpConcurrency map[int64]chan struct{}, httpClient map[int64]*http.Client,
smtpCh map[int64]chan *models.EmailContext, quitCh map[int64]chan struct{}, total, lastUpdated int64) {
func (ncc *NotifyChannelCacheType) Set(m map[int64]*models.NotifyChannelConfig, total, lastUpdated int64) {
ncc.Lock()
for _, k := range ncc.httpConcurrency {
close(k)
}
ncc.httpConcurrency = httpConcurrency
ncc.channels = m
ncc.httpClient = httpClient
ncc.smtpCh = smtpCh
defer ncc.Unlock()
for i := range ncc.smtpQuitCh {
close(ncc.smtpQuitCh[i])
}
// 1. 处理需要删除的通道
ncc.removeDeletedChannels(m)
ncc.smtpQuitCh = quitCh
ncc.Unlock()
// 2. 处理新增和更新的通道
ncc.addOrUpdateChannels(m)
// only one goroutine used, so no need lock
ncc.statTotal = total
ncc.statLastUpdated = lastUpdated
}
// removeDeletedChannels tears down all resources of channels that no longer
// exist in the freshly loaded configuration.
// NOTE(review): appears to be called while Set holds ncc's write lock — confirm.
func (ncc *NotifyChannelCacheType) removeDeletedChannels(newChannels map[int64]*models.NotifyChannelConfig) {
	for chID := range ncc.channels {
		if _, exists := newChannels[chID]; !exists {
			logger.Infof("removing deleted channel %d", chID)

			// stop the queue consumer goroutines
			if quitCh, exists := ncc.queueQuitCh[chID]; exists {
				close(quitCh)
				delete(ncc.queueQuitCh, chID)
			}

			// drop the pending-task queue
			delete(ncc.channelsQueue, chID)

			// drop the HTTP client
			delete(ncc.httpClient, chID)

			// stop the SMTP sender goroutine
			if quitCh, exists := ncc.smtpQuitCh[chID]; exists {
				close(quitCh)
				delete(ncc.smtpQuitCh, chID)
				delete(ncc.smtpCh, chID)
			}

			// finally drop the channel configuration itself
			delete(ncc.channels, chID)
		}
	}
}
// addOrUpdateChannels creates resources for newly appearing channels and
// rebuilds the resources of channels whose configuration changed (detected
// via channelConfigChanged). Unchanged channels are left untouched.
// NOTE(review): appears to be called while Set holds ncc's write lock — confirm.
func (ncc *NotifyChannelCacheType) addOrUpdateChannels(newChannels map[int64]*models.NotifyChannelConfig) {
	for chID, newChannel := range newChannels {
		oldChannel, exists := ncc.channels[chID]
		if exists {
			if ncc.channelConfigChanged(oldChannel, newChannel) {
				logger.Infof("updating channel %d (new: %t)", chID, !exists)
				ncc.stopChannelResources(chID)
			} else {
				logger.Infof("channel %d config not changed", chID)
				continue
			}
		}

		// store (or replace) the channel configuration
		ncc.channels[chID] = newChannel

		// create the per-type resources
		switch newChannel.RequestType {
		case "http", "flashduty":
			// build the HTTP client used to deliver notifications
			if newChannel.RequestConfig != nil && newChannel.RequestConfig.HTTPRequestConfig != nil {
				cli, err := models.GetHTTPClient(newChannel)
				if err != nil {
					logger.Warningf("failed to create HTTP client for channel %d: %v", chID, err)
				} else {
					if ncc.httpClient == nil {
						ncc.httpClient = make(map[int64]*http.Client)
					}
					ncc.httpClient[chID] = cli
				}
			}

			// "http" channels additionally get a task queue with consumers
			if newChannel.RequestType == "http" {
				ncc.startHttpChannel(chID, newChannel)
			}
		case "smtp":
			// spawn a dedicated email sender goroutine for this channel
			if newChannel.RequestConfig != nil && newChannel.RequestConfig.SMTPRequestConfig != nil {
				ch := make(chan *models.EmailContext)
				quit := make(chan struct{})
				go ncc.startEmailSender(chID, newChannel.RequestConfig.SMTPRequestConfig, ch, quit)

				if ncc.smtpCh == nil {
					ncc.smtpCh = make(map[int64]chan *models.EmailContext)
				}
				if ncc.smtpQuitCh == nil {
					ncc.smtpQuitCh = make(map[int64]chan struct{})
				}
				ncc.smtpCh[chID] = ch
				ncc.smtpQuitCh[chID] = quit
			}
		}
	}
}
// channelConfigChanged reports whether a channel must be rebuilt: either side
// is nil, or its UpdateAt timestamp moved.
func (ncc *NotifyChannelCacheType) channelConfigChanged(oldChannel, newChannel *models.NotifyChannelConfig) bool {
	if oldChannel == nil || newChannel == nil {
		return true
	}
	// compare the database update timestamps
	return oldChannel.UpdateAt != newChannel.UpdateAt
}
// stopChannelResources shuts down the consumer goroutines and SMTP sender of
// a channel before it is rebuilt or removed.
func (ncc *NotifyChannelCacheType) stopChannelResources(chID int64) {
	// stop the HTTP consumer goroutines and drop their queue
	if quitCh, exists := ncc.queueQuitCh[chID]; exists {
		close(quitCh)
		delete(ncc.queueQuitCh, chID)
		delete(ncc.channelsQueue, chID)
	}

	// stop the SMTP sender goroutine
	if quitCh, exists := ncc.smtpQuitCh[chID]; exists {
		close(quitCh)
		delete(ncc.smtpQuitCh, chID)
		delete(ncc.smtpCh, chID)
	}
}
// startHttpChannel 启动HTTP通道的队列和消费者
// Creates the bounded task queue for the channel and spawns Concurrency
// consumer goroutines that drain it until the quit channel is closed.
//
// Fix: a non-positive Concurrency previously started zero consumers, so the
// queue would fill but never drain; it is now clamped to at least 1.
func (ncc *NotifyChannelCacheType) startHttpChannel(chID int64, channel *models.NotifyChannelConfig) {
	if channel.RequestConfig == nil || channel.RequestConfig.HTTPRequestConfig == nil {
		logger.Warningf("notify channel %+v http request config not found", channel)
		return
	}

	// 创建队列
	queue := list.NewSafeListLimited(100000)
	ncc.channelsQueue[chID] = queue

	// 启动消费者协程
	quitCh := make(chan struct{})
	ncc.queueQuitCh[chID] = quitCh

	// 启动指定数量的消费者协程
	concurrency := channel.RequestConfig.HTTPRequestConfig.Concurrency
	if concurrency <= 0 {
		// 防御:并发数未配置或非法时至少启动一个消费者,避免队列只进不出
		concurrency = 1
	}
	for i := 0; i < concurrency; i++ {
		go ncc.startNotifyConsumer(chID, queue, quitCh)
	}

	logger.Infof("started %d notify consumers for channel %d", concurrency, chID)
}
// startNotifyConsumer 启动通知消费者协程
// Polls the channel's queue until quitCh is closed, sleeping briefly when
// the queue is empty, and hands each dequeued *NotifyTask to
// processNotifyTask.
func (ncc *NotifyChannelCacheType) startNotifyConsumer(channelID int64, queue *list.SafeListLimited, quitCh chan struct{}) {
	logger.Infof("starting notify consumer for channel %d", channelID)

	for {
		// exit promptly once the channel is being torn down
		select {
		case <-quitCh:
			logger.Infof("notify consumer for channel %d stopped", channelID)
			return
		default:
		}

		item := queue.PopBack()
		if item == nil {
			// 队列为空,等待一段时间
			time.Sleep(100 * time.Millisecond)
			continue
		}

		nt, ok := item.(*NotifyTask)
		if !ok {
			logger.Errorf("invalid task type in queue for channel %d", channelID)
			continue
		}

		// 处理通知任务
		ncc.processNotifyTask(nt)
	}
}
// processNotifyTask 处理通知任务(仅处理 http 类型)
// flashduty 等其他类型不走队列;当 Sendtos 为空或配置引用 $sendtos 时一次性
// 批量发送,否则对每个接收人单独发送,并分别记录通知结果。
//
// Fixes: guard against empty task.Events (the log line indexed
// task.Events[0], panicking on an empty slice) and against a nil
// RequestConfig before dereferencing HTTPRequestConfig.
func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
	// 现在只处理 http 类型flashduty 保持直接发送
	if task == nil || task.NotifyChannel == nil || task.NotifyChannel.RequestType != "http" {
		return
	}

	if len(task.Events) == 0 {
		// 防御:空事件任务无法发送,也避免下面 task.Events[0] 越界
		logger.Warningf("notify_id: %d, channel_name: %v, task has no events", task.NotifyRuleId, task.NotifyChannel.Name)
		return
	}

	var httpCfg *models.HTTPRequestConfig
	if task.NotifyChannel.RequestConfig != nil {
		httpCfg = task.NotifyChannel.RequestConfig.HTTPRequestConfig
	}

	httpClient := ncc.GetHttpClient(task.NotifyChannel.ID)

	if len(task.Sendtos) == 0 || ncc.needBatchContacts(httpCfg) {
		resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, task.Sendtos, httpClient)
		logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
			task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos, resp, err)
		// 调用通知记录回调函数
		if ncc.notifyRecordFunc != nil {
			ncc.notifyRecordFunc(ncc.ctx, task.Events, task.NotifyRuleId, task.NotifyChannel.Name, ncc.getSendTarget(task.CustomParams, task.Sendtos), resp, err)
		}
		return
	}

	// per-recipient delivery: one request and one record per sendto
	for i := range task.Sendtos {
		sendto := []string{task.Sendtos[i]}
		resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, sendto, httpClient)
		logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
			task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos[i], resp, err)
		// 调用通知记录回调函数
		if ncc.notifyRecordFunc != nil {
			ncc.notifyRecordFunc(ncc.ctx, task.Events, task.NotifyRuleId, task.NotifyChannel.Name, ncc.getSendTarget(task.CustomParams, sendto), resp, err)
		}
	}
}
// needBatchContacts 判断是否需要批量发送联系人
// Reports whether the serialized request config references the "$sendtos"
// template variable, meaning all contacts go out in a single request.
func (ncc *NotifyChannelCacheType) needBatchContacts(requestConfig *models.HTTPRequestConfig) bool {
	if requestConfig == nil {
		return false
	}
	// best-effort: a marshal failure yields an empty string, i.e. "no"
	raw, _ := json.Marshal(requestConfig)
	return strings.Contains(string(raw), "$sendtos")
}
// getSendTarget 获取发送目标
// Returns the string recorded as the notification target: the joined
// sendtos when no custom params exist, otherwise the custom-param values
// with their last four runes masked (values of four runes or fewer are
// kept verbatim).
// NOTE(review): map iteration order is unspecified in Go, so the joined
// result order varies between calls — confirm consumers don't rely on it.
func (ncc *NotifyChannelCacheType) getSendTarget(customParams map[string]string, sendtos []string) string {
	if len(customParams) == 0 {
		return strings.Join(sendtos, ",")
	}

	masked := make([]string, 0, len(customParams))
	for _, v := range customParams {
		r := []rune(v)
		if len(r) <= 4 {
			masked = append(masked, v)
			continue
		}
		masked = append(masked, string(r[:len(r)-4])+"****")
	}
	return strings.Join(masked, ",")
}
func (ncc *NotifyChannelCacheType) Get(channelId int64) *models.NotifyChannelConfig {
ncc.RLock()
defer ncc.RUnlock()
@@ -117,6 +363,25 @@ func (ncc *NotifyChannelCacheType) GetChannelIds() []int64 {
return list
}
// EnqueueNotifyTask 新增:将通知任务加入队列
// Pushes a task onto the channel's bounded queue; returns false when the
// channel has no queue or the queue is full.
func (ncc *NotifyChannelCacheType) EnqueueNotifyTask(task *NotifyTask) bool {
	ncc.RLock()
	q := ncc.channelsQueue[task.NotifyChannel.ID]
	ncc.RUnlock()

	if q == nil {
		logger.Errorf("no queue found for channel %d", task.NotifyChannel.ID)
		return false
	}

	ok := q.PushFront(task)
	if !ok {
		logger.Warningf("failed to enqueue notify task for channel %d, queue is full", task.NotifyChannel.ID)
	}
	return ok
}
func (ncc *NotifyChannelCacheType) SyncNotifyChannels() {
err := ncc.syncNotifyChannels()
if err != nil {
@@ -162,38 +427,8 @@ func (ncc *NotifyChannelCacheType) syncNotifyChannels() error {
m[lst[i].ID] = lst[i]
}
httpConcurrency := make(map[int64]chan struct{})
httpClient := make(map[int64]*http.Client)
smtpCh := make(map[int64]chan *models.EmailContext)
quitCh := make(map[int64]chan struct{})
for i := range lst {
// todo 优化变更粒度
switch lst[i].RequestType {
case "http", "flashduty":
if lst[i].RequestConfig == nil || lst[i].RequestConfig.HTTPRequestConfig == nil {
logger.Warningf("notify channel %+v http request config not found", lst[i])
continue
}
cli, _ := models.GetHTTPClient(lst[i])
httpClient[lst[i].ID] = cli
httpConcurrency[lst[i].ID] = make(chan struct{}, lst[i].RequestConfig.HTTPRequestConfig.Concurrency)
for j := 0; j < lst[i].RequestConfig.HTTPRequestConfig.Concurrency; j++ {
httpConcurrency[lst[i].ID] <- struct{}{}
}
case "smtp":
ch := make(chan *models.EmailContext)
quit := make(chan struct{})
go ncc.startEmailSender(lst[i].ID, lst[i].RequestConfig.SMTPRequestConfig, ch, quit)
smtpCh[lst[i].ID] = ch
quitCh[lst[i].ID] = quit
default:
}
}
ncc.Set(m, httpConcurrency, httpClient, smtpCh, quitCh, stat.Total, stat.LastUpdated)
// 增量更新:只传递通道配置,让增量更新逻辑按需创建资源
ncc.Set(m, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
ncc.stats.GaugeCronDuration.WithLabelValues("sync_notify_channels").Set(float64(ms))
@@ -305,22 +540,3 @@ func (ncc *NotifyChannelCacheType) dialSmtp(quitCh chan struct{}, d *gomail.Dial
}
}
}
// HttpConcurrencyAdd acquires one concurrency token for the channel by
// receiving from its token channel; returns false for unknown channels.
// ok is false only when the token channel has been closed.
// NOTE(review): when all tokens are in use this receive blocks while the
// read lock is held — confirm writers cannot deadlock against it.
func (ncc *NotifyChannelCacheType) HttpConcurrencyAdd(channelId int64) bool {
	ncc.RLock()
	defer ncc.RUnlock()
	if _, ok := ncc.httpConcurrency[channelId]; !ok {
		return false
	}

	_, ok := <-ncc.httpConcurrency[channelId]
	return ok
}
// HttpConcurrencyDone returns one concurrency token to the channel's token
// channel; no-op for unknown channels.
// NOTE(review): the send blocks under the read lock if the token channel is
// already full (i.e. more Done calls than Add) — confirm callers pair them.
func (ncc *NotifyChannelCacheType) HttpConcurrencyDone(channelId int64) {
	ncc.RLock()
	defer ncc.RUnlock()
	if _, ok := ncc.httpConcurrency[channelId]; !ok {
		return
	}

	ncc.httpConcurrency[channelId] <- struct{}{}
}

View File

@@ -37,33 +37,35 @@ func (v *AlertAggrView) Verify() error {
return errors.New("rule is blank")
}
var validFields = []string{
"cluster",
"group_id",
"group_name",
"rule_id",
"rule_name",
"severity",
"runbook_url",
"target_ident",
"target_note",
}
arr := strings.Split(v.Rule, "::")
for i := 0; i < len(arr); i++ {
pair := strings.Split(arr[i], ":")
if len(pair) != 2 {
return errors.New("rule invalid")
if !strings.Contains(v.Rule, "{{") {
var validFields = []string{
"cluster",
"group_id",
"group_name",
"rule_id",
"rule_name",
"severity",
"runbook_url",
"target_ident",
"target_note",
}
if !(pair[0] == "field" || pair[0] == "tagkey") {
return errors.New("rule invalid")
}
arr := strings.Split(v.Rule, "::")
for i := 0; i < len(arr); i++ {
pair := strings.Split(arr[i], ":")
if len(pair) != 2 {
return errors.New("rule invalid")
}
if pair[0] == "field" {
// 只支持有限的field
if !slice.ContainsString(validFields, pair[1]) {
return fmt.Errorf("unsupported field: %s", pair[1])
if !(pair[0] == "field" || pair[0] == "tagkey") {
return errors.New("rule invalid")
}
if pair[0] == "field" {
// 只支持有限的field
if !slice.ContainsString(validFields, pair[1]) {
return fmt.Errorf("unsupported field: %s", pair[1])
}
}
}
}
@@ -137,3 +139,14 @@ func AlertAggrViewGet(ctx *ctx.Context, where string, args ...interface{}) (*Ale
return lst[0], nil
}
// GetAlertAggrViewByViewID 根据视图ID查询聚合视图;查不到时返回明确错误。
func GetAlertAggrViewByViewID(ctx *ctx.Context, viewID int64) (*AlertAggrView, error) {
	v, err := AlertAggrViewGet(ctx, "id = ?", viewID)
	switch {
	case err != nil:
		return nil, err
	case v == nil:
		return nil, errors.New("alert aggr view not found")
	default:
		return v, nil
	}
}

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"reflect"
"strconv"
"strings"
@@ -15,6 +16,7 @@ import (
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/unit"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -41,30 +43,30 @@ type AlertCurEvent struct {
CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe
RunbookUrl string `json:"runbook_url"`
NotifyRecovered int `json:"notify_recovered"`
NotifyChannels string `json:"-"` // for db
NotifyChannelsJSON []string `json:"notify_channels" gorm:"-"` // for fe
NotifyGroups string `json:"-"` // for db
NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe
NotifyGroupsObj []*UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe
NotifyChannels string `json:"-"` // for db
NotifyChannelsJSON []string `json:"notify_channels,omitempty" gorm:"-"` // for fe
NotifyGroups string `json:"-"` // for db
NotifyGroupsJSON []string `json:"notify_groups,omitempty" gorm:"-"` // for fe
NotifyGroupsObj []*UserGroup `json:"notify_groups_obj,omitempty" gorm:"-"` // for fe
TargetIdent string `json:"target_ident"`
TargetNote string `json:"target_note"`
TriggerTime int64 `json:"trigger_time"`
TriggerValue string `json:"trigger_value"`
TriggerValues string `json:"trigger_values" gorm:"-"`
TriggerValuesJson EventTriggerValues `json:"trigger_values_json" gorm:"-"`
Tags string `json:"-"` // for db
TagsJSON []string `json:"tags" gorm:"-"` // for fe
TagsMap map[string]string `json:"tags_map" gorm:"-"` // for internal usage
OriginalTags string `json:"-"` // for db
OriginalTagsJSON []string `json:"original_tags" gorm:"-"` // for fe
Annotations string `json:"-"` //
AnnotationsJSON map[string]string `json:"annotations" gorm:"-"` // for fe
IsRecovered bool `json:"is_recovered" gorm:"-"` // for notify.py
NotifyUsersObj []*User `json:"notify_users_obj" gorm:"-"` // for notify.py
LastEvalTime int64 `json:"last_eval_time" gorm:"-"` // for notify.py 上次计算的时间
LastSentTime int64 `json:"last_sent_time" gorm:"-"` // 上次发送时间
NotifyCurNumber int `json:"notify_cur_number"` // notify: current number
FirstTriggerTime int64 `json:"first_trigger_time"` // 连续告警的首次告警时间
Tags string `json:"-"` // for db
TagsJSON []string `json:"tags" gorm:"-"` // for fe
TagsMap map[string]string `json:"tags_map" gorm:"-"` // for internal usage
OriginalTags string `json:"-"` // for db
OriginalTagsJSON []string `json:"original_tags" gorm:"-"` // for fe
Annotations string `json:"-"` //
AnnotationsJSON map[string]string `json:"annotations" gorm:"-"` // for fe
IsRecovered bool `json:"is_recovered" gorm:"-"` // for notify.py
NotifyUsersObj []*User `json:"notify_users_obj,omitempty" gorm:"-"` // for notify.py
LastEvalTime int64 `json:"last_eval_time" gorm:"-"` // for notify.py 上次计算的时间
LastSentTime int64 `json:"last_sent_time" gorm:"-"` // 上次发送时间
NotifyCurNumber int `json:"notify_cur_number"` // notify: current number
FirstTriggerTime int64 `json:"first_trigger_time"` // 连续告警的首次告警时间
ExtraConfig interface{} `json:"extra_config" gorm:"-"`
Status int `json:"status" gorm:"-"`
Claimant string `json:"claimant" gorm:"-"`
@@ -74,7 +76,15 @@ type AlertCurEvent struct {
RecoverConfig RecoverConfig `json:"recover_config" gorm:"-"`
RuleHash string `json:"rule_hash" gorm:"-"`
ExtraInfoMap []map[string]string `json:"extra_info_map" gorm:"-"`
NotifyRuleIDs []int64 `json:"notify_rule_ids" gorm:"-"`
NotifyRuleIds []int64 `json:"notify_rule_ids" gorm:"serializer:json"`
NotifyVersion int `json:"notify_version" gorm:"-"` // 0: old, 1: new
NotifyRules []*EventNotifyRule `json:"notify_rules" gorm:"-"`
}
// EventNotifyRule is the trimmed notify-rule view (id + name) attached to an
// event for front-end display.
type EventNotifyRule struct {
	Id   int64  `json:"id"`
	Name string `json:"name"`
}
func (e *AlertCurEvent) SetTagsMap() {
@@ -288,7 +298,49 @@ func (e *AlertCurEvent) ParseURL(url string) (string, error) {
return body.String(), nil
}
func (e *AlertCurEvent) GenCardTitle(rules []*AggrRule) string {
// parseAggrRules parses an aggregation rule string of the form
// "field:group_name::field:severity::tagkey:ident" into AggrRule pairs.
// Invalid input aborts the request via ginx.Bomb (400).
func parseAggrRules(rule string) []*AggrRule {
	segments := strings.Split(rule, "::") // e.g. field:group_name::field:severity::tagkey:ident
	if len(segments) == 0 {
		ginx.Bomb(http.StatusBadRequest, "rule empty")
	}

	parsed := make([]*AggrRule, len(segments))
	for i, seg := range segments {
		kv := strings.Split(seg, ":")
		if len(kv) != 2 {
			ginx.Bomb(http.StatusBadRequest, "rule invalid")
		}
		if kv[0] != "field" && kv[0] != "tagkey" {
			ginx.Bomb(http.StatusBadRequest, "rule invalid")
		}
		parsed[i] = &AggrRule{
			Type:  kv[0],
			Value: kv[1],
		}
	}
	return parsed
}
func (e *AlertCurEvent) GenCardTitle(rule string) (string, error) {
if strings.Contains(rule, "{{") {
// 有 {{ 表示使用的是新的配置方式,使用 go template 进行格式化
tmpl, err := template.New("card_title").Parse(rule)
if err != nil {
return fmt.Sprintf("failed to parse card title: %v", err), nil
}
var buf bytes.Buffer
if err := tmpl.Execute(&buf, e); err != nil {
return fmt.Sprintf("failed to execute card title: %v", err), nil
}
return buf.String(), nil
}
rules := parseAggrRules(rule)
arr := make([]string, len(rules))
for i := 0; i < len(rules); i++ {
rule := rules[i]
@@ -302,10 +354,10 @@ func (e *AlertCurEvent) GenCardTitle(rules []*AggrRule) string {
}
if len(arr[i]) == 0 {
arr[i] = "Null"
arr[i] = "Others"
}
}
return strings.Join(arr, "::")
return strings.Join(arr, "::"), nil
}
func (e *AlertCurEvent) GetTagValue(tagkey string) string {
@@ -393,6 +445,7 @@ func (e *AlertCurEvent) ToHis(ctx *ctx.Context) *AlertHisEvent {
LastEvalTime: e.LastEvalTime,
NotifyCurNumber: e.NotifyCurNumber,
FirstTriggerTime: e.FirstTriggerTime,
NotifyRuleIds: e.NotifyRuleIds,
}
}
@@ -408,6 +461,22 @@ func (e *AlertCurEvent) DB2FE() error {
if err := json.Unmarshal([]byte(e.RuleConfig), &e.RuleConfigJson); err != nil {
return err
}
e.TagsMap = make(map[string]string)
for i := 0; i < len(e.TagsJSON); i++ {
pair := strings.TrimSpace(e.TagsJSON[i])
if pair == "" {
continue
}
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
e.TagsMap[arr[0]] = arr[1]
}
return nil
}
@@ -525,7 +594,7 @@ func (e *AlertCurEvent) FillNotifyGroups(ctx *ctx.Context, cache map[int64]*User
}
func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, dsIds []int64, cates []string, ruleId int64, query string) (int64, error) {
severity []int64, dsIds []int64, cates []string, ruleId int64, query string, eventIds []int64) (int64, error) {
session := DB(ctx).Model(&AlertCurEvent{})
if stime != 0 && etime != 0 {
session = session.Where("trigger_time between ? and ?", stime, etime)
@@ -538,8 +607,8 @@ func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
session = session.Where("group_id in ?", bgids)
}
if severity >= 0 {
session = session.Where("severity = ?", severity)
if len(severity) > 0 {
session = session.Where("severity in ?", severity)
}
if len(dsIds) > 0 {
@@ -554,6 +623,9 @@ func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
session = session.Where("rule_id = ?", ruleId)
}
if len(eventIds) > 0 {
session = session.Where("id in ?", eventIds)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -566,9 +638,10 @@ func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
}
func AlertCurEventsGet(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, dsIds []int64, cates []string, ruleId int64, query string, limit, offset int) (
severity []int64, dsIds []int64, cates []string, ruleId int64, query string, limit, offset int, eventIds []int64) (
[]AlertCurEvent, error) {
session := DB(ctx).Model(&AlertCurEvent{})
if stime != 0 && etime != 0 {
session = session.Where("trigger_time between ? and ?", stime, etime)
}
@@ -580,8 +653,8 @@ func AlertCurEventsGet(ctx *ctx.Context, prods []string, bgids []int64, stime, e
session = session.Where("group_id in ?", bgids)
}
if severity >= 0 {
session = session.Where("severity = ?", severity)
if len(severity) > 0 {
session = session.Where("severity in ?", severity)
}
if len(dsIds) > 0 {
@@ -596,6 +669,10 @@ func AlertCurEventsGet(ctx *ctx.Context, prods []string, bgids []int64, stime, e
session = session.Where("rule_id = ?", ruleId)
}
if len(eventIds) > 0 {
session = session.Where("id in ?", eventIds)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -869,3 +946,100 @@ func AlertCurEventStatistics(ctx *ctx.Context, stime time.Time) map[string]inter
return res
}
// DeepCopy returns a copy of the event whose owned slice/map/pointer fields
// are duplicated so the copy can be mutated independently.
// NOTE(review): fields copied by plain assignment (RuleConfigJson,
// ExtraConfig, TriggerValuesJson via the struct copy) and the inner
// slices/maps of the copied *Target/*UserGroup/*User elements remain shared
// with the original — confirm callers never mutate through those.
func (e *AlertCurEvent) DeepCopy() *AlertCurEvent {
	// start from a shallow struct copy, then replace reference-typed fields
	eventCopy := *e

	// 复制指针字段
	if e.NotifyGroupsObj != nil {
		eventCopy.NotifyGroupsObj = make([]*UserGroup, len(e.NotifyGroupsObj))
		for i, group := range e.NotifyGroupsObj {
			if group != nil {
				groupCopy := *group
				eventCopy.NotifyGroupsObj[i] = &groupCopy
			}
		}
	}

	if e.NotifyUsersObj != nil {
		eventCopy.NotifyUsersObj = make([]*User, len(e.NotifyUsersObj))
		for i, user := range e.NotifyUsersObj {
			if user != nil {
				userCopy := *user
				eventCopy.NotifyUsersObj[i] = &userCopy
			}
		}
	}

	if e.Target != nil {
		targetCopy := *e.Target
		eventCopy.Target = &targetCopy
	}

	// 复制切片字段
	if e.CallbacksJSON != nil {
		eventCopy.CallbacksJSON = make([]string, len(e.CallbacksJSON))
		copy(eventCopy.CallbacksJSON, e.CallbacksJSON)
	}

	if e.NotifyChannelsJSON != nil {
		eventCopy.NotifyChannelsJSON = make([]string, len(e.NotifyChannelsJSON))
		copy(eventCopy.NotifyChannelsJSON, e.NotifyChannelsJSON)
	}

	if e.NotifyGroupsJSON != nil {
		eventCopy.NotifyGroupsJSON = make([]string, len(e.NotifyGroupsJSON))
		copy(eventCopy.NotifyGroupsJSON, e.NotifyGroupsJSON)
	}

	if e.TagsJSON != nil {
		eventCopy.TagsJSON = make([]string, len(e.TagsJSON))
		copy(eventCopy.TagsJSON, e.TagsJSON)
	}

	if e.TagsMap != nil {
		eventCopy.TagsMap = make(map[string]string, len(e.TagsMap))
		for k, v := range e.TagsMap {
			eventCopy.TagsMap[k] = v
		}
	}

	if e.OriginalTagsJSON != nil {
		eventCopy.OriginalTagsJSON = make([]string, len(e.OriginalTagsJSON))
		copy(eventCopy.OriginalTagsJSON, e.OriginalTagsJSON)
	}

	if e.AnnotationsJSON != nil {
		eventCopy.AnnotationsJSON = make(map[string]string, len(e.AnnotationsJSON))
		for k, v := range e.AnnotationsJSON {
			eventCopy.AnnotationsJSON[k] = v
		}
	}

	if e.ExtraInfo != nil {
		eventCopy.ExtraInfo = make([]string, len(e.ExtraInfo))
		copy(eventCopy.ExtraInfo, e.ExtraInfo)
	}

	if e.ExtraInfoMap != nil {
		// element maps are copied one key at a time; nil elements stay nil
		eventCopy.ExtraInfoMap = make([]map[string]string, len(e.ExtraInfoMap))
		for i, m := range e.ExtraInfoMap {
			if m != nil {
				eventCopy.ExtraInfoMap[i] = make(map[string]string, len(m))
				for k, v := range m {
					eventCopy.ExtraInfoMap[i][k] = v
				}
			}
		}
	}

	if e.NotifyRuleIds != nil {
		eventCopy.NotifyRuleIds = make([]int64, len(e.NotifyRuleIds))
		copy(eventCopy.NotifyRuleIds, e.NotifyRuleIds)
	}

	// shallow assignments: shared with the original (see NOTE above)
	eventCopy.RuleConfigJson = e.RuleConfigJson
	eventCopy.ExtraConfig = e.ExtraConfig

	return &eventCopy
}

View File

@@ -55,6 +55,10 @@ type AlertHisEvent struct {
NotifyCurNumber int `json:"notify_cur_number"` // notify: current number
FirstTriggerTime int64 `json:"first_trigger_time"` // 连续告警的首次告警时间
ExtraConfig interface{} `json:"extra_config" gorm:"-"`
NotifyRuleIds []int64 `json:"notify_rule_ids" gorm:"serializer:json"`
NotifyVersion int `json:"notify_version" gorm:"-"`
NotifyRules []*EventNotifyRule `json:"notify_rules" gorm:"-"`
}
func (e *AlertHisEvent) TableName() string {

View File

@@ -5,8 +5,6 @@ import (
"fmt"
"strconv"
"strings"
"text/template"
"text/template/parse"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
@@ -479,19 +477,10 @@ func (ar *AlertRule) Verify() error {
return errors.New("name is blank")
}
t, err := template.New("test").Parse(ar.Name)
if err != nil {
if str.Dangerous(ar.Name) {
return errors.New("Name has invalid characters")
}
for _, node := range t.Tree.Root.Nodes {
if tn := node.(*parse.TextNode); tn != nil {
if str.Dangerous(tn.String()) {
return fmt.Errorf("Name has invalid characters: %s", tn.String())
}
}
}
if ar.Prod == "" {
ar.Prod = METRIC
}

View File

@@ -408,9 +408,9 @@ func (s *AlertSubscribe) ModifyEvent(event *AlertCurEvent) {
}
if len(s.NotifyRuleIds) > 0 {
event.NotifyRuleIDs = s.NotifyRuleIds
event.NotifyRuleIds = s.NotifyRuleIds
} else {
event.NotifyRuleIDs = []int64{}
event.NotifyRuleIds = []int64{}
}
event.NotifyGroups = s.UserGroupIds

View File

@@ -170,6 +170,31 @@ func DatasourceGet(ctx *ctx.Context, id int64) (*Datasource, error) {
return ds, ds.DB2FE()
}
type DatasourceInfo struct {
Id int64 `json:"id"`
Name string `json:"name"`
PluginType string `json:"plugin_type"`
}
func GetDatasourceInfosByIds(ctx *ctx.Context, ids []int64) ([]*DatasourceInfo, error) {
if len(ids) == 0 {
return []*DatasourceInfo{}, nil
}
var dsInfos []*DatasourceInfo
err := DB(ctx).
Model(&Datasource{}).
Select("id", "name", "plugin_type").
Where("id in ?", ids).
Find(&dsInfos).Error
if err != nil {
return nil, err
}
return dsInfos, nil
}
func (ds *Datasource) Get(ctx *ctx.Context) error {
err := DB(ctx).Where("id = ?", ds.Id).First(ds).Error
if err != nil {

147
models/embedded_product.go Normal file
View File

@@ -0,0 +1,147 @@
package models
import (
"encoding/json"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/pkg/errors"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
// EmbeddedProduct is an externally embedded page/product entry shown inside
// the UI; when IsPrivate is set, visibility is limited to the TeamIDs teams.
type EmbeddedProduct struct {
	ID        int64   `json:"id" gorm:"primaryKey"` // primary key
	Name      string  `json:"name" gorm:"column:name;type:varchar(255)"`
	URL       string  `json:"url" gorm:"column:url;type:varchar(255)"`
	IsPrivate bool    `json:"is_private" gorm:"column:is_private;type:boolean"`
	TeamIDs   []int64 `json:"team_ids" gorm:"serializer:json"` // teams allowed to view when IsPrivate
	CreateAt  int64   `json:"create_at" gorm:"column:create_at;not null;default:0"`
	CreateBy  string  `json:"create_by" gorm:"column:create_by;type:varchar(64);not null;default:''"`
	UpdateAt  int64   `json:"update_at" gorm:"column:update_at;not null;default:0"`
	UpdateBy  string  `json:"update_by" gorm:"column:update_by;type:varchar(64);not null;default:''"`
}
// TableName implements the gorm Tabler interface.
func (e *EmbeddedProduct) TableName() string { return "embedded_product" }
// AfterFind is a gorm hook that normalizes a NULL team list to an empty
// slice so JSON marshaling yields [] instead of null.
func (e *EmbeddedProduct) AfterFind(tx *gorm.DB) (err error) {
	if e.TeamIDs != nil {
		return nil
	}
	e.TeamIDs = []int64{}
	return nil
}
// Verify validates required fields: non-empty safe Name, non-empty URL, and
// at least one team when the product is private.
func (e *EmbeddedProduct) Verify() error {
	switch {
	case e.Name == "":
		return errors.New("Name is blank")
	case str.Dangerous(e.Name):
		return errors.New("Name has invalid characters")
	case e.URL == "":
		return errors.New("URL is blank")
	case e.IsPrivate && len(e.TeamIDs) == 0:
		return errors.New("TeamIDs is blank")
	}
	return nil
}
// AddEmbeddedProduct validates and upserts the given products, stamping
// CreateAt/UpdateAt with the current time. Primary-key conflicts update all
// columns (UPSERT).
//
// Fix: gorm's Create returns an error when called with an empty slice, so
// an empty input is now a successful no-op.
func AddEmbeddedProduct(ctx *ctx.Context, eps []EmbeddedProduct) error {
	if len(eps) == 0 {
		return nil
	}

	now := time.Now().Unix()
	for i := range eps {
		if err := eps[i].Verify(); err != nil {
			return errors.Wrapf(err, "invalid entry %v", eps[i])
		}
		eps[i].CreateAt = now
		eps[i].UpdateAt = now
	}

	// 用主键做冲突判断有冲突则更新UPSERT
	return DB(ctx).Clauses(clause.OnConflict{
		UpdateAll: true, // 冲突时更新所有字段
	}).Create(&eps).Error
}
// EmbeddedProductGets returns every embedded-product row.
func EmbeddedProductGets(ctx *ctx.Context) ([]*EmbeddedProduct, error) {
	var rows []*EmbeddedProduct
	err := DB(ctx).Find(&rows).Error
	return rows, err
}
// GetEmbeddedProductByID fetches one product row by primary key; err is
// gorm.ErrRecordNotFound when the id does not exist.
func GetEmbeddedProductByID(ctx *ctx.Context, id int64) (*EmbeddedProduct, error) {
	ep := EmbeddedProduct{}
	err := DB(ctx).Where("id = ?", id).First(&ep).Error
	return &ep, err
}
// UpdateEmbeddedProduct validates the product, then persists all fields.
func UpdateEmbeddedProduct(ctx *ctx.Context, ep *EmbeddedProduct) error {
	err := ep.Verify()
	if err != nil {
		return err
	}
	res := DB(ctx).Save(ep)
	return res.Error
}
// DeleteEmbeddedProduct removes the product row with the given id.
func DeleteEmbeddedProduct(ctx *ctx.Context, id int64) error {
	res := DB(ctx).Where("id = ?", id).Delete(&EmbeddedProduct{})
	return res.Error
}
// CanMigrateEP reports whether the embedded_product table is still empty,
// i.e. the legacy-config migration has not run yet. Count failures are
// logged and treated as "cannot migrate".
func CanMigrateEP(ctx *ctx.Context) bool {
	var n int64
	if err := DB(ctx).Model(&EmbeddedProduct{}).Count(&n).Error; err != nil {
		logger.Errorf("failed to get embedded-product table count, err:%v", err)
		return false
	}
	return n <= 0
}
// MigrateEP migrates the legacy "embedded-dashboards" JSON stored in the
// configs table into embedded_product rows. Best-effort: failures are
// logged, never returned.
//
// Fix: a json.Unmarshal failure was previously swallowed silently; it is
// now logged so a corrupt legacy config is visible.
func MigrateEP(ctx *ctx.Context) {
	var lst []string
	// best-effort read of the legacy config value; errors intentionally ignored
	_ = DB(ctx).Model(&Configs{}).Where("ckey=? and external=? ", "embedded-dashboards", 0).Pluck("cval", &lst).Error
	if len(lst) == 0 {
		return
	}

	var oldData []DashboardConfig
	if err := json.Unmarshal([]byte(lst[0]), &oldData); err != nil {
		logger.Errorf("failed to unmarshal legacy embedded-dashboards config, err:%v", err)
		return
	}
	if len(oldData) < 1 {
		return
	}

	now := time.Now().Unix()
	newData := make([]EmbeddedProduct, 0, len(oldData))
	for _, v := range oldData {
		newData = append(newData, EmbeddedProduct{
			Name:      v.Name,
			URL:       v.URL,
			IsPrivate: false,
			TeamIDs:   []int64{},
			CreateBy:  "system",
			CreateAt:  now,
			UpdateAt:  now,
			UpdateBy:  "system",
		})
	}

	if err := DB(ctx).Create(&newData).Error; err != nil {
		logger.Errorf("failed to create embedded-product, err:%v", err)
	}
}
// DashboardConfig is the legacy "embedded-dashboards" config entry shape,
// kept only so MigrateEP can decode the old JSON.
type DashboardConfig struct {
	ID   string `json:"id"`
	Name string `json:"name"`
	URL  string `json:"url"`
}

178
models/event_pipeline.go Normal file
View File

@@ -0,0 +1,178 @@
package models
import (
"errors"
"fmt"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
)
// EventPipeline 事件Pipeline模型
// EventPipeline chains processors over alert events; LabelFilters /
// AttrFilters select which events it applies to (when FilterEnable is set),
// and ProcessorConfigs holds the serialized processor settings from which
// the runtime Processors are built.
type EventPipeline struct {
	ID               int64             `json:"id" gorm:"primaryKey"`
	Name             string            `json:"name" gorm:"type:varchar(128)"`
	TeamIds          []int64           `json:"team_ids" gorm:"type:text;serializer:json"`
	TeamNames        []string          `json:"team_names" gorm:"-"` // resolved by FillTeamNames, not persisted
	Description      string            `json:"description" gorm:"type:varchar(255)"`
	FilterEnable     bool              `json:"filter_enable" gorm:"type:tinyint(1)"`
	LabelFilters     []TagFilter       `json:"label_filters" gorm:"type:text;serializer:json"`
	AttrFilters      []TagFilter       `json:"attribute_filters" gorm:"type:text;serializer:json"`
	ProcessorConfigs []ProcessorConfig `json:"processors" gorm:"type:text;serializer:json"`
	CreateAt         int64             `json:"create_at" gorm:"type:bigint"`
	CreateBy         string            `json:"create_by" gorm:"type:varchar(64)"`
	UpdateAt         int64             `json:"update_at" gorm:"type:bigint"`
	UpdateBy         string            `json:"update_by" gorm:"type:varchar(64)"`

	// runtime processor instances built from ProcessorConfigs; not persisted
	Processors []Processor `json:"-" gorm:"-"`
}
// ProcessorConfig is one serialized processor entry of a pipeline: a type
// name (registry key) plus its raw, type-specific settings.
type ProcessorConfig struct {
	Typ    string      `json:"typ"`
	Config interface{} `json:"config"`
}
// TableName implements the gorm Tabler interface.
func (e *EventPipeline) TableName() string { return "event_pipeline" }
// Verify validates required fields (name, team_ids) and normalizes nil
// slice fields to empty slices so JSON marshaling yields [] instead of
// null. It is also called after loads purely for that normalization.
//
// Fix: the original had a second `len(e.TeamIds) == 0` branch assigning an
// empty slice — unreachable after the error return above; removed.
func (e *EventPipeline) Verify() error {
	if e.Name == "" {
		return errors.New("name cannot be empty")
	}
	if len(e.TeamIds) == 0 {
		return errors.New("team_ids cannot be empty")
	}

	if len(e.LabelFilters) == 0 {
		e.LabelFilters = make([]TagFilter, 0)
	}
	if len(e.AttrFilters) == 0 {
		e.AttrFilters = make([]TagFilter, 0)
	}
	if len(e.ProcessorConfigs) == 0 {
		e.ProcessorConfigs = make([]ProcessorConfig, 0)
	}
	return nil
}
// CreateEventPipeline 创建事件Pipeline
func CreateEventPipeline(ctx *ctx.Context, pipeline *EventPipeline) error {
	res := DB(ctx).Create(pipeline)
	return res.Error
}
// GetEventPipeline 获取单个事件Pipeline
// Loads one pipeline by id; err is gorm.ErrRecordNotFound when absent.
func GetEventPipeline(ctx *ctx.Context, id int64) (*EventPipeline, error) {
	var pipeline EventPipeline
	err := DB(ctx).Where("id = ?", id).First(&pipeline).Error
	if err != nil {
		return nil, err
	}
	// Verify is called for its side effect of normalizing nil slice fields;
	// its error (e.g. blank name on a legacy row) is ignored so stored rows
	// are still returned. NOTE(review): confirm ignoring it is intended.
	pipeline.Verify()
	return &pipeline, nil
}
// GetEventPipelinesByIds loads the pipelines whose ids are in the given set.
func GetEventPipelinesByIds(ctx *ctx.Context, ids []int64) ([]*EventPipeline, error) {
	var rows []*EventPipeline
	err := DB(ctx).Where("id in ?", ids).Find(&rows).Error
	return rows, err
}
// UpdateEventPipeline 更新事件Pipeline
// NOTE(review): unlike (*EventPipeline).Update, no Verify is performed here;
// confirm callers validate before saving.
func UpdateEventPipeline(ctx *ctx.Context, pipeline *EventPipeline) error {
	res := DB(ctx).Save(pipeline)
	return res.Error
}
// DeleteEventPipeline 删除事件Pipeline
func DeleteEventPipeline(ctx *ctx.Context, id int64) error {
	res := DB(ctx).Delete(&EventPipeline{}, id)
	return res.Error
}
// ListEventPipelines 获取事件Pipeline列表(按名称升序)。
// Non-center nodes fetch the list from the center over HTTP instead of the
// local database.
func ListEventPipelines(ctx *ctx.Context) ([]*EventPipeline, error) {
	if !ctx.IsCenter {
		return poster.GetByUrls[[]*EventPipeline](ctx, "/v1/n9e/event-pipelines")
	}

	var rows []*EventPipeline
	if err := DB(ctx).Order("name asc").Find(&rows).Error; err != nil {
		return nil, err
	}

	// Verify normalizes nil slice fields; its validation error is ignored here
	for _, p := range rows {
		p.Verify()
	}
	return rows, nil
}
// DeleteEventPipelines 批量删除事件Pipeline
func DeleteEventPipelines(ctx *ctx.Context, ids []int64) error {
	res := DB(ctx).Where("id in ?", ids).Delete(&EventPipeline{})
	return res.Error
}
// Update 更新事件Pipeline
// Preserves identity/creation fields from the existing row, stamps
// UpdateAt, validates, then overwrites all columns.
func (e *EventPipeline) Update(ctx *ctx.Context, ref *EventPipeline) error {
	ref.ID = e.ID
	ref.CreateAt = e.CreateAt
	ref.CreateBy = e.CreateBy
	ref.UpdateAt = time.Now().Unix()

	if err := ref.Verify(); err != nil {
		return err
	}
	return DB(ctx).Model(e).Select("*").Updates(*ref).Error
}
// FillTeamNames 填充团队名称
// Resolves TeamIds to team names, preserving the TeamIds order; ids with no
// matching team are skipped.
func (e *EventPipeline) FillTeamNames(ctx *ctx.Context) error {
	e.TeamNames = make([]string, 0, len(e.TeamIds))
	if len(e.TeamIds) == 0 {
		return nil
	}

	nameByID, err := UserGroupIdAndNameMap(ctx, e.TeamIds)
	if err != nil {
		return err
	}

	// 按原始TeamIds顺序填充TeamNames
	for _, id := range e.TeamIds {
		if name, ok := nameByID[id]; ok {
			e.TeamNames = append(e.TeamNames, name)
		}
	}
	return nil
}
// EventPipelineStatistics returns the pipeline count and latest update_at;
// non-center nodes fetch the numbers from the center over HTTP.
func EventPipelineStatistics(ctx *ctx.Context) (*Statistics, error) {
	if !ctx.IsCenter {
		return poster.GetByUrls[*Statistics](ctx, "/v1/n9e/statistic?name=event_pipeline")
	}

	var stats []*Statistics
	err := DB(ctx).Model(&EventPipeline{}).
		Select("count(*) as total", "max(update_at) as last_updated").
		Find(&stats).Error
	if err != nil {
		return nil, err
	}
	if len(stats) == 0 {
		return nil, fmt.Errorf("no event pipeline found")
	}
	return stats[0], nil
}

39
models/event_processor.go Normal file
View File

@@ -0,0 +1,39 @@
package models
import (
"fmt"
"strings"
"github.com/ccfos/nightingale/v6/pkg/ctx"
)
type Processor interface {
Init(settings interface{}) (Processor, error) // 初始化配置
Process(ctx *ctx.Context, event *AlertCurEvent) *AlertCurEvent // 处理告警事件
}
type NewProcessorFn func(settings interface{}) (Processor, error)
var processorRegister = map[string]NewProcessorFn{}
func RegisterProcessor(typ string, p Processor) {
if _, found := processorRegister[typ]; found {
return
}
processorRegister[typ] = p.Init
}
// GetProcessorByType instantiates a processor of the given (whitespace-
// trimmed) type with the supplied settings; unknown types yield an error.
func GetProcessorByType(typ string, settings interface{}) (Processor, error) {
	typ = strings.TrimSpace(typ)

	fn, ok := processorRegister[typ]
	if !ok {
		return nil, fmt.Errorf("processor type %s not found", typ)
	}
	return fn(settings)
}

View File

@@ -505,15 +505,15 @@ var NewTplMap = map[string]string{
{{if $event.IsRecovered}}恢复时间:{{timeformat $event.LastEvalTime}}{{else}}触发时间: {{timeformat $event.TriggerTime}}
触发时值: {{$event.TriggerValue}}{{end}}
发送时间: {{timestamp}}`,
Telegram: `**级别状态**: {{if $event.IsRecovered}}<font color="info">S{{$event.Severity}} Recovered</font>{{else}}<font color="warning">S{{$event.Severity}} Triggered</font>{{end}}
**规则标题**: {{$event.RuleName}}{{if $event.RuleNote}}
**规则备注**: {{$event.RuleNote}}{{end}}{{if $event.TargetIdent}}
**监控对象**: {{$event.TargetIdent}}{{end}}
**监控指标**: {{$event.TagsJSON}}{{if not $event.IsRecovered}}
**触发时值**: {{$event.TriggerValue}}{{end}}
{{if $event.IsRecovered}}**恢复时间**: {{timeformat $event.LastEvalTime}}{{else}}**首次触发时间**: {{timeformat $event.FirstTriggerTime}}{{end}}
{{$time_duration := sub now.Unix $event.FirstTriggerTime }}{{if $event.IsRecovered}}{{$time_duration = sub $event.LastEvalTime $event.FirstTriggerTime }}{{end}}**距离首次告警**: {{humanizeDurationInterface $time_duration}}
**发送时间**: {{timestamp}}`,
Telegram: `<b>级别状态: {{if $event.IsRecovered}}💚 S{{$event.Severity}} Recovered{{else}}⚠️ S{{$event.Severity}} Triggered{{end}}</b>
<b>规则标题</b>: {{$event.RuleName}}{{if $event.RuleNote}}
<b>规则备注</b>: {{$event.RuleNote}}{{end}}{{if $event.TargetIdent}}
<b>监控对象</b>: {{$event.TargetIdent}}{{end}}
<b>监控指标</b>: {{$event.TagsJSON}}{{if not $event.IsRecovered}}
<b>触发时值</b>: {{$event.TriggerValue}}{{end}}
{{if $event.IsRecovered}}<b>恢复时间</b>: {{timeformat $event.LastEvalTime}}{{else}}<b>首次触发时间</b>: {{timeformat $event.FirstTriggerTime}}{{end}}
{{$time_duration := sub now.Unix $event.FirstTriggerTime }}{{if $event.IsRecovered}}{{$time_duration = sub $event.LastEvalTime $event.FirstTriggerTime }}{{end}}<b>距离首次告警</b>: {{humanizeDurationInterface $time_duration}}
<b>发送时间</b>: {{timestamp}}`,
Wecom: `**级别状态**: {{if $event.IsRecovered}}<font color="info">💚S{{$event.Severity}} Recovered</font>{{else}}<font color="warning">💔S{{$event.Severity}} Triggered</font>{{end}}
**规则标题**: {{$event.RuleName}}{{if $event.RuleNote}}
**规则备注**: {{$event.RuleNote}}{{end}}{{if $event.TargetIdent}}

View File

@@ -67,7 +67,8 @@ func MigrateTables(db *gorm.DB) error {
&TaskRecord{}, &ChartShare{}, &Target{}, &Configs{}, &Datasource{}, &NotifyTpl{},
&Board{}, &BoardBusigroup{}, &Users{}, &SsoConfig{}, &models.BuiltinMetric{},
&models.MetricFilter{}, &models.NotificaitonRecord{}, &models.TargetBusiGroup{},
&models.UserToken{}, &models.DashAnnotation{}, MessageTemplate{}, NotifyRule{}, NotifyChannelConfig{}, &EsIndexPatternMigrate{}}
&models.UserToken{}, &models.DashAnnotation{}, MessageTemplate{}, NotifyRule{}, NotifyChannelConfig{}, &EsIndexPatternMigrate{},
&models.EventPipeline{}, &models.EmbeddedProduct{}, &models.SourceToken{}}
if isPostgres(db) {
dts = append(dts, &models.PostgresBuiltinComponent{})
@@ -79,24 +80,20 @@ func MigrateTables(db *gorm.DB) error {
dts = append(dts, &imodels.TaskSchedulerHealth{})
}
if !columnHasIndex(db, &AlertHisEvent{}, "original_tags") ||
!columnHasIndex(db, &AlertCurEvent{}, "original_tags") {
asyncDts := []interface{}{&AlertHisEvent{}, &AlertCurEvent{}}
go func() {
defer func() {
if r := recover(); r != nil {
logger.Errorf("panic to migrate table: %v", r)
}
}()
for _, dt := range asyncDts {
if err := db.AutoMigrate(dt); err != nil {
logger.Errorf("failed to migrate table %+v err:%v", dt, err)
}
asyncDts := []interface{}{&AlertHisEvent{}, &AlertCurEvent{}}
go func() {
defer func() {
if r := recover(); r != nil {
logger.Errorf("panic to migrate table: %v", r)
}
}()
}
for _, dt := range asyncDts {
if err := db.AutoMigrate(dt); err != nil {
logger.Errorf("failed to migrate table %+v err:%v", dt, err)
}
}
}()
if !db.Migrator().HasTable(&models.BuiltinPayload{}) {
dts = append(dts, &models.BuiltinPayload{})
@@ -177,11 +174,6 @@ func InsertPermPoints(db *gorm.DB) {
Operation: "/help/variable-configs",
})
ops = append(ops, models.RoleOperation{
RoleName: "Admin",
Operation: "/permissions",
})
ops = append(ops, models.RoleOperation{
RoleName: "Standard",
Operation: "/ibex-settings",
@@ -227,12 +219,31 @@ func InsertPermPoints(db *gorm.DB) {
Operation: "/notification-rules/del",
})
ops = append(ops, models.RoleOperation{
RoleName: "Standard",
Operation: "/event-pipelines",
})
ops = append(ops, models.RoleOperation{
RoleName: "Standard",
Operation: "/event-pipelines/add",
})
ops = append(ops, models.RoleOperation{
RoleName: "Standard",
Operation: "/event-pipelines/put",
})
ops = append(ops, models.RoleOperation{
RoleName: "Standard",
Operation: "/event-pipelines/del",
})
for _, op := range ops {
var count int64
err := db.Model(&models.RoleOperation{}).
Where("operation = ? AND role_name = ?", op.Operation, op.RoleName).
Count(&count).Error
session := db.Session(&gorm.Session{}).Model(&models.RoleOperation{})
err := session.Where("operation = ? AND role_name = ?", op.Operation, op.RoleName).Count(&count).Error
if err != nil {
logger.Errorf("check role operation exists failed, %v", err)
@@ -243,7 +254,7 @@ func InsertPermPoints(db *gorm.DB) {
continue
}
err = db.Model(&models.RoleOperation{}).Create(&op).Error
err = session.Create(&op).Error
if err != nil {
logger.Errorf("insert role operation failed, %v", err)
}
@@ -291,12 +302,14 @@ type TaskRecord struct {
EventId int64 `gorm:"column:event_id;bigint(20);not null;default:0;comment:event id;index:idx_event_id"`
}
type AlertHisEvent struct {
LastEvalTime int64 `gorm:"column:last_eval_time;bigint(20);not null;default:0;comment:for time filter;index:idx_last_eval_time"`
OriginalTags string `gorm:"column:original_tags;type:text;comment:labels key=val,,k2=v2"`
LastEvalTime int64 `gorm:"column:last_eval_time;bigint(20);not null;default:0;comment:for time filter;index:idx_last_eval_time"`
OriginalTags string `gorm:"column:original_tags;type:text;comment:labels key=val,,k2=v2"`
NotifyRuleIds []int64 `gorm:"column:notify_rule_ids;type:text;serializer:json;comment:notify rule ids"`
}
type AlertCurEvent struct {
OriginalTags string `gorm:"column:original_tags;type:text;comment:labels key=val,,k2=v2"`
OriginalTags string `gorm:"column:original_tags;type:text;comment:labels key=val,,k2=v2"`
NotifyRuleIds []int64 `gorm:"column:notify_rule_ids;type:text;serializer:json;comment:notify rule ids"`
}
type Target struct {
@@ -414,16 +427,17 @@ func (t *MessageTemplate) TableName() string {
}
type NotifyRule struct {
ID int64 `gorm:"column:id;primaryKey;autoIncrement"`
Name string `gorm:"column:name;type:varchar(255);not null"`
Description string `gorm:"column:description;type:text"`
Enable bool `gorm:"column:enable;not null;default:false"`
UserGroupIds []int64 `gorm:"column:user_group_ids;type:varchar(255)"`
NotifyConfigs []models.NotifyConfig `gorm:"column:notify_configs;type:text"`
CreateAt int64 `gorm:"column:create_at;not null;default:0"`
CreateBy string `gorm:"column:create_by;type:varchar(64);not null;default:''"`
UpdateAt int64 `gorm:"column:update_at;not null;default:0"`
UpdateBy string `gorm:"column:update_by;type:varchar(64);not null;default:''"`
ID int64 `gorm:"column:id;primaryKey;autoIncrement"`
Name string `gorm:"column:name;type:varchar(255);not null"`
Description string `gorm:"column:description;type:text"`
Enable bool `gorm:"column:enable;not null;default:false"`
UserGroupIds []int64 `gorm:"column:user_group_ids;type:varchar(255)"`
NotifyConfigs []models.NotifyConfig `gorm:"column:notify_configs;type:text"`
PipelineConfigs []models.PipelineConfig `gorm:"column:pipeline_configs;type:text"`
CreateAt int64 `gorm:"column:create_at;not null;default:0"`
CreateBy string `gorm:"column:create_by;type:varchar(64);not null;default:''"`
UpdateAt int64 `gorm:"column:update_at;not null;default:0"`
UpdateBy string `gorm:"column:update_by;type:varchar(64);not null;default:''"`
}
func (r *NotifyRule) TableName() string {

View File

@@ -34,11 +34,6 @@ func TestInsertPermPoints(t *testing.T) {
Operation: "/help/variable-configs",
})
ops = append(ops, models.RoleOperation{
RoleName: "Admin",
Operation: "/permissions",
})
ops = append(ops, models.RoleOperation{
RoleName: "Standard",
Operation: "/ibex-settings",

View File

@@ -157,7 +157,7 @@ func (ncc *NotifyChannelConfig) SendScript(events []*AlertCurEvent, tpl map[stri
return "", "", fmt.Errorf("script or path is empty")
}
fpath := ".notify_scriptt"
fpath := ".notify_script_" + strconv.FormatInt(ncc.ID, 10)
if config.Path != "" {
fpath = config.Path
} else {
@@ -388,7 +388,10 @@ func (ncc *NotifyChannelConfig) SendFlashDuty(events []*AlertCurEvent, flashDuty
// 设置 URL 参数
query := req.URL.Query()
query.Add("channel_id", strconv.FormatInt(flashDutyChannelID, 10))
if flashDutyChannelID != 0 {
// 如果 flashduty 有配置协作空间(channel_id),则传入 channel_id 参数
query.Add("channel_id", strconv.FormatInt(flashDutyChannelID, 10))
}
req.URL.RawQuery = query.Encode()
req.Header.Add("Content-Type", "application/json")
@@ -800,6 +803,7 @@ func (ncc *NotifyChannelConfig) SendEmailNow(events []*AlertCurEvent, tpl map[st
s, err := d.Dial()
if err != nil {
logger.Errorf("email_sender: failed to dial: %s", err)
return err
}
m := gomail.NewMessage()
@@ -1232,12 +1236,12 @@ var NotiChMap = []*NotifyChannelConfig{
Name: "Telegram", Ident: Telegram, RequestType: "http", Weight: 7, Enable: true,
RequestConfig: &RequestConfig{
HTTPRequestConfig: &HTTPRequestConfig{
URL: "https://api.telegram.org/bot{{$params.token}}/sendMessage",
Method: "POST",
URL: "https://api.telegram.org/bot{{$params.token}}/sendMessage",
Method: "POST", Headers: map[string]string{"Content-Type": "application/json"},
Timeout: 10000, Concurrency: 5, RetryTimes: 3, RetryInterval: 100,
Request: RequestDetail{
Parameters: map[string]string{"chat_id": "{{$params.chat_id}}"},
Body: `{"parse_mode": "markdown", "text": "{{$tpl.content}}"}`,
Body: `{"text":"{{$tpl.content}}","parse_mode": "HTML"}`,
},
},
},

View File

@@ -15,6 +15,8 @@ type NotifyRule struct {
Enable bool `json:"enable"` // 启用状态
UserGroupIds []int64 `json:"user_group_ids" gorm:"serializer:json"` // 告警组ID
PipelineConfigs []PipelineConfig `json:"pipeline_configs" gorm:"serializer:json"`
// 通知配置
NotifyConfigs []NotifyConfig `json:"notify_configs" gorm:"serializer:json"`
@@ -24,6 +26,11 @@ type NotifyRule struct {
UpdateBy string `json:"update_by"`
}
// PipelineConfig attaches an event pipeline to a notify rule and
// records whether that attachment is currently active.
type PipelineConfig struct {
	PipelineId int64 `json:"pipeline_id"` // id of the referenced event pipeline
	Enable     bool  `json:"enable"`      // whether this pipeline is applied for the rule
}
// TableName tells GORM which database table backs NotifyRule.
func (r *NotifyRule) TableName() string {
	return "notify_rule"
}
@@ -209,7 +216,7 @@ func NotifyRulesGet(ctx *ctx.Context, where string, args ...interface{}) ([]*Not
if where != "" && len(args) > 0 {
session = session.Where(where, args...)
}
err := session.Find(&lst).Error
err := session.Order("name asc").Find(&lst).Error
if err != nil {
return nil, err
}

48
models/source_token.go Normal file
View File

@@ -0,0 +1,48 @@
package models
import (
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
)
// SourceToken is a token row keyed by (source_type, source_id); lookups
// match all three of type, id and token value (see GetSourceTokenBySource).
// An ExpireAt of 0 means the token never expires (see IsExpired).
type SourceToken struct {
	Id         int64  `json:"id" gorm:"primaryKey"`
	SourceType string `json:"source_type" gorm:"column:source_type;type:varchar(64);not null;default:''"` // category of the source — presumably an integration/source kind; confirm with callers
	SourceId   string `json:"source_id" gorm:"column:source_id;type:varchar(255);not null;default:''"`    // identifier of the source within its type
	Token      string `json:"token" gorm:"column:token;type:varchar(255);not null;default:''"`            // opaque token value, compared verbatim
	ExpireAt   int64  `json:"expire_at" gorm:"type:bigint;not null;default:0"`                            // unix seconds; 0 = never expires
	CreateAt   int64  `json:"create_at" gorm:"type:bigint;not null;default:0"`                            // unix seconds
	CreateBy   string `json:"create_by" gorm:"type:varchar(64);not null;default:''"`
}
// TableName tells GORM which database table backs SourceToken.
func (SourceToken) TableName() string {
	return "source_token"
}
// Add persists st as a new row via the package's shared Insert helper.
func (st *SourceToken) Add(ctx *ctx.Context) error {
	return Insert(ctx, st)
}
// GetSourceTokenBySource returns the token row that matches the given
// source type, source id and token value exactly. When no row matches,
// gorm's First error (e.g. record-not-found) is returned unchanged.
func GetSourceTokenBySource(ctx *ctx.Context, sourceType, sourceId, token string) (*SourceToken, error) {
	result := new(SourceToken)
	query := DB(ctx).Where("source_type = ? AND source_id = ? AND token = ?", sourceType, sourceId, token)
	if err := query.First(result).Error; err != nil {
		return nil, err
	}
	return result, nil
}
// IsExpired reports whether the token's expiry time has passed.
// An ExpireAt of zero means the token never expires.
func (st *SourceToken) IsExpired() bool {
	return st.ExpireAt != 0 && time.Now().Unix() > st.ExpireAt
}
// CleanupExpiredTokens deletes every token whose expiry is set
// (expire_at > 0) and already in the past, returning the number of
// rows removed together with any database error.
func CleanupExpiredTokens(ctx *ctx.Context) (int64, error) {
	res := DB(ctx).
		Where("expire_at > 0 AND expire_at < ?", time.Now().Unix()).
		Delete(&SourceToken{})
	return res.RowsAffected, res.Error
}

View File

@@ -121,6 +121,25 @@ func (u *User) IsAdmin() bool {
return false
}
// CheckGroupPermission returns nil when the user may act on at least
// one of the given user-group ids: admins always pass, and any other
// user must be a member of one of the groups. A non-admin with no
// matching membership (including an empty groupIds slice) gets a
// "forbidden" error.
func (u *User) CheckGroupPermission(ctx *ctx.Context, groupIds []int64) error {
	if u.IsAdmin() {
		return nil
	}

	mine, err := MyGroupIdsMap(ctx, u.Id)
	if err != nil {
		return err
	}

	for _, gid := range groupIds {
		if _, ok := mine[gid]; ok {
			return nil
		}
	}
	return errors.New("forbidden")
}
func (u *User) Verify() error {
u.Username = strings.TrimSpace(u.Username)

View File

@@ -113,6 +113,19 @@ func UserGroupGetByIds(ctx *ctx.Context, ids []int64) ([]UserGroup, error) {
return lst, err
}
// UserGroupIdAndNameMap fetches the user groups for the given ids and
// returns an id -> name lookup table.
func UserGroupIdAndNameMap(ctx *ctx.Context, ids []int64) (map[int64]string, error) {
	groups, err := UserGroupGetByIds(ctx, ids)
	if err != nil {
		return nil, err
	}

	names := make(map[int64]string, len(groups))
	for i := range groups {
		names[groups[i].Id] = groups[i].Name
	}
	return names, nil
}
func UserGroupGetAll(ctx *ctx.Context) ([]*UserGroup, error) {
if !ctx.IsCenter {
lst, err := poster.GetByUrls[[]*UserGroup](ctx, "/v1/n9e/user-groups")

View File

@@ -22,6 +22,20 @@ func MyGroupIds(ctx *ctx.Context, userId int64) ([]int64, error) {
return ids, err
}
// MyGroupIdsMap returns the set of user-group ids the user belongs to,
// as a map keyed by group id with empty-struct values (membership set).
func MyGroupIdsMap(ctx *ctx.Context, userId int64) (map[int64]struct{}, error) {
	groupIds, err := MyGroupIds(ctx, userId)
	if err != nil {
		return nil, err
	}

	set := make(map[int64]struct{}, len(groupIds))
	for _, gid := range groupIds {
		set[gid] = struct{}{}
	}
	return set, nil
}
// my business group ids
func MyBusiGroupIds(ctx *ctx.Context, userId int64) ([]int64, error) {
groupIds, err := MyGroupIds(ctx, userId)

View File

@@ -1,159 +1,6 @@
package i18nx
var I18N = `{
"zh": {
"Username or password invalid": "用户名或密码错误",
"incorrect verification code": "验证码错误",
"roles empty": "角色不能为空",
"Username already exists": "此用户名已存在 请使用其他用户名",
"failed to count user-groups": "校验数据失败 请重试",
"UserGroup already exists": "组名已存在 请使用其他名称",
"members empty": "成员不能为空",
"At least one team have rw permission": "至少需要有一个团队有读写权限",
"Failed to create BusiGroup(%s)": "[%s]创建失败 请重试",
"business group id invalid": "业务组 id 不正确",
"idents empty": "监控对象不能为空",
"invalid tag(%s)": "tag不合法[%s]",
"invalid tagkey(%s): cannot contains . ": "tagkey[%s]不能包含.",
"invalid tagkey(%s): cannot contains _ ": "tagkey[%s]不能包含_",
"invalid tagkey(%s)": "tagkey不合法[%s]",
"duplicate tagkey(%s)": "tagkey(%s)重复了",
"name is empty": "名称不能为空",
"Ident duplicate": "仪表盘唯一标识已存在",
"No such dashboard": "仪表盘不存在",
"Name has invalid characters": "名称包含非法字符",
"Name is blank": "名称不能为空",
"forbidden": "没有权限",
"builtin alerts is empty, file: %s": "内置告警模板为空 %s",
"input json is empty": "提交内容不能为空",
"fields empty": "选择字段不能为空",
"No such AlertRule": "无此告警规则",
"GroupId(%d) invalid": "业务组id无效",
"No such recording rule": "无此记录规则",
"tags is blank": "标签不能为空",
"oops... etime(%d) <= btime(%d)": "开始时间,不能大于结束时间",
"group_id invalid": "业务组无效",
"No such AlertMute": "无此屏蔽规则",
"rule_id and tags are both blank": "告警规则和标签不能同时为空",
"rule is blank": "规则不能为空",
"rule invalid": "规则无效 请检查是否正确",
"unsupported field: %s": "不支持字段 %s",
"arg(batch) should be nonnegative": "batch 不能为负数",
"arg(tolerance) should be nonnegative": "tolerance 不能为负数",
"arg(timeout) should be nonnegative": "timeout 不能为负数",
"arg(timeout) longer than five days": "timeout 时间不能超过5天",
"arg(title) is required": "title 为必填项",
"created task.id is zero": "任务id为零",
"invalid ibex address: %s": "ibex %s 地址无效",
"url path invalid": "url非法",
"no such server": "无此实例",
"admin role can not be modified": "管理员角色不允许修改",
"builtin payload already exists": "内置模板已存在",
"This functionality has not been enabled. Please contact the system administrator to activate it.": "此功能尚未启用。请联系系统管理员启用",
"Dashboards": "仪表盘",
"View Dashboards": "查看仪表盘",
"Add Dashboard": "添加仪表盘",
"Modify Dashboard": "修改仪表盘",
"Delete Dashboard": "删除仪表盘",
"View Embedded Dashboard": "查看集成仪表盘",
"Modify Embedded Dashboard": "修改集成仪表盘",
"View Public Dashboard": "查看公开仪表盘",
"Time Series Metrics": "时序指标",
"View Metric Data": "查看即时查询",
"View Object Data": "查看快捷视图",
"Metric Views": "指标视图",
"View Built-in Metrics": "查看内置指标",
"Add Built-in Metric": "添加内置指标",
"Modify Built-in Metric": "修改内置指标",
"Delete Built-in Metric": "删除内置指标",
"Recording Rule Management": "记录规则",
"View Recording Rules": "查看记录规则",
"Add Recording Rule": "添加记录规则",
"Modify Recording Rule": "修改记录规则",
"Delete Recording Rule": "删除记录规则",
"Log Analysis": "日志分析",
"View Logs": "查看日志",
"View Index Patterns": "查看索引模式",
"Alert Rules": "告警规则",
"View Alert Rules": "查看告警规则",
"Add Alert Rule": "添加告警规则",
"Modify Alert Rule": "修改告警规则",
"Delete Alert Rule": "删除告警规则",
"Alert Silence Management": "屏蔽规则",
"View Alert Silences": "查看告警屏蔽规则",
"Add Alert Silence": "添加告警屏蔽规则",
"Modify Alert Silence": "修改告警屏蔽规则",
"Delete Alert Silence": "删除告警屏蔽规则",
"Alert Subscription Management": "订阅规则",
"View Alert Subscriptions": "查看告警订阅规则",
"Add Alert Subscription": "添加告警订阅规则",
"Modify Alert Subscription": "修改告警订阅规则",
"Delete Alert Subscription": "删除告警订阅规则",
"Alert Event Management": "告警事件",
"View Current Alerts": "查看活跃告警",
"Delete Current Alert": "删除活跃告警",
"View Historical Alerts": "查看历史告警",
"Alert Notification": "告警通知",
"View Notification Settings": "通知设置",
"View Notification Templates": "通知模板",
"Task Management": "任务管理",
"View Task Templates": "查看任务模板",
"Add Task Template": "添加任务模板",
"Modify Task Template": "修改任务模板",
"Delete Task Template": "删除任务模板",
"View Task Instances": "查看任务历史",
"Add Task Instance": "创建临时任务",
"Modify Task Instance": "管理正在运行的任务",
"Infrastructure": "基础设施",
"View Objects": "查看机器",
"Add Object": "添加机器",
"Modify Object": "修改机器",
"Delete Object": "删除机器",
"Bind Object": "分配未归组机器",
"User Management": "用户管理",
"View User List": "查看用户列表",
"View User Groups": "查看团队",
"Add User Group": "添加团队",
"Modify User Group": "修改团队",
"Delete User Group": "删除团队",
"Business Group Management": "业务组管理",
"View Business Groups": "查看业务组",
"Add Business Group": "添加业务组",
"Modify Business Group": "修改业务组",
"Delete Business Group": "删除业务组",
"Permission Management": "权限管理",
"View Permission Settings": "查看权限设置",
"Template Center": "模板中心",
"View Built-in Components": "查看内置模板",
"Add Built-in Component": "添加内置模板",
"Modify Built-in Component": "修改内置模板",
"Delete Built-in Component": "删除内置模板",
"Data Source Management": "数据源管理",
"View Data Source Configuration": "数据源管理",
"System Information": "系统配置",
"View Variable Configuration": "变量配置管理",
"View Version Information": "查看关于产品",
"View Server Information": "查看告警引擎列表",
"View SSO Configuration": "单点登录管理",
"View Migration Configuration": "查看迁移配置",
"View Site Settings": "查看站点设置",
"View Message Templates": "查看消息模板",
"Add Message Templates": "添加消息模板",
"Modify Message Templates": "修改消息模板",
"Delete Message Templates": "删除消息模板",
"View Notify Rules": "查看通知规则",
"Add Notify Rules": "添加通知规则",
"Modify Notify Rules": "修改通知规则",
"Delete Notify Rules": "删除通知规则",
"View Notify Channels": "查看通知媒介",
"Add Notify Channels": "添加通知媒介",
"Modify Notify Channels": "修改通知媒介",
"Delete Notify Channels": "删除通知媒介",
"Notify Channels": "通知媒介",
"Notify Rules": "通知规则",
"Message Templates": "消息模板",
"User Contact Management": "用户联系方式管理"
},
"zh_CN": {
"Username or password invalid": "用户名或密码错误",
"incorrect verification code": "验证码错误",
@@ -203,109 +50,117 @@ var I18N = `{
"admin role can not be modified": "管理员角色不允许修改",
"builtin payload already exists": "内置模板已存在",
"This functionality has not been enabled. Please contact the system administrator to activate it.": "此功能尚未启用。请联系系统管理员启用",
"Dashboards": "仪表盘",
"View Dashboards": "查看仪表盘",
"Add Dashboard": "添加仪表盘",
"Modify Dashboard": "修改仪表盘",
"Delete Dashboard": "删除仪表盘",
"View Embedded Dashboard": "查看集成仪表盘",
"Modify Embedded Dashboard": "修改集成仪表盘",
"View Public Dashboard": "查看公开仪表盘",
"Time Series Metrics": "时序指标",
"View Metric Data": "查看即时查询",
"View Object Data": "查看快捷视图",
"Metric Views": "指标视图",
"View Built-in Metrics": "查看内置指标",
"Add Built-in Metric": "添加内置指标",
"Modify Built-in Metric": "修改内置指标",
"Delete Built-in Metric": "删除内置指标",
"Recording Rule Management": "记录规则",
"View Recording Rules": "查看记录规则",
"Add Recording Rule": "添加记录规则",
"Modify Recording Rule": "修改记录规则",
"Delete Recording Rule": "删除记录规则",
"Log Analysis": "日志分析",
"View Logs": "查看日志",
"View Index Patterns": "查看索引模式",
"Alert Rules": "告警规则",
"View Alert Rules": "查看告警规则",
"Add Alert Rule": "添加告警规则",
"Modify Alert Rule": "修改告警规则",
"Delete Alert Rule": "删除告警规则",
"Alert Silence Management": "屏蔽规则",
"View Alert Silences": "查看告警屏蔽规则",
"Add Alert Silence": "添加告警屏蔽规则",
"Modify Alert Silence": "修改告警屏蔽规则",
"Delete Alert Silence": "删除告警屏蔽规则",
"Alert Subscription Management": "订阅规则",
"View Alert Subscriptions": "查看告警订阅规则",
"Add Alert Subscription": "添加告警订阅规则",
"Modify Alert Subscription": "修改告警订阅规则",
"Delete Alert Subscription": "删除告警订阅规则",
"Alert Event Management": "告警事件",
"View Current Alerts": "查看活跃告警",
"Delete Current Alert": "删除活跃告警",
"View Historical Alerts": "查看历史告警",
"Alert Notification": "告警通知",
"View Notification Settings": "通知设置管理",
"View Notification Templates": "通知模板管理",
"Task Management": "任务管理",
"View Task Templates": "查看任务模板",
"Add Task Template": "添加任务模板",
"Modify Task Template": "修改任务模板",
"Delete Task Template": "删除任务模板",
"View Task Instances": "查看任务历史",
"Add Task Instance": "创建临时任务",
"Modify Task Instance": "管理正在运行的任务",
"targets not exist: %s": "有些机器不存在: %s",
"Infrastructure": "基础设施",
"View Objects": "查看机器",
"Add Object": "添加机器",
"Modify Object": "修改机器",
"Delete Object": "删除机器",
"Bind Object": "分配未归组机器",
"User Management": "用户管理",
"View User List": "查看用户列表",
"View User Groups": "查看团队",
"Add User Group": "添加团队",
"Modify User Group": "修改团队",
"Delete User Group": "删除团队",
"Business Group Management": "业务组管理",
"View Business Groups": "查看业务组",
"Add Business Group": "添加业务组",
"Modify Business Group": "修改业务组",
"Delete Business Group": "删除业务组",
"Permission Management": "权限管理",
"View Permission Settings": "查看权限设置",
"Template Center": "模板中心",
"View Built-in Components": "查看内置模板",
"Add Built-in Component": "添加内置模板",
"Modify Built-in Component": "修改内置模板",
"Delete Built-in Component": "删除内置模板",
"Data Source Management": "数据源管理",
"View Data Source Configuration": "数据源管理",
"System Information": "系统配置",
"View Variable Configuration": "变量配置管理",
"View Version Information": "查看关于产品",
"View Server Information": "查看告警引擎列表",
"View SSO Configuration": "单点登录管理",
"View Migration Configuration": "查看迁移配置",
"Host - View": "机器 - 查看",
"Host - Modify": "机器 - 修改",
"Host - Delete": "机器 - 删除",
"Host - Bind Uncategorized": "机器 - 绑定未归组机器到某个业务组",
"Explorer": "数据查询",
"Metrics Explorer": "指标查询",
"Quick View": "快捷视图",
"Built-in Metric - View": "内置指标 - 查看",
"Built-in Metric - Add": "内置指标 - 新增",
"Built-in Metric - Modify": "内置指标 - 修改",
"Built-in Metric - Delete": "内置指标 - 删除",
"Recording Rule - View": "记录规则 - 查看",
"Recording Rule - Add": "记录规则 - 新增",
"Recording Rule - Modify": "记录规则 - 修改",
"Recording Rule - Delete": "记录规则 - 删除",
"Logs Explorer": "日志查询",
"Index Pattern - View": "索引模式 - 查看",
"Index Pattern - Add": "索引模式 - 新增",
"Index Pattern - Modify": "索引模式 - 修改",
"Index Pattern - Delete": "索引模式 - 删除",
"Dashboard - View": "仪表盘 - 查看",
"Dashboard - Add": "仪表盘 - 新增",
"Dashboard - Modify": "仪表盘 - 修改",
"Dashboard - Delete": "仪表盘 - 删除",
"Dashboard - View Public": "仪表盘 - 查看公开仪表盘",
"Alerting": "告警",
"Alerting Rule - View": "告警规则 - 查看",
"Alerting Rule - Add": "告警规则 - 新增",
"Alerting Rule - Modify": "告警规则 - 修改",
"Alerting Rule - Delete": "告警规则 - 删除",
"Mutting Rule - View": "屏蔽规则 - 查看",
"Mutting Rule - Add": "屏蔽规则 - 新增",
"Mutting Rule - Modify": "屏蔽规则 - 修改",
"Mutting Rule - Delete": "屏蔽规则 - 删除",
"Subscribing Rule - View": "订阅规则 - 查看",
"Subscribing Rule - Add": "订阅规则 - 新增",
"Subscribing Rule - Modify": "订阅规则 - 修改",
"Subscribing Rule - Delete": "订阅规则 - 删除",
"Self-healing-Script - View": "自愈脚本 - 查看",
"Self-healing-Script - Add": "自愈脚本 - 新增",
"Self-healing-Script - Modify": "自愈脚本 - 修改",
"Self-healing-Script - Delete": "自愈脚本 - 删除",
"Self-healing-Job - View": "自愈任务 - 查看",
"Self-healing-Job - Add": "自愈任务 - 新增",
"Self-healing-Job - Modify": "自愈任务 - 修改",
"Active Event - View": "活跃事件 - 查看",
"Active Event - Delete": "活跃事件 - 删除",
"Historical Event - View": "历史事件 - 查看",
"Notification": "通知",
"Notification Rule - View": "通知规则 - 查看",
"Notification Rule - Add": "通知规则 - 新增",
"Notification Rule - Modify": "通知规则 - 修改",
"Notification Rule - Delete": "通知规则 - 删除",
"Media Type - View": "通知媒介 - 查看",
"Media Type - Add": "通知媒介 - 新增",
"Media Type - Modify": "通知媒介 - 修改",
"Media Type - Delete": "通知媒介 - 删除",
"Message Template - View": "消息模板 - 查看",
"Message Template - Add": "消息模板 - 新增",
"Message Template - Modify": "消息模板 - 修改",
"Message Template - Delete": "消息模板 - 删除",
"Event Pipeline - View": "事件管道 - 查看",
"Event Pipeline - Add": "事件管道 - 新增",
"Event Pipeline - Modify": "事件管道 - 修改",
"Event Pipeline - Delete": "事件管道 - 删除",
"Notification Settings - View": "老版本通知设置 - 查看",
"Notification Templates - View": "老版本消息模板 - 查看",
"Integrations": "集成中心",
"Data Source - View": "数据源 - 查看",
"Component - View": "组件 - 查看",
"Component - Add": "组件 - 新增",
"Component - Modify": "组件 - 修改",
"Component - Delete": "组件 - 删除",
"Embedded Product - View": "系统集成 - 查看",
"Embedded Product - Add": "系统集成 - 新增",
"Embedded Product - Modify": "系统集成 - 修改",
"Embedded Product - Delete": "系统集成 - 删除",
"Organization": "人员组织",
"User - View": "用户 - 查看",
"User - Add": "用户 - 新增",
"User - Modify": "用户 - 修改",
"User - Delete": "用户 - 删除",
"Team - View": "团队 - 查看",
"Team - Add": "团队 - 新增",
"Team - Modify": "团队 - 修改",
"Team - Delete": "团队 - 删除",
"Business Group - View": "业务组 - 查看",
"Business Group - Add": "业务组 - 新增",
"Business Group - Modify": "业务组 - 修改",
"Business Group - Delete": "业务组 - 删除",
"Role - View": "角色 - 查看",
"Role - Add": "角色 - 新增",
"Role - Modify": "角色 - 修改",
"Role - Delete": "角色 - 删除",
"System Settings": "系统配置",
"View Site Settings": "查看站点设置",
"View Message Templates": "查看消息模板",
"Add Message Templates": "添加消息模板",
"Modify Message Templates": "修改消息模板",
"Delete Message Templates": "删除消息模板",
"View Notify Rules": "查看通知规则",
"Add Notify Rules": "添加通知规则",
"Modify Notify Rules": "修改通知规则",
"Delete Notify Rules": "删除通知规则",
"View Notify Channels": "查看通知媒介",
"Add Notify Channels": "添加通知媒介",
"Modify Notify Channels": "修改通知媒介",
"Delete Notify Channels": "删除通知媒介",
"Notify Channels": "通知媒介",
"Notify Rules": "通知规则",
"Message Templates": "消息模板",
"User Contact Management": "用户联系方式管理"
"View Variable Settings": "查看变量配置",
"View SSO Settings": "查看单点登录配置",
"View Alerting Engines": "查看告警引擎列表",
"View Product Version": "查看产品版本",
"---------zh_CN--------": "---------zh_CN--------"
},
"zh_HK": {
"Username or password invalid": "用戶名或密碼錯誤",
@@ -359,118 +214,117 @@ var I18N = `{
"builtin metric already exists": "內置指標已存在",
"AlertRule already exists": "告警規則已存在",
"This functionality has not been enabled. Please contact the system administrator to activate it.": "此功能尚未啟用。請聯繫系統管理員啟用",
"dashboards": "儀表板",
"/dashboards": "讀取儀表板信息",
"/dashboards/add": "新增儀表板",
"/dashboards/put": "修改儀表板",
"/dashboards/del": "刪除儀表板",
"/embedded-dashboards/put": "修改嵌入儀表板",
"/embedded-dashboards": "查看嵌入儀表板",
"/public-dashboards": "查看公開儀表板",
"Dashboards": "儀表板",
"View Dashboards": "查看儀表板",
"Add Dashboard": "添加儀表板",
"Modify Dashboard": "修改儀表板",
"Delete Dashboard": "刪除儀表板",
"Modify Embedded Dashboard": "修改嵌入儀表板",
"View Embedded Dashboard": "查看集成儀表板",
"View Public Dashboard": "查看公開儀表板",
"Time Series Metrics": "時序指標",
"View Metric Data": "查看即時查詢",
"View Object Data": "查看快捷視圖",
"Metric Views": "指標視圖",
"View Built-in Metrics": "查看內置指標",
"Add Built-in Metric": "添加內置指標",
"Modify Built-in Metric": "修改內置指標",
"Delete Built-in Metric": "刪除內置指標",
"Recording Rule Management": "記錄規則",
"View Recording Rules": "查看記錄規則",
"Add Recording Rule": "添加記錄規則",
"Modify Recording Rule": "修改記錄規則",
"Delete Recording Rule": "刪除記錄規則",
"Log Analysis": "日誌分析",
"View Logs": "查看日誌",
"View Index Patterns": "查看索引模式",
"Alert Rules": "告警規則",
"View Alert Rules": "查看告警規則",
"Add Alert Rule": "添加告警規則",
"Modify Alert Rule": "修改告警規則",
"Delete Alert Rule": "刪除告警規則",
"Alert Silence Management": "屏蔽規則",
"View Alert Silences": "查看告警屏蔽規則",
"Add Alert Silence": "添加告警屏蔽規則",
"Modify Alert Silence": "修改告警屏蔽規則",
"Delete Alert Silence": "刪除告警屏蔽規則",
"Alert Subscription Management": "訂閱規則",
"View Alert Subscriptions": "查看告警訂閱規則",
"Add Alert Subscription": "添加告警訂閱規則",
"Modify Alert Subscription": "修改告警訂閱規則",
"Delete Alert Subscription": "刪除告警訂閱規則",
"Alert Event Management": "告警事件管理",
"View Current Alerts": "查看當前告警",
"Delete Current Alert": "刪除當前告警",
"View Historical Alerts": "查看歷史告警",
"Alert Notification": "告警通知",
"View Notification Settings": "通知設置管理",
"View Notification Templates": "通知模板管理",
"Task Management": "任務管理",
"View Task Templates": "查看任務模板",
"Add Task Template": "添加任務模板",
"Modify Task Template": "修改任務模板",
"Delete Task Template": "刪除任務模板",
"View Task Instances": "查看任務實例",
"Add Task Instance": "創建臨時任務",
"Modify Task Instance": "管理正在運行的任務",
"View Task Settings": "查看任務設置",
"targets not exist: %s": "有些機器不存在: %s",
"Infrastructure": "基礎設施",
"View Objects": "查看機器",
"Add Object": "添加機器",
"Modify Object": "修改機器",
"Delete Object": "刪除機器",
"Bind Object": "分配未歸組機器",
"User Management": "用戶管理",
"View User List": "查看用戶列表",
"View User Groups": "查看用戶組",
"Add User Group": "添加用戶組",
"Modify User Group": "修改用戶組",
"Delete User Group": "刪除用戶組",
"Business Group Management": "業務組管理",
"View Business Groups": "查看業務組",
"Add Business Group": "添加業務組",
"Modify Business Group": "修改業務組",
"Delete Business Group": "刪除業務組",
"Permission Management": "權限管理",
"View Permission Settings": "查看權限設置",
"Template Center": "模板中心",
"View Built-in Components": "查看內置模板",
"Add Built-in Component": "添加內置模板",
"Modify Built-in Component": "修改內置模板",
"Delete Built-in Component": "刪除內置模板",
"Data Source Management": "數據源管理",
"View Data Source Configuration": "數據源管理",
"System Information": "系統配置",
"View Variable Configuration": "變量配置管理",
"View Version Information": "查看關於產品",
"View Server Information": "查看告警引擎列表",
"View SSO Configuration": "單點登錄管理",
"View Migration Configuration": "查看遷移配置",
"Host - View": "機器 - 查看",
"Host - Modify": "機器 - 修改",
"Host - Delete": "機器 - 删除",
"Host - Bind Uncategorized": "機器 - 綁定未歸組機器到某個業務組",
"Explorer": "數據查詢",
"Metrics Explorer": "指標查詢",
"Quick View": "快捷視圖",
"Built-in Metric - View": "內置指標 - 查看",
"Built-in Metric - Add": "內置指標 - 新增",
"Built-in Metric - Modify": "內置指標 - 修改",
"Built-in Metric - Delete": "內置指標 - 删除",
"Recording Rule - View": "記錄規則 - 查看",
"Recording Rule - Add": "記錄規則 - 新增",
"Recording Rule - Modify": "記錄規則 - 修改",
"Recording Rule - Delete": "記錄規則 - 删除",
"Logs Explorer": "日誌查詢",
"Index Pattern - View": "索引模式 - 查看",
"Index Pattern - Add": "索引模式 - 新增",
"Index Pattern - Modify": "索引模式 - 修改",
"Index Pattern - Delete": "索引模式 - 删除",
"Dashboard - View": "儀表板 - 查看",
"Dashboard - Add": "儀表板 - 新增",
"Dashboard - Modify": "儀表板 - 修改",
"Dashboard - Delete": "儀表板 - 删除",
"Dashboard - View Public": "儀表板 - 查看公開儀表板",
"Alerting": "告警",
"Alerting Rule - View": "告警規則 - 查看",
"Alerting Rule - Add": "告警規則 - 新增",
"Alerting Rule - Modify": "告警規則 - 修改",
"Alerting Rule - Delete": "告警規則 - 删除",
"Mutting Rule - View": "屏蔽規則 - 查看",
"Mutting Rule - Add": "屏蔽規則 - 新增",
"Mutting Rule - Modify": "屏蔽規則 - 修改",
"Mutting Rule - Delete": "屏蔽規則 - 删除",
"Subscribing Rule - View": "訂閱規則 - 查看",
"Subscribing Rule - Add": "訂閱規則 - 新增",
"Subscribing Rule - Modify": "訂閱規則 - 修改",
"Subscribing Rule - Delete": "訂閱規則 - 删除",
"Self-healing-Script - View": "自愈腳本 - 查看",
"Self-healing-Script - Add": "自愈腳本 - 新增",
"Self-healing-Script - Modify": "自愈腳本 - 修改",
"Self-healing-Script - Delete": "自愈腳本 - 删除",
"Self-healing-Job - View": "自愈任務 - 查看",
"Self-healing-Job - Add": "自愈任務 - 新增",
"Self-healing-Job - Modify": "自愈任務 - 修改",
"Active Event - View": "活躍事件 - 查看",
"Active Event - Delete": "活躍事件 - 删除",
"Historical Event - View": "歷史事件 - 查看",
"Notification": "通知",
"Notification Rule - View": "通知規則 - 查看",
"Notification Rule - Add": "通知規則 - 新增",
"Notification Rule - Modify": "通知規則 - 修改",
"Notification Rule - Delete": "通知規則 - 删除",
"Media Type - View": "通知媒介 - 查看",
"Media Type - Add": "通知媒介 - 新增",
"Media Type - Modify": "通知媒介 - 修改",
"Media Type - Delete": "通知媒介 - 删除",
"Message Template - View": "訊息範本 - 查看",
"Message Template - Add": "訊息範本 - 新增",
"Message Template - Modify": "訊息範本 - 修改",
"Message Template - Delete": "訊息範本 - 删除",
"Event Pipeline - View": "事件管線 - 查看",
"Event Pipeline - Add": "事件管線 - 新增",
"Event Pipeline - Modify": "事件管線 - 修改",
"Event Pipeline - Delete": "事件管線 - 删除",
"Notification Settings - View": "老版本通知設置 - 查看",
"Notification Templates - View": "老版本訊息範本 - 查看",
"Integrations": "集成中心",
"Data Source - View": "資料源 - 查看",
"Component - View": "組件 - 查看",
"Component - Add": "組件 - 新增",
"Component - Modify": "組件 - 修改",
"Component - Delete": "組件 - 刪除",
"Embedded Product - View": "系統集成 - 查看",
"Embedded Product - Add": "系統集成 - 新增",
"Embedded Product - Modify": "系統集成 - 修改",
"Embedded Product - Delete": "系統集成 - 刪除",
"Organization": "人員組織",
"User - View": "用戶 - 查看",
"User - Add": "用戶 - 新增",
"User - Modify": "用戶 - 修改",
"User - Delete": "用戶 - 刪除",
"Team - View": "團隊 - 查看",
"Team - Add": "團隊 - 新增",
"Team - Modify": "團隊 - 修改",
"Team - Delete": "團隊 - 刪除",
"Business Group - View": "業務組 - 查看",
"Business Group - Add": "業務組 - 新增",
"Business Group - Modify": "業務組 - 修改",
"Business Group - Delete": "業務組 - 删除",
"Role - View": "角色 - 查看",
"Role - Add": "角色 - 新增",
"Role - Modify": "角色 - 修改",
"Role - Delete": "角色 - 删除",
"System Settings": "系統配置",
"View Site Settings": "查看站點設置",
"View Message Templates": "查看訊息範本",
"Add Message Templates": "新增訊息範本",
"Modify Message Templates": "修改訊息範本",
"Delete Message Templates": "刪除訊息範本",
"View Notify Rules": "查看通知規則",
"Add Notify Rules": "新增通知規則",
"Modify Notify Rules": "修改通知規則",
"Delete Notify Rules": "刪除通知規則",
"View Notify Channels": "查看通知媒介",
"Add Notify Channels": "新增通知媒介",
"Modify Notify Channels": "修改通知媒介",
"Delete Notify Channels": "刪除通知媒介",
"Notify Channels": "通知媒介",
"Notify Rules": "通知規則",
"Message Templates": "訊息範本",
"User Contact Management": "用戶聯絡方式管理"
"View Variable Settings": "查看變量配置",
"View SSO Settings": "查看單點登錄配置",
"View Alerting Engines": "查看告警引擎列表",
"View Product Version": "查看產品版本",
"---------zh_HK--------": "---------zh_HK--------"
},
"ja_JP": {
"Username or password invalid": "ユーザー名またはパスワードが無効です",
@@ -521,109 +375,277 @@ var I18N = `{
"admin role can not be modified": "管理者ロールは変更できません",
"builtin payload already exists": "ビルトインテンプレートは既に存在します",
"This functionality has not been enabled. Please contact the system administrator to activate it.": "この機能はまだ有効になっていません。システム管理者に連絡して有効にしてください",
"Dashboards": "ダッシュボード",
"View Dashboards": "ダッシュボードの表示",
"Add Dashboard": "ダッシュボードの追加",
"Modify Dashboard": "ダッシュボードの修正",
"Delete Dashboard": "ダッシュボードの削除",
"Modify Embedded Dashboard": "埋め込みダッシュボードの修正",
"View Embedded Dashboard": "統合ダッシュボードの表示",
"View Public Dashboard": "公開ダッシュボードの表示",
"Time Series Metrics": "時系列指標",
"View Metric Data": "即時クエリの表示",
"View Object Data": "クイックビューの表示",
"Metric Views": "メトリクスビュー",
"View Built-in Metrics": "ビルトインメトリクスの表示",
"Add Built-in Metric": "ビルトインメトリクスの追加",
"Modify Built-in Metric": "ビルトインメトリクスの修正",
"Delete Built-in Metric": "ビルトインメトリクスの削除",
"Recording Rule Management": "記録ルール",
"View Recording Rules": "記録ルールの表示",
"Add Recording Rule": "記録ルールの追加",
"Modify Recording Rule": "記録ルールの修正",
"Delete Recording Rule": "記録ルールの削除",
"Log Analysis": "ログ分析",
"View Logs": "ログの表示",
"View Index Patterns": "インデックスパターンの表示",
"Alert Rules": "アラートルール",
"View Alert Rules": "アラートルールの表示",
"Add Alert Rule": "アラートルールの追加",
"Modify Alert Rule": "アラートルールの修正",
"Delete Alert Rule": "アラートルールの削除",
"Alert Silence Management": "抑制ルール",
"View Alert Silences": "アラート抑制ルールの表示",
"Add Alert Silence": "アラート抑制ルールの追加",
"Modify Alert Silence": "アラート抑制ルールの修正",
"Delete Alert Silence": "アラート抑制ルールの削除",
"Alert Subscription Management": "購読ルール",
"View Alert Subscriptions": "アラート購読ルールの表示",
"Add Alert Subscription": "アラート購読ルールの追加",
"Modify Alert Subscription": "アラート購読ルールの修正",
"Delete Alert Subscription": "アラート購読ルールの削除",
"Alert Event Management": "アラートイベント管理",
"View Current Alerts": "アクティブアラートの表示",
"Delete Current Alert": "アクティブアラートの削除",
"View Historical Alerts": "過去のアラートの表示",
"Alert Notification": "アラート通知",
"View Notification Settings": "通知設定の管理",
"View Notification Templates": "通知テンプレートの管理",
"Task Management": "タスク管理",
"View Task Templates": "タスクテンプレートの表示",
"Add Task Template": "タスクテンプレートの追加",
"Modify Task Template": "タスクテンプレートの修正",
"Delete Task Template": "タスクテンプレートの削除",
"View Task Instances": "タスク履歴の表示",
"Add Task Instance": "一時的なタスクの作成",
"Modify Task Instance": "実行中のタスクの管理",
"View Task Settings": "タスク設定の表示",
"Infrastructure": "インフラ",
"View Objects": "マシンの表示",
"Add Object": "マシンの追加",
"Modify Object": "マシンの修正",
"Delete Object": "マシンの削除",
"Bind Object": "未グループ化マシンの割り当て",
"User Management": "ユーザー管理",
"View User List": "ユーザーリストの表示",
"View User Groups": "チームの表示",
"Add User Group": "チームの追加",
"Modify User Group": "チームの修正",
"Delete User Group": "チームの削除",
"Business Group Management": "業務組管理",
"View Business Groups": "業務組の表示",
"Add Business Group": "業務組の追加",
"Modify Business Group": "業務組の修正",
"Delete Business Group": "業務組の削除",
"Permission Management": "権限管理",
"View Permission Settings": "権限設定の表示",
"Template Center": "テンプレートセンター",
"View Built-in Components": "内蔵テンプレートの表示",
"Add Built-in Component": "内蔵テンプレートの追加",
"Modify Built-in Component": "内蔵テンプレートの修正",
"Delete Built-in Component": "内蔵テンプレートの削除",
"Data Source Management": "データソース管理",
"View Data Source Configuration": "データソース管理",
"System Information": "システム設定",
"View Variable Configuration": "変数設定管理",
"View Version Information": "製品情報",
"View Server Information": "アラートエンジン一覧",
"View SSO Configuration": "シングルサインオン管理",
"View Migration Configuration": "移行設定の表示",
"targets not exist: %s": "いくつかのマシンが存在しません: %s",
"Infrastructure": "インフラストラクチャ",
"Host - View": "機器 - 閲覧",
"Host - Modify": "機器 - 修正",
"Host - Delete": "機器 - 削除",
"Host - Bind Uncategorized": "機器 - グループ未所属の機器をある業務グループにバインドする",
"Explorer": "データ検索",
"Metrics Explorer": "メトリクス エクスプローラー",
"Quick View": "クイック ビュー",
"Built-in Metric - View": "組み込みメトリクス - 閲覧",
"Built-in Metric - Add": "組み込みメトリクス - 追加",
"Built-in Metric - Modify": "組み込みメトリクス - 修正",
"Built-in Metric - Delete": "組み込みメトリクス - 削除",
"Recording Rule - View": "記録ルール - 閲覧",
"Recording Rule - Add": "記録ルール - 追加",
"Recording Rule - Modify": "記録ルール - 修正",
"Recording Rule - Delete": "記録ルール - 削除",
"Logs Explorer": "ログ エクスプローラー",
"Index Pattern - View": "インデックス パターン - 閲覧",
"Index Pattern - Add": "インデックス パターン - 追加",
"Index Pattern - Modify": "インデックス パターン - 修正",
"Index Pattern - Delete": "インデックス パターン - 削除",
"Dashboard - View": "ダッシュボード - 閲覧",
"Dashboard - Add": "ダッシュボード - 追加",
"Dashboard - Modify": "ダッシュボード - 修正",
"Dashboard - Delete": "ダッシュボード - 削除",
"Dashboard - View Public": "ダッシュボード - 公開されたダッシュボードを見る",
"Alerting": "アラート",
"Alerting Rule - View": "アラートルール - 閲覧",
"Alerting Rule - Add": "アラートルール - 追加",
"Alerting Rule - Modify": "アラートルール - 修正",
"Alerting Rule - Delete": "アラートルール - 削除",
"Mutting Rule - View": "抑制ルール - 閲覧",
"Mutting Rule - Add": "抑制ルール - 追加",
"Mutting Rule - Modify": "抑制ルール - 修正",
"Mutting Rule - Delete": "抑制ルール - 削除",
"Subscribing Rule - View": "購読ルール - 閲覧",
"Subscribing Rule - Add": "購読ルール - 追加",
"Subscribing Rule - Modify": "購読ルール - 修正",
"Subscribing Rule - Delete": "購読ルール - 削除",
"Self-healing-Script - View": "タスクテンプレート - 閲覧",
"Self-healing-Script - Add": "タスクテンプレート - 追加",
"Self-healing-Script - Modify": "タスクテンプレート - 修正",
"Self-healing-Script - Delete": "タスクテンプレート - 削除",
"Self-healing-Job - View": "一時的なタスク - 閲覧",
"Self-healing-Job - Add": "一時的なタスク - 追加",
"Self-healing-Job - Modify": "一時的なタスク - 修正",
"Active Event - View": "アクティブアラート - 閲覧",
"Active Event - Delete": "アクティブアラート - 削除",
"Historical Event - View": "過去のアラート - 閲覧",
"Notification": "通知",
"Notification Rule - View": "通知ルール - 閲覧",
"Notification Rule - Add": "通知ルール - 追加",
"Notification Rule - Modify": "通知ルール - 修正",
"Notification Rule - Delete": "通知ルール - 削除",
"Media Type - View": "通知メディア - 閲覧",
"Media Type - Add": "通知メディア - 追加",
"Media Type - Modify": "通知メディア - 修正",
"Media Type - Delete": "通知メディア - 削除",
"Message Template - View": "メッセージテンプレート - 閲覧",
"Message Template - Add": "メッセージテンプレート - 追加",
"Message Template - Modify": "メッセージテンプレート - 修正",
"Message Template - Delete": "メッセージテンプレート - 削除",
"Event Pipeline - View": "イベント パイプライン - 閲覧",
"Event Pipeline - Add": "イベント パイプライン - 追加",
"Event Pipeline - Modify": "イベント パイプライン - 修正",
"Event Pipeline - Delete": "イベント パイプライン - 削除",
"Notification Settings - View": "旧バージョンの通知設定 - 閲覧",
"Notification Templates - View": "旧バージョンのメッセージテンプレート - 閲覧",
"Integrations": "統合センター",
"Data Source - View": "データソース - 閲覧",
"Component - View": "コンポーネント - 閲覧",
"Component - Add": "コンポーネント - 追加",
"Component - Modify": "コンポーネント - 修正",
"Component - Delete": "コンポーネント - 削除",
"Embedded Product - View": "システム統合 - 閲覧",
"Embedded Product - Add": "システム統合 - 追加",
"Embedded Product - Modify": "システム統合 - 修正",
"Embedded Product - Delete": "システム統合 - 削除",
"Organization": "組織",
"User - View": "ユーザー - 閲覧",
"User - Add": "ユーザー - 追加",
"User - Modify": "ユーザー - 修正",
"User - Delete": "ユーザー - 削除",
"Team - View": "チーム - 閲覧",
"Team - Add": "チーム - 追加",
"Team - Modify": "チーム - 修正",
"Team - Delete": "チーム - 削除",
"Business Group - View": "業務グループ - 閲覧",
"Business Group - Add": "業務グループ - 追加",
"Business Group - Modify": "業務グループ - 修正",
"Business Group - Delete": "業務グループ - 削除",
"Role - View": "役割 - 閲覧",
"Role - Add": "役割 - 追加",
"Role - Modify": "役割 - 修正",
"Role - Delete": "役割 - 削除",
"System Settings": "システム設定",
"View Site Settings": "サイト設定の表示",
"View Message Templates": "メッセージテンプレートを表示",
"Add Message Templates": "メッセージテンプレートを追加する",
"Modify Message Templates": "メッセージテンプレートを変更する",
"Delete Message Templates": "メッセージテンプレートの削除",
"View Notify Rules": "通知ルールを表示",
"Add Notify Rules": "通知ルールを追加する",
"Modify Notify Rules": "通知ルールを変更する",
"Delete Notify Rules": "通知ルールの削除",
"View Notify Channels": "通知媒体を表示",
"Add Notify Channels": "通知媒体を追加",
"Modify Notify Channels": "通知媒体を変更する",
"Delete Notify Channels": "通知媒体を削除する",
"Notify Channels": "通知媒体",
"Notify Rules": "通知ルール",
"Message Templates": "メッセージテンプレート",
"User Contact Management": "ユーザー連絡先管理",
"View Variable Settings": "変数設定の表示",
"View SSO Settings": "シングルサインオン設定の表示",
"View Alerting Engines": "アラートエンジンの表示",
"View Product Version": "製品のバージョンを見る",
"---------ja_JP--------": "---------ja_JP--------"
},
"ru_RU": {
"Username or password invalid": "Неверное имя пользователя или пароль",
"incorrect verification code": "Неверный код подтверждения",
"roles empty": "Роли не могут быть пустыми",
"Username already exists": "Это имя пользователя уже существует. Пожалуйста, используйте другое",
"failed to count user-groups": "Ошибка проверки данных. Пожалуйста, повторите попытку",
"UserGroup already exists": "Имя группы уже существует. Пожалуйста, используйте другое",
"members empty": "Участники не могут быть пустыми",
"At least one team have rw permission": "По крайней мере одна команда должна иметь права на чтение и запись",
"Failed to create BusiGroup(%s)": "Не удалось создать бизнес-группу [%s]. Пожалуйста, повторите попытку",
"business group id invalid": "Неверный идентификатор бизнес-группы",
"idents empty": "Объекты мониторинга не могут быть пустыми",
"invalid tag(%s)": "Тег [%s] недействителен",
"invalid tagkey(%s): cannot contains . ": "Ключ тега [%s] не может содержать точку (.)",
"invalid tagkey(%s): cannot contains _ ": "Ключ тега [%s] не может содержать подчеркивание (_)",
"invalid tagkey(%s)": "Ключ тега [%s] недействителен",
"duplicate tagkey(%s)": "Ключ тега (%s) дублируется",
"name is empty": "Имя не может быть пустым",
"Ident duplicate": "Уникальный идентификатор панели мониторинга уже существует",
"No such dashboard": "Панель мониторинга не найдена",
"Name has invalid characters": "Имя содержит недопустимые символы",
"Name is blank": "Имя не может быть пустым",
"forbidden": "Нет доступа",
"builtin alerts is empty, file: %s": "Встроенный шаблон оповещений пуст %s",
"input json is empty": "Предоставленные данные не могут быть пустыми",
"fields empty": "Выбранные поля не могут быть пустыми",
"No such AlertRule": "Правило оповещения не найдено",
"GroupId(%d) invalid": "Неверный идентификатор бизнес-группы",
"No such recording rule": "Правило записи не найдено",
"tags is blank": "Теги не могут быть пустыми",
"oops... etime(%d) <= btime(%d)": "Время начала не может быть позже времени окончания",
"group_id invalid": "Бизнес-группа недействительна",
"No such AlertMute": "Правило отключения оповещений не найдено",
"rule_id and tags are both blank": "Правило оповещения и теги не могут быть пустыми одновременно",
"rule is blank": "Правило не может быть пустым",
"rule invalid": "Правило недействительно. Проверьте правильность ввода",
"unsupported field: %s": "Поле %s не поддерживается",
"arg(batch) should be nonnegative": "Параметр 'batch' должен быть неотрицательным",
"arg(tolerance) should be nonnegative": "Параметр 'tolerance' должен быть неотрицательным",
"arg(timeout) should be nonnegative": "Параметр 'timeout' должен быть неотрицательным",
"arg(timeout) longer than five days": "Параметр 'timeout' не может превышать 5 дней",
"arg(title) is required": "Параметр 'title' является обязательным",
"created task.id is zero": "Идентификатор задачи равен нулю",
"invalid ibex address: %s": "Неверный адрес ibex %s",
"url path invalid": "Неверный URL-путь",
"no such server": "Экземпляр не найден",
"admin role can not be modified": "Роль администратора не может быть изменена",
"builtin payload already exists": "Встроенный шаблон уже существует",
"This functionality has not been enabled. Please contact the system administrator to activate it.": "Эта функция не активирована. Пожалуйста, обратитесь к системному администратору для активации",
"targets not exist: %s": "Некоторые машины не существуют: %s",
"Infrastructure": "Инфраструктура",
"Host - View": "Хост - Просмотр",
"Host - Modify": "Хост - Изменить",
"Host - Delete": "Хост - Удалить",
"Host - Bind Uncategorized": "Хост - Привязать неразмеченные хосты к бизнес-группе",
"Explorer": "Поиск данных",
"Metrics Explorer": "Поиск метрик",
"Quick View": "Быстрый просмотр",
"Built-in Metric - View": "Встроенные метрики - Просмотр",
"Built-in Metric - Add": "Встроенные метрики - Добавить",
"Built-in Metric - Modify": "Встроенные метрики - Изменить",
"Built-in Metric - Delete": "Встроенные метрики - Удалить",
"Recording Rule - View": "Правила записи - Просмотр",
"Recording Rule - Add": "Правила записи - Добавить",
"Recording Rule - Modify": "Правила записи - Изменить",
"Recording Rule - Delete": "Правила записи - Удалить",
"Logs Explorer": "Поиск логов",
"Index Pattern - View": "Шаблоны индексов - Просмотр",
"Index Pattern - Add": "Шаблоны индексов - Добавить",
"Index Pattern - Modify": "Шаблоны индексов - Изменить",
"Index Pattern - Delete": "Шаблоны индексов - Удалить",
"Dashboard - View": "Панель мониторинга - Просмотр",
"Dashboard - Add": "Панель мониторинга - Добавить",
"Dashboard - Modify": "Панель мониторинга - Изменить",
"Dashboard - Delete": "Панель мониторинга - Удалить",
"Dashboard - View Public": "Панель мониторинга - Просмотр публичных панелей",
"Alerting": "Оповещения",
"Alerting Rule - View": "Правила оповещений - Просмотр",
"Alerting Rule - Add": "Правила оповещений - Добавить",
"Alerting Rule - Modify": "Правила оповещений - Изменить",
"Alerting Rule - Delete": "Правила оповещений - Удалить",
"Mutting Rule - View": "Правила отключения оповещений - Просмотр",
"Mutting Rule - Add": "Правила отключения оповещений - Добавить",
"Mutting Rule - Modify": "Правила отключения оповещений - Изменить",
"Mutting Rule - Delete": "Правила отключения оповещений - Удалить",
"Subscribing Rule - View": "Правила подписки - Просмотр",
"Subscribing Rule - Add": "Правила подписки - Добавить",
"Subscribing Rule - Modify": "Правила подписки - Изменить",
"Subscribing Rule - Delete": "Правила подписки - Удалить",
"Self-healing-Script - View": "Скрипты самоисцеления - Просмотр",
"Self-healing-Script - Add": "Скрипты самоисцеления - Добавить",
"Self-healing-Script - Modify": "Скрипты самоисцеления - Изменить",
"Self-healing-Script - Delete": "Скрипты самоисцеления - Удалить",
"Self-healing-Job - View": "Задачи самоисцеления - Просмотр",
"Self-healing-Job - Add": "Задачи самоисцеления - Добавить",
"Self-healing-Job - Modify": "Задачи самоисцеления - Изменить",
"Active Event - View": "Активные события - Просмотр",
"Active Event - Delete": "Активные события - Удалить",
"Historical Event - View": "Исторические события - Просмотр",
"Notification": "Уведомления",
"Notification Rule - View": "Правила уведомлений - Просмотр",
"Notification Rule - Add": "Правила уведомлений - Добавить",
"Notification Rule - Modify": "Правила уведомлений - Изменить",
"Notification Rule - Delete": "Правила уведомлений - Удалить",
"Media Type - View": "Типы уведомлений - Просмотр",
"Media Type - Add": "Типы уведомлений - Добавить",
"Media Type - Modify": "Типы уведомлений - Изменить",
"Media Type - Delete": "Типы уведомлений - Удалить",
"Message Template - View": "Шаблоны сообщений - Просмотр",
"Message Template - Add": "Шаблоны сообщений - Добавить",
"Message Template - Modify": "Шаблоны сообщений - Изменить",
"Message Template - Delete": "Шаблоны сообщений - Удалить",
"Event Pipeline - View": "Конвейер событий - Просмотр",
"Event Pipeline - Add": "Конвейер событий - Добавить",
"Event Pipeline - Modify": "Конвейер событий - Изменить",
"Event Pipeline - Delete": "Конвейер событий - Удалить",
"Notification Settings - View": "Настройки уведомлений (старый вариант) - Просмотр",
"Notification Templates - View": "Шаблоны уведомлений (старый вариант) - Просмотр",
"Integrations": "Центр интеграций",
"Data Source - View": "Источники данных - Просмотр",
"Component - View": "Компоненты - Просмотр",
"Component - Add": "Компоненты - Добавить",
"Component - Modify": "Компоненты - Изменить",
"Component - Delete": "Компоненты - Удалить",
"Embedded Product - View": "Встроенные продукты - Просмотр",
"Embedded Product - Add": "Встроенные продукты - Добавить",
"Embedded Product - Modify": "Встроенные продукты - Изменить",
"Embedded Product - Delete": "Встроенные продукты - Удалить",
"Organization": "Организация",
"User - View": "Пользователи - Просмотр",
"User - Add": "Пользователи - Добавить",
"User - Modify": "Пользователи - Изменить",
"User - Delete": "Пользователи - Удалить",
"Team - View": "Команды - Просмотр",
"Team - Add": "Команды - Добавить",
"Team - Modify": "Команды - Изменить",
"Team - Delete": "Команды - Удалить",
"Business Group - View": "Бизнес-группы - Просмотр",
"Business Group - Add": "Бизнес-группы - Добавить",
"Business Group - Modify": "Бизнес-группы - Изменить",
"Business Group - Delete": "Бизнес-группы - Удалить",
"Role - View": "Роли - Просмотр",
"Role - Add": "Роли - Добавить",
"Role - Modify": "Роли - Изменить",
"Role - Delete": "Роли - Удалить",
"System Settings": "Настройки системы",
"View Site Settings": "Просмотр настроек сайта",
"View Variable Settings": "Просмотр переменных",
"View SSO Settings": "Просмотр настроек единого входа",
"View Alerting Engines": "Просмотр списка алертинг-инженеров",
"View Product Version": "Просмотр версии продукта",
"---------ru_RU--------": "---------ru_RU--------"
}
}`

View File

@@ -187,16 +187,16 @@ func checkSqliteDatabaseExist(c DBConfig) (bool, error) {
func checkPostgresDatabaseExist(c DBConfig) (bool, error) {
dsnParts := strings.Split(c.DSN, " ")
dbName := ""
dbpair := ""
for _, part := range dsnParts {
if strings.HasPrefix(part, "dbname=") {
dbName = part[strings.Index(part, "=")+1:]
dbpair = part
}
}
connectionStr := strings.Replace(c.DSN, dbpair, "dbname=postgres", 1)
dialector := postgres.Open(connectionStr)
dbName := ""
dbpair := ""
for _, part := range dsnParts {
if strings.HasPrefix(part, "dbname=") {
dbName = part[strings.Index(part, "=")+1:]
dbpair = part
}
}
connectionStr := strings.Replace(c.DSN, dbpair, "dbname=postgres", 1)
dialector := postgres.Open(connectionStr)
gconfig := &gorm.Config{
NamingStrategy: schema.NamingStrategy{
@@ -362,6 +362,5 @@ func New(c DBConfig) (*gorm.DB, error) {
sqlDB.SetMaxOpenConns(c.MaxOpenConns)
sqlDB.SetConnMaxLifetime(time.Duration(c.MaxLifetime) * time.Second)
}
return db, nil
}

View File

@@ -9,6 +9,8 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/pstat"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/logger"
@@ -17,16 +19,18 @@ import (
type Set struct {
sync.Mutex
items map[string]struct{}
redis storage.Redis
ctx *ctx.Context
items map[string]struct{}
redis storage.Redis
ctx *ctx.Context
configs pconf.Pushgw
}
func New(ctx *ctx.Context, redis storage.Redis) *Set {
func New(ctx *ctx.Context, redis storage.Redis, configs pconf.Pushgw) *Set {
set := &Set{
items: make(map[string]struct{}),
redis: redis,
ctx: ctx,
items: make(map[string]struct{}),
redis: redis,
ctx: ctx,
configs: configs,
}
set.Init()
@@ -95,9 +99,13 @@ type TargetUpdate struct {
}
func (s *Set) UpdateTargets(lst []string, now int64) error {
err := updateTargetsUpdateTs(lst, now, s.redis)
if len(lst) == 0 {
return nil
}
err := s.updateTargetsUpdateTs(lst, now, s.redis)
if err != nil {
logger.Errorf("failed to update targets:%v update_ts: %v", lst, err)
logger.Errorf("update_ts: failed to update targets: %v error: %v", lst, err)
}
if !s.ctx.IsCenter {
@@ -109,20 +117,6 @@ func (s *Set) UpdateTargets(lst []string, now int64) error {
return err
}
count := int64(len(lst))
if count == 0 {
return nil
}
ret := s.ctx.DB.Table("target").Where("ident in ?", lst).Update("update_at", now)
if ret.Error != nil {
return ret.Error
}
if ret.RowsAffected == count {
return nil
}
// there are some idents not found in db, so insert them
var exists []string
err = s.ctx.DB.Table("target").Where("ident in ?", lst).Pluck("ident", &exists).Error
@@ -134,24 +128,27 @@ func (s *Set) UpdateTargets(lst []string, now int64) error {
for i := 0; i < len(news); i++ {
err = s.ctx.DB.Exec("INSERT INTO target(ident, update_at) VALUES(?, ?)", news[i], now).Error
if err != nil {
logger.Error("failed to insert target:", news[i], "error:", err)
logger.Error("upsert_target: failed to insert target:", news[i], "error:", err)
}
}
// 从批量更新一批机器的时间戳改成逐台更新是为了避免批量更新时mysql的锁竞争问题
for i := 0; i < len(exists); i++ {
err = s.ctx.DB.Exec("UPDATE target SET update_at = ? WHERE ident = ?", now, exists[i]).Error
if err != nil {
logger.Error("upsert_target: failed to update target:", exists[i], "error:", err)
}
}
return nil
}
func updateTargetsUpdateTs(lst []string, now int64, redis storage.Redis) error {
func (s *Set) updateTargetsUpdateTs(lst []string, now int64, redis storage.Redis) error {
if redis == nil {
return fmt.Errorf("redis is nil")
}
count := int64(len(lst))
if count == 0 {
return nil
}
newMap := make(map[string]interface{}, count)
newMap := make(map[string]interface{}, len(lst))
for _, ident := range lst {
hostUpdateTime := models.HostUpdteTime{
UpdateTime: now,
@@ -160,6 +157,78 @@ func updateTargetsUpdateTs(lst []string, now int64, redis storage.Redis) error {
newMap[models.WrapIdentUpdateTime(ident)] = hostUpdateTime
}
err := storage.MSet(context.Background(), redis, newMap)
return s.updateTargetTsInRedis(newMap, redis)
}
// updateTargetTsInRedis writes the ident->update-time entries in newMap to
// Redis, splitting the work into batches of at most s.configs.UpdateTargetBatchSize
// keys per MSet. All batches share a single timeout derived from
// s.configs.UpdateTargetTimeoutMills. If any batch fails, the last batch error
// is returned, but the remaining batches are still attempted.
func (s *Set) updateTargetTsInRedis(newMap map[string]interface{}, redis storage.Redis) (err error) {
	if len(newMap) == 0 {
		return nil
	}
	timeout := time.Duration(s.configs.UpdateTargetTimeoutMills) * time.Millisecond
	batchSize := s.configs.UpdateTargetBatchSize
	// One shared deadline covers every batch written below.
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	if len(newMap) <= batchSize {
		// Small enough to fit in one batch: issue a single MSet directly.
		return s.writeTargetTsInRedis(ctx, redis, newMap)
	}
	i := 0
	batchMap := make(map[string]interface{}, batchSize)
	for mapKey := range newMap {
		batchMap[mapKey] = newMap[mapKey]
		if (i+1)%batchSize == 0 {
			// Batch is full: flush it, remember any failure, start a fresh batch.
			if e := s.writeTargetTsInRedis(ctx, redis, batchMap); e != nil {
				err = e
			}
			batchMap = make(map[string]interface{}, batchSize)
		}
		i++
	}
	if len(batchMap) > 0 {
		// Flush the final, partially-filled batch.
		if e := s.writeTargetTsInRedis(ctx, redis, batchMap); e != nil {
			err = e
		}
	}
	return err
}
// writeTargetTsInRedis performs one MSet of content into Redis, retrying up to
// s.configs.UpdateTargetRetryCount times with a fixed interval between attempts.
// Per-attempt latency is recorded in the pstat.RedisOperationLatency histogram
// with status "success" on the first successful attempt, or "fail" for the
// final failed attempt. Returns nil on success, or an error naming the keys
// after all retries are exhausted.
func (s *Set) writeTargetTsInRedis(ctx context.Context, redis storage.Redis, content map[string]interface{}) error {
	retryCount := s.configs.UpdateTargetRetryCount
	retryInterval := time.Duration(s.configs.UpdateTargetRetryIntervalMills) * time.Millisecond
	// Collect the keys up front so log/error messages can identify which
	// idents this batch covers.
	keys := make([]string, 0, len(content))
	for k := range content {
		keys = append(keys, k)
	}
	for i := 0; i < retryCount; i++ {
		start := time.Now()
		err := storage.MSet(ctx, redis, content)
		duration := time.Since(start).Seconds()
		logger.Debugf("update_ts: write target ts in redis, keys: %v, retryCount: %d, retryInterval: %v, error: %v", keys, retryCount, retryInterval, err)
		if err == nil {
			pstat.RedisOperationLatency.WithLabelValues("mset_target_ts", "success").Observe(duration)
			return nil
		} else {
			logger.Errorf("update_ts: failed to write target ts in redis: %v, keys: %v, retry %d/%d", err, keys, i+1, retryCount)
		}
		if i < retryCount-1 {
			// No sleep is needed after the last attempt; every earlier failed
			// attempt sleeps before retrying.
			time.Sleep(retryInterval)
		}
		if i == retryCount-1 {
			// Record the latency of the final failed attempt.
			pstat.RedisOperationLatency.WithLabelValues("mset_target_ts", "fail").Observe(duration)
		}
	}
	return fmt.Errorf("failed to write target ts in redis after %d retries, keys: %v", retryCount, keys)
}

View File

@@ -14,6 +14,11 @@ import (
)
type Pushgw struct {
UpdateTargetRetryCount int
UpdateTargetRetryIntervalMills int64
UpdateTargetTimeoutMills int64
UpdateTargetBatchSize int
BusiGroupLabelKey string
IdentMetrics []string
IdentStatsThreshold int
@@ -33,7 +38,7 @@ type WriterGlobalOpt struct {
QueuePopSize int
QueueNumber int // 每个 writer 固定数量的队列
QueueWaterMark float64 // 队列将满,开始丢弃数据的水位,比如 0.8
AllQueueMaxSize int64
AllQueueMaxSize int64 // 自动计算得到,无需配置
AllQueueMaxSizeInterval int
RetryCount int
RetryInterval int64
@@ -103,6 +108,22 @@ type RelabelConfig struct {
}
func (p *Pushgw) PreCheck() {
if p.UpdateTargetRetryCount <= 0 {
p.UpdateTargetRetryCount = 3
}
if p.UpdateTargetRetryIntervalMills <= 0 {
p.UpdateTargetRetryIntervalMills = 500
}
if p.UpdateTargetTimeoutMills <= 0 {
p.UpdateTargetTimeoutMills = 3000
}
if p.UpdateTargetBatchSize <= 0 {
p.UpdateTargetBatchSize = 20
}
if p.BusiGroupLabelKey == "" {
p.BusiGroupLabelKey = "busigroup"
}

View File

@@ -1,4 +1,4 @@
package writer
package pstat
import "github.com/prometheus/client_golang/prometheus"
@@ -8,7 +8,36 @@ const (
)
var (
// 发往后端TSDB延迟如何
CounterSampleTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "samples_received_total",
Help: "Total number samples received.",
}, []string{"channel"})
CounterDropSampleTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "drop_sample_total",
Help: "Number of drop sample.",
})
CounterSampleReceivedByIdent = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "sample_received_by_ident",
Help: "Number of sample push by ident.",
}, []string{"host_ident"})
RequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "http_request_duration_seconds",
Help: "HTTP request latencies in seconds.",
}, []string{"service", "code", "path", "method"},
)
ForwardDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
@@ -65,10 +94,25 @@ var (
Name: "push_queue_over_limit_error_total",
Help: "Number of push queue over limit.",
})
RedisOperationLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "redis_operation_latency_seconds",
Help: "Histogram of latencies for Redis operations",
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5},
},
[]string{"operation", "status"},
)
)
func init() {
prometheus.MustRegister(
CounterSampleTotal,
CounterDropSampleTotal,
CounterSampleReceivedByIdent,
RequestDuration,
ForwardDuration,
ForwardKafkaDuration,
CounterWirteTotal,
@@ -76,5 +120,6 @@ func init() {
CounterPushQueueErrorTotal,
GaugeSampleQueueSize,
CounterPushQueueOverLimitTotal,
RedisOperationLatency,
)
}

View File

@@ -42,7 +42,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
}
idents := idents.New(ctx, redis)
idents := idents.New(ctx, redis, config.Pushgw)
metas := metas.New(redis)
stats := memsto.NewSyncStats()

View File

@@ -3,6 +3,7 @@ package router
import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pushgw/pstat"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/logger"
@@ -152,7 +153,7 @@ func (rt *Router) ForwardToQueue(clientIP string, queueid string, v *prompb.Time
}
if rt.DropSample(v) {
CounterDropSampleTotal.Inc()
pstat.CounterDropSampleTotal.Inc()
return nil
}

View File

@@ -14,6 +14,7 @@ import (
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/pstat"
"github.com/ccfos/nightingale/v6/pushgw/writer"
)
@@ -42,8 +43,7 @@ func stat() gin.HandlerFunc {
method := c.Request.Method
labels := []string{"pushgw", code, c.FullPath(), method}
RequestCounter.WithLabelValues(labels...).Inc()
RequestDuration.WithLabelValues(labels...).Observe(float64(time.Since(start).Seconds()))
pstat.RequestDuration.WithLabelValues(labels...).Observe(float64(time.Since(start).Seconds()))
}
}
@@ -75,8 +75,6 @@ func (rt *Router) Config(r *gin.Engine) {
return
}
registerMetrics()
r.Use(stat())
// datadog url: http://n9e-pushgw.foo.com/datadog
// use apiKey not basic auth

Some files were not shown because too many files have changed in this diff Show More