Skip to content

Commit

Permalink
fixed bugs. change nacos monitor to prometheus. (#880)
Browse files Browse the repository at this point in the history
* Fixed a bug that occurred when a Prometheus response contains two consecutive backslashes (e.g. an escaped Windows path in a label value).
Changed the Nacos monitor from JSON parsing to Prometheus parsing.

* add prometheus test

---------

Signed-off-by: vinci <55838224+vinci-897@users.noreply.github.com>
Co-authored-by: tomsun28 <tomsun28@outlook.com>
  • Loading branch information
leo-934 and tomsun28 authored Apr 18, 2023
1 parent cdd88f4 commit bf3ee39
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ private String readTokenAsLabelValue(StrBuffer buffer) {
default:
throw new ParseException("parse label value error");
}
escaped = false;
} else {
switch (c) {
case QUOTES:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ class ExporterParserTest {

@Test
void textToMetric() {
String resp = "# HELP go_gc_cycles_automatic_gc_cycles_total Count of completed GC cycles generated by the Go runtime.\n" +
String resp = "# HELP disk_total_bytes Total space for path\n" +
"# TYPE disk_total_bytes gauge\n" +
"disk_total_bytes{path=\"C:\\\\hertzbeat\\\\repo\\\\testpath\",} 4.29496725504E11\n" +
"# HELP go_gc_cycles_automatic_gc_cycles_total Count of completed GC cycles generated by the Go runtime.\n" +
"# TYPE go_gc_cycles_automatic_gc_cycles_total counter\n" +
"go_gc_cycles_automatic_gc_cycles_total 0\n" +
"# HELP go_gc_cycles_forced_gc_cycles_total Count of completed GC cycles forced by the application.\n" +
Expand Down Expand Up @@ -42,7 +45,7 @@ void textToMetric() {
"# EOF";
ExporterParser parser = new ExporterParser();
Map<String, MetricFamily> metricFamilyMap = parser.textToMetric(resp);
assertEquals(6, metricFamilyMap.size());
assertEquals(7, metricFamilyMap.size());
assertEquals(5, metricFamilyMap.get("go_gc_duration_seconds").getMetricList().get(0).getSummary().getQuantileList().size());
}
}
133 changes: 50 additions & 83 deletions manager/src/main/resources/define/app-nacos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,118 +65,85 @@ params:
# collect metrics config list
# 采集指标组配置列表
metrics:
# metrics - system
# 监控指标组 - system
- name: system
- name: system_cpu_usage
# metrics group scheduling priority (0->127) -> (high->low); metrics groups with the same priority are scheduled in parallel
# the priority 0 metrics group is the availability metrics group: it is scheduled first, and scheduling continues only if its collection succeeds
# 指标组调度优先级(0->127)->(优先级高->低) 优先级低的指标组会等优先级高的指标组采集完成后才会被调度, 相同优先级的指标组会并行调度采集
# 优先级为0的指标组为可用性指标组,即它会被首先调度,采集成功才会继续调度其它指标组,采集失败则中断调度
priority: 0
# collect metrics content
# 具体监控指标列表
fields:
# field-metric name, type-metric type(0-number,1-string), unit-metric unit('%','ms','MB'), instance-whether the field is the unique identifier of the metrics group
# field-指标名称, type-指标类型(0-number数字,1-string字符串), unit-指标单位('%','ms','MB'), instance-是否是指标集合唯一标识符字段
- field: system_cpu_usage
- field: usage
type: 0
unit: '%'
- field: system_average_load
type: 0
- field: system_memory_usage
type: 0
unit: '%'
# (optional)metrics field alias name, it is used as an alias field to map and convert the collected data and metrics field
# (可选)监控指标别名, 做为中间字段与采集数据字段和指标字段映射转换
aliasFields:
- cpu
- load
- mem
# mapping and conversion expressions, use these and aliasField above to calculate metrics value
# (可选)指标映射转换计算表达式,与上面的别名一起作用,计算出最终需要的指标值
# eg: cores=core1+core2, usage=usage, waitTime=allTime-runningTime
- value
calculates:
- system_cpu_usage=cpu * 100
- system_average_load=load
- system_memory_usage=mem * 100
# the protocol used for monitoring, eg: sql, ssh, http, telnet, wmi, snmp, sdk
# 监控使用协议, 例如: sql, ssh, http, telnet, wmi, snmp, sdk
- usage=value
protocol: http
# the config content when protocol is http
# 当使用http协议时的配置内容
http:
# http host: ipv4 ipv6 domain
# http 主机:ipv4 ipv6域名
host: ^_^host^_^
# http port
# http 端口
port: ^_^port^_^
# http url
url: /nacos/v2/ns/operator/metrics?onlyStatus=false
# http method: GET POST PUT DELETE PATCH
# http 请求方法: GET POST PUT DELETE PATCH
method: GET
# http response data parse type: default-hertzbeat rule, jsonpath-jsonpath script, website-for website monitoring, prometheus-prometheus exporter rule
# http 响应数据解析方式: default-系统规则, jsonPath-jsonPath脚本, website-网站可用性指标监控, prometheus-Prometheus数据规则
parseType: jsonPath
parseScript: $.data

- name: jvm_gc_count
priority: 1
fields:
- field: total_gc_count
type: 0
aliasFields:
- value
calculates:
- total_gc_count=value
protocol: http
http:
host: ^_^host^_^
port: ^_^port^_^
url: /nacos/actuator/metrics/jvm.gc.pause
method: GET
parseType: jsonPath
parseScript: $.measurements[0]
# http host: ipv4 ipv6 domain
# http 主机:ipv4 ipv6域名
host: ^_^host^_^
# http port
# http 端口
port: ^_^port^_^
# http url
url: /nacos/actuator/prometheus
# http method: GET POST PUT DELETE PATCH
# http 请求方法: GET POST PUT DELETE PATCH
method: GET
# http response data parse type: default-hertzbeat rule, jsonpath-jsonpath script, website-for website monitoring, prometheus-prometheus exporter rule
# http 响应数据解析方式: default-系统规则, jsonPath-jsonPath脚本, website-网站可用性指标监控, prometheus-Prometheus数据规则
parseType: prometheus

- name: jvm_gc_time
- name: jvm_memory_used_bytes
priority: 1
fields:
- field: total_gc_time
- field: area
type: 1
- field: id
type: 1
instance: true
- field: value
type: 0
unit: 's'
aliasFields:
- value
calculates:
- total_gc_time=value
unit: MB
units:
- value=B->MB
# 监控采集使用协议 eg: sql, ssh, http, telnet, wmi, snmp, sdk
protocol: http
# 当protocol为http协议时具体的采集配置
http:
# 主机host: ipv4 ipv6 域名
host: ^_^host^_^
# 端口
port: ^_^port^_^
url: /nacos/actuator/metrics/jvm.gc.pause
# url请求接口路径
url: /nacos/actuator/prometheus
timeout: ^_^timeout^_^
method: GET
parseType: jsonPath
parseScript: $.measurements[1]
parseType: prometheus

- name: nacos
- name: nacos_monitor
priority: 1
fields:
- field: service_count
type: 0
- field: client_count
- field: module
type: 1
- field: name
type: 1
- field: value
type: 0
aliasFields:
- serviceCount
- clientCount
calculates:
- service_count=serviceCount
- client_count=clientCount
# 监控采集使用协议 eg: sql, ssh, http, telnet, wmi, snmp, sdk
protocol: http
# 当protocol为http协议时具体的采集配置
http:
# 主机host: ipv4 ipv6 域名
host: ^_^host^_^
# 端口
port: ^_^port^_^
url: /nacos/v2/ns/operator/metrics?onlyStatus=false
# url请求接口路径
url: /nacos/actuator/prometheus
timeout: ^_^timeout^_^
method: GET
parseType: jsonPath
parseScript: $.data

parseType: prometheus

0 comments on commit bf3ee39

Please sign in to comment.