-
Notifications
You must be signed in to change notification settings - Fork 3.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add loki resource usage dashboard for read and write path
- Loading branch information
1 parent
0107a11
commit e633943
Showing
5 changed files
with
306 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{ | ||
// Default configuration for the Loki mixin dashboards. | ||
// `+::` declares a hidden field that is merged into any existing `_config`. | ||
_config+:: { | ||
// Tags for dashboards. dashboard-utils.libsonnet applies them in addClusterSelectorTemplates, | ||
// both to the dashboard itself and to the 'Loki Dashboards' dropdown link it adds. | ||
tags: ['loki'], | ||
|
||
// The label used to differentiate between different application instances (e.g. 'pod' in a Kubernetes install). | ||
// Used in `sum by(...)` aggregations and legend formats of the resource dashboards. | ||
per_instance_label: 'pod', | ||
|
||
// The label used to differentiate between different nodes (i.e. servers). | ||
// Used when aggregating the node_disk_* metrics in the disk panels. | ||
per_node_label: 'instance', | ||
}, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
(import 'config.libsonnet') + | ||
(import 'dashboards/loki-chunks.libsonnet') + | ||
(import 'dashboards/loki-logs.libsonnet') + | ||
(import 'dashboards/loki-operational.libsonnet') + | ||
(import 'dashboards/loki-reads.libsonnet') + | ||
(import 'dashboards/loki-writes.libsonnet') | ||
(import 'dashboards/loki-writes.libsonnet') + | ||
(import 'dashboards/loki-writes-resources.libsonnet') + | ||
(import 'dashboards/loki-reads-resources.libsonnet') | ||
|
98 changes: 98 additions & 0 deletions
98
production/loki-mixin/dashboards/dashboard-utils.libsonnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
local utils = import 'mixin-utils/utils.libsonnet'; | ||
|
||
(import 'grafana-builder/grafana.libsonnet') { | ||
// Override the dashboard constructor to add: | ||
// - default tags, | ||
// - some links that propagate the selected cluster. | ||
// Returns the dashboard built by the imported grafana-builder library, extended with | ||
// two extra helper methods: addRowIf and addClusterSelectorTemplates. | ||
dashboard(title):: | ||
super.dashboard(title) + { | ||
// Adds `row` only when `condition` is true; otherwise returns the dashboard unchanged. | ||
addRowIf(condition, row):: | ||
if condition | ||
then self.addRow(row) | ||
else self, | ||
|
||
// Sets the configured tags and a 'Loki Dashboards' dropdown link on the dashboard, then | ||
// adds `cluster` and `namespace` template variables taken from `loki_build_info`. | ||
// multi: when true (the default) uses addMultiTemplate, otherwise addTemplate | ||
// (presumably multi-select vs. single-select variables; confirm in grafana-builder). | ||
addClusterSelectorTemplates(multi=true):: | ||
local d = self { | ||
tags: $._config.tags, | ||
links: [ | ||
{ | ||
asDropdown: true, | ||
icon: 'external link', | ||
includeVars: true, | ||
keepTime: true, | ||
// Link targets are selected by the same tags that are set on this dashboard. | ||
tags: $._config.tags, | ||
targetBlank: false, | ||
title: 'Loki Dashboards', | ||
type: 'dashboards', | ||
}, | ||
], | ||
}; | ||
|
||
if multi then | ||
d.addMultiTemplate('cluster', 'loki_build_info', 'cluster') | ||
.addMultiTemplate('namespace', 'loki_build_info', 'namespace') | ||
else | ||
d.addTemplate('cluster', 'loki_build_info', 'cluster') | ||
.addTemplate('namespace', 'loki_build_info', 'namespace'), | ||
|
||
}, | ||
|
||
// Returns a label-matcher fragment (without braces) selecting the chosen cluster and | ||
// jobs named "<namespace>/<job>". `$cluster` and `$namespace` are emitted literally; they | ||
// are the dashboard template variables added by addClusterSelectorTemplates. | ||
jobMatcher(job):: | ||
'cluster=~"$cluster", job=~"($namespace)/%s"' % job, | ||
|
||
// Returns a label-matcher fragment (without braces) selecting the chosen cluster and | ||
// namespace. `$cluster` and `$namespace` are emitted literally; they are the dashboard | ||
// template variables added by addClusterSelectorTemplates. | ||
namespaceMatcher():: | ||
'cluster=~"$cluster", namespace=~"$namespace"', | ||
|
||
// Builds a CPU usage panel for one container. | ||
// title: panel title. | ||
// containerName: value matched against the `container` label. | ||
// Plots one series per pod plus a single 'limit' series. | ||
containerCPUUsagePanel(title, containerName):: | ||
$.panel(title) + | ||
$.queryPanel([ | ||
// Per-pod rate of CPU seconds consumed in the selected cluster/namespace. | ||
'sum by(pod) (rate(container_cpu_usage_seconds_total{%s,container="%s"}[$__rate_interval]))' % [$.namespaceMatcher(), containerName], | ||
// Lowest CPU limit across the matching containers, computed as quota / period. | ||
'min(container_spec_cpu_quota{%s,container="%s"} / container_spec_cpu_period{%s,container="%s"})' % [$.namespaceMatcher(), containerName, $.namespaceMatcher(), containerName], | ||
], ['{{pod}}', 'limit']) + | ||
{ | ||
// Style the 'limit' series: fixed color and no area fill. | ||
seriesOverrides: [ | ||
{ | ||
alias: 'limit', | ||
color: '#E02F44', | ||
fill: 0, | ||
}, | ||
], | ||
tooltip: { sort: 2 }, // Sort descending. | ||
}, | ||
|
||
// Builds a working-set memory panel for one container. | ||
// title: panel title. | ||
// containerName: value matched against the `container` label. | ||
// Plots one series per pod plus a single 'limit' series, with the y-axis in bytes. | ||
containerMemoryWorkingSetPanel(title, containerName):: | ||
$.panel(title) + | ||
$.queryPanel([ | ||
// We use "max" instead of "sum" otherwise during a rolling update of a statefulset we will end up | ||
// summing the memory of the old pod (whose metric will be stale for 5m) to the new pod. | ||
'max by(pod) (container_memory_working_set_bytes{%s,container="%s"})' % [$.namespaceMatcher(), containerName], | ||
// Lowest memory limit across the matching containers. `> 0` drops zero-valued series | ||
// (presumably containers without a limit set; confirm). | ||
'min(container_spec_memory_limit_bytes{%s,container="%s"} > 0)' % [$.namespaceMatcher(), containerName], | ||
], ['{{pod}}', 'limit']) + | ||
{ | ||
// Style the 'limit' series: fixed color and no area fill. | ||
seriesOverrides: [ | ||
{ | ||
alias: 'limit', | ||
color: '#E02F44', | ||
fill: 0, | ||
}, | ||
], | ||
yaxes: $.yaxes('bytes'), | ||
tooltip: { sort: 2 }, // Sort descending. | ||
}, | ||
|
||
// Builds a Go heap-in-use panel for one job. | ||
// title: panel title. | ||
// jobName: job suffix passed to jobMatcher, i.e. matched as "<namespace>/<jobName>". | ||
// Plots go_memstats_heap_inuse_bytes summed per configured instance label, y-axis in bytes. | ||
goHeapInUsePanel(title, jobName):: | ||
$.panel(title) + | ||
$.queryPanel( | ||
'sum by(%s) (go_memstats_heap_inuse_bytes{%s})' % [$._config.per_instance_label, $.jobMatcher(jobName)], | ||
'{{%s}}' % $._config.per_instance_label | ||
) + | ||
{ | ||
yaxes: $.yaxes('bytes'), | ||
tooltip: { sort: 2 }, // Sort descending. | ||
}, | ||
|
||
// Returns a PromQL fragment meant to be appended after `<node disk expression> + `. | ||
// containerName: value matched against the `container` label. | ||
// The fragment counts container_fs_writes_bytes_total series per node, instance and device | ||
// for the given container, rewrites `device` to drop a leading "/dev/", and multiplies by 0. | ||
// Adding it therefore leaves the left-hand values unchanged, while the vector matching | ||
// (`ignoring(<instance label>) group_right()`) keeps only node/device pairs that also | ||
// appear for this container and takes the result's labels from the right-hand side. | ||
// Devices matching ".*sda.*" are excluded (presumably the node's root disk; confirm). | ||
// NOTE: do not add comments between the `|||` markers; they would become part of the query. | ||
filterNodeDiskContainer(containerName):: | ||
||| | ||
ignoring(%s) group_right() (label_replace(count by(%s, %s, device) (container_fs_writes_bytes_total{%s,container="%s",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0) | ||
||| % [$._config.per_instance_label, $._config.per_node_label, $._config.per_instance_label, $.namespaceMatcher(), containerName], | ||
} |
106 changes: 106 additions & 0 deletions
106
production/loki-mixin/dashboards/loki-reads-resources.libsonnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
local utils = import 'mixin-utils/utils.libsonnet'; | ||
|
||
(import 'dashboard-utils.libsonnet') { | ||
grafanaDashboards+: | ||
{ | ||
// 'Loki / Reads Resources' dashboard: CPU and memory panels for the components listed | ||
// below (gateway, query-frontend, querier, ingester, ruler), plus disk panels for the querier. | ||
'loki-reads-resources.json': | ||
($.dashboard('Loki / Reads Resources')) | ||
// `false` selects addTemplate rather than addMultiTemplate for cluster/namespace. | ||
.addClusterSelectorTemplates(false) | ||
.addRow( | ||
$.row('Gateway') | ||
.addPanel( | ||
$.containerCPUUsagePanel('CPU', 'cortex-gw'), | ||
) | ||
.addPanel( | ||
$.containerMemoryWorkingSetPanel('Memory (workingset)', 'cortex-gw'), | ||
) | ||
.addPanel( | ||
$.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw'), | ||
) | ||
) | ||
.addRow( | ||
$.row('Query Frontend') | ||
.addPanel( | ||
$.containerCPUUsagePanel('CPU', 'query-frontend'), | ||
) | ||
.addPanel( | ||
$.containerMemoryWorkingSetPanel('Memory (workingset)', 'query-frontend'), | ||
) | ||
.addPanel( | ||
$.goHeapInUsePanel('Memory (go heap inuse)', 'query-frontend'), | ||
) | ||
) | ||
.addRow( | ||
$.row('Querier') | ||
.addPanel( | ||
$.containerCPUUsagePanel('CPU', 'querier'), | ||
) | ||
.addPanel( | ||
$.containerMemoryWorkingSetPanel('Memory (workingset)', 'querier'), | ||
) | ||
.addPanel( | ||
$.goHeapInUsePanel('Memory (go heap inuse)', 'querier'), | ||
) | ||
) | ||
// Untitled row holding the querier disk panels. | ||
.addRow( | ||
$.row('') | ||
.addPanel( | ||
$.panel('Disk Writes') + | ||
$.queryPanel( | ||
// Node disk write rate, combined with the querier container filter fragment. | ||
'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('querier')], | ||
'{{%s}} - {{device}}' % $._config.per_instance_label | ||
) + | ||
$.stack + | ||
{ yaxes: $.yaxes('Bps') }, | ||
) | ||
.addPanel( | ||
$.panel('Disk Reads') + | ||
$.queryPanel( | ||
// Node disk read rate, combined with the querier container filter fragment. | ||
'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('querier')], | ||
'{{%s}} - {{device}}' % $._config.per_instance_label | ||
) + | ||
$.stack + | ||
{ yaxes: $.yaxes('Bps') }, | ||
) | ||
.addPanel( | ||
$.panel('Disk Space Utilization') + | ||
// Used/capacity ratio per PVC; the `and` clause keeps only PVCs whose label_name matches "querier.*". | ||
$.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name=~"querier.*"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + | ||
{ yaxes: $.yaxes('percentunit') }, | ||
) | ||
) | ||
.addRow( | ||
$.row('Ingester') | ||
.addPanel( | ||
$.containerCPUUsagePanel('CPU', 'ingester'), | ||
) | ||
.addPanel( | ||
$.containerMemoryWorkingSetPanel('Memory (workingset)', 'ingester'), | ||
) | ||
.addPanel( | ||
$.goHeapInUsePanel('Memory (go heap inuse)', 'ingester'), | ||
) | ||
) | ||
.addRow( | ||
$.row('Ruler') | ||
.addPanel( | ||
$.panel('Rules') + | ||
$.queryPanel( | ||
// Sum of cortex_prometheus_rule_group_rules per instance for the ruler job. | ||
'sum by(%s) (cortex_prometheus_rule_group_rules{%s})' % [$._config.per_instance_label, $.jobMatcher('ruler')], | ||
'{{%s}}' % $._config.per_instance_label | ||
), | ||
) | ||
.addPanel( | ||
$.containerCPUUsagePanel('CPU', 'ruler'), | ||
) | ||
) | ||
// Untitled row holding the ruler memory panels. | ||
.addRow( | ||
$.row('') | ||
.addPanel( | ||
$.containerMemoryWorkingSetPanel('Memory (workingset)', 'ruler'), | ||
) | ||
.addPanel( | ||
$.goHeapInUsePanel('Memory (go heap inuse)', 'ruler'), | ||
) | ||
), | ||
}, | ||
} |
85 changes: 85 additions & 0 deletions
85
production/loki-mixin/dashboards/loki-writes-resources.libsonnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
local utils = import 'mixin-utils/utils.libsonnet'; | ||
|
||
(import 'dashboard-utils.libsonnet') { | ||
grafanaDashboards+: | ||
{ | ||
// 'Loki / Writes Resources' dashboard: CPU and memory panels for the components listed | ||
// below (gateway, distributor, ingester), plus stream-count and disk panels for the ingester. | ||
'loki-writes-resources.json': | ||
$.dashboard('Loki / Writes Resources') | ||
// `false` selects addTemplate rather than addMultiTemplate for cluster/namespace. | ||
.addClusterSelectorTemplates(false) | ||
.addRow( | ||
$.row('Gateway') | ||
.addPanel( | ||
$.containerCPUUsagePanel('CPU', 'cortex-gw'), | ||
) | ||
.addPanel( | ||
$.containerMemoryWorkingSetPanel('Memory (workingset)', 'cortex-gw'), | ||
) | ||
.addPanel( | ||
$.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw'), | ||
) | ||
) | ||
.addRow( | ||
$.row('Distributor') | ||
.addPanel( | ||
$.containerCPUUsagePanel('CPU', 'distributor'), | ||
) | ||
.addPanel( | ||
$.containerMemoryWorkingSetPanel('Memory (workingset)', 'distributor'), | ||
) | ||
.addPanel( | ||
$.goHeapInUsePanel('Memory (go heap inuse)', 'distributor'), | ||
) | ||
) | ||
.addRow( | ||
$.row('Ingester') | ||
.addPanel( | ||
$.panel('In-memory streams') + | ||
$.queryPanel( | ||
// Sum of loki_ingester_memory_streams per instance for the ingester job. | ||
'sum by(%s) (loki_ingester_memory_streams{%s})' % [$._config.per_instance_label, $.jobMatcher('ingester')], | ||
'{{%s}}' % $._config.per_instance_label | ||
) + | ||
{ | ||
tooltip: { sort: 2 }, // Sort descending. | ||
}, | ||
) | ||
.addPanel( | ||
$.containerCPUUsagePanel('CPU', 'ingester'), | ||
) | ||
) | ||
// Untitled row holding the ingester memory panels. | ||
.addRow( | ||
$.row('') | ||
.addPanel( | ||
$.containerMemoryWorkingSetPanel('Memory (workingset)', 'ingester'), | ||
) | ||
.addPanel( | ||
$.goHeapInUsePanel('Memory (go heap inuse)', 'ingester'), | ||
) | ||
) | ||
// Untitled row holding the ingester disk panels. | ||
.addRow( | ||
$.row('') | ||
.addPanel( | ||
$.panel('Disk Writes') + | ||
$.queryPanel( | ||
// Node disk write rate, combined with the ingester container filter fragment. | ||
'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('ingester')], | ||
'{{%s}} - {{device}}' % $._config.per_instance_label | ||
) + | ||
$.stack + | ||
{ yaxes: $.yaxes('Bps') }, | ||
) | ||
.addPanel( | ||
$.panel('Disk Reads') + | ||
$.queryPanel( | ||
// Node disk read rate, combined with the ingester container filter fragment. | ||
'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('ingester')], | ||
'{{%s}} - {{device}}' % $._config.per_instance_label | ||
) + | ||
$.stack + | ||
{ yaxes: $.yaxes('Bps') }, | ||
) | ||
.addPanel( | ||
$.panel('Disk Space Utilization') + | ||
// Used/capacity ratio per PVC; the `and` clause keeps only PVCs whose label_name matches "ingester.*". | ||
$.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name=~"ingester.*"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + | ||
{ yaxes: $.yaxes('percentunit') }, | ||
) | ||
), | ||
}, | ||
} |