From 31e057f981a3ead8ab3e5cad63ec55237efea431 Mon Sep 17 00:00:00 2001 From: Mitch Date: Thu, 2 Jan 2025 09:26:08 -0500 Subject: [PATCH 1/3] chore: load in the big dashboard during metrics install --- spartan/metrics/install-kind.sh | 4 + spartan/metrics/install-prod.sh | 3 + spartan/metrics/values.tmp.yaml | 166 ++++++++++++++++ spartan/metrics/values.yaml | 332 -------------------------------- 4 files changed, 173 insertions(+), 332 deletions(-) create mode 100644 spartan/metrics/values.tmp.yaml delete mode 100644 spartan/metrics/values.yaml diff --git a/spartan/metrics/install-kind.sh b/spartan/metrics/install-kind.sh index 3a9ecfb4ccf..68d473002a2 100755 --- a/spartan/metrics/install-kind.sh +++ b/spartan/metrics/install-kind.sh @@ -10,6 +10,10 @@ if helm ls --namespace metrics | grep -q metrics; then exit 0 fi +# Inject the Aztec Networks dashboard into values.yaml +DASHBOARD_JSON=$(jq -c '.' grafana_dashboards/aztec-dashboard-all-in-one.json) +yq -Y --arg dashboard "$DASHBOARD_JSON" '.grafana.dashboards.default."aztec-networks".json = $dashboard' values.tmp.yaml > values.yaml + helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts helm repo add grafana https://grafana.github.io/helm-charts helm repo add prometheus-community https://prometheus-community.github.io/helm-charts diff --git a/spartan/metrics/install-prod.sh b/spartan/metrics/install-prod.sh index a61cc2b8ef4..086631c64d5 100755 --- a/spartan/metrics/install-prod.sh +++ b/spartan/metrics/install-prod.sh @@ -3,4 +3,7 @@ set -eu cd "$(dirname "${BASH_SOURCE[0]}")" +DASHBOARD_JSON=$(jq -c '.' grafana_dashboards/aztec-dashboard-all-in-one.json) +yq -Y --arg dashboard "$DASHBOARD_JSON" '.grafana.dashboards.default."aztec-networks".json = $dashboard' values.tmp.yaml > values.yaml + helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace $@ diff --git a/spartan/metrics/values.tmp.yaml b/spartan/metrics/values.tmp.yaml new file mode 100644 index 00000000000..f6eb0e506c1 --- /dev/null +++ b/spartan/metrics/values.tmp.yaml @@ -0,0 +1,166 @@ +opentelemetry-collector: + mode: deployment + + service: + enabled: true + + image: + repository: "otel/opentelemetry-collector-contrib" + + ports: + otlp-http: + enabled: true + containerPort: 4318 + servicePort: 4318 + hostPort: 4318 + protocol: TCP + otel-metrics: + enabled: true + containerPort: 8888 + servicePort: 8888 + hostPort: 8888 + protocol: TCP + aztec-metrics: + enabled: true + containerPort: 8889 + servicePort: 8889 + hostPort: 8889 + protocol: TCP + + presets: + kubernetesAttributes: + enabled: true + config: + extensions: + health_check: + endpoint: ${env:MY_POD_IP}:13133 + processors: + resource: + attributes: + - action: preserve + key: k8s.namespace.name + batch: {} + receivers: + otlp: + protocols: + http: + endpoint: ${env:MY_POD_IP}:4318 + grpc: + endpoint: ${env:MY_POD_IP}:4317 + service: + extensions: [health_check] + telemetry: + metrics: + address: ${env:MY_POD_IP}:8888 + pipelines: + logs: + receivers: + - otlp + processors: + - batch + exporters: + - otlphttp/logs + traces: + receivers: + - otlp + processors: + - batch + exporters: + - otlp/tempo + metrics: + receivers: + - otlp + processors: + - batch + exporters: + - prometheus + # - debug + +# Enable and configure the Loki subchart +# https://artifacthub.io/packages/helm/grafana/loki +# loki: +# Nothing set here, because we need to use values from the values directory; +# otherwise, things don't get overridden correctly. + +# Enable and configure the Tempo subchart +# https://artifacthub.io/packages/helm/grafana/tempo +tempo: + minio: + enabled: true + mode: standalone + rootUser: grafana-tempo + rootPassword: supersecret + buckets: + # Default Tempo storage bucket + - name: tempo-traces + policy: none + purge: false + traces: + otlp: + grpc: + enabled: true + http: + enabled: true + zipkin: + enabled: false + jaeger: + thriftHttp: + enabled: false + opencensus: + enabled: false + +prometheus: + server: + global: + evaluation_interval: 15s + scrape_interval: 15s + serverFiles: + prometheus.yml: + scrape_configs: + - job_name: otel-collector + static_configs: + - targets: ["metrics-opentelemetry-collector.metrics:8888"] + - job_name: aztec + static_configs: + - targets: ["metrics-opentelemetry-collector.metrics:8889"] + - job_name: "kube-state-metrics" + static_configs: + - targets: + ["metrics-kube-state-metrics.metrics.svc.cluster.local:8080"] + +# Enable and configure Grafana +# https://artifacthub.io/packages/helm/grafana/grafana +grafana: + datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: Loki + type: loki + url: http://metrics-loki.metrics:3100 + - name: Tempo + type: tempo + url: http://metrics-tempo.metrics:3100 + - name: Prometheus + type: prometheus + uid: spartan-metrics-prometheus + isDefault: true + url: http://metrics-prometheus-server.metrics:80 + dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: "default" + orgId: 1 + folder: "" + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/default + dashboards: + default: + # unfortunately, we can't use the `file` helper here, so we have to inline the dashboard + # json. This is a limitation of Helm. + # See the install scripts: we inject the dashboard json into a copy of this file, which is the + # version that actually gets helm installed. diff --git a/spartan/metrics/values.yaml b/spartan/metrics/values.yaml deleted file mode 100644 index df2ca87aa21..00000000000 --- a/spartan/metrics/values.yaml +++ /dev/null @@ -1,332 +0,0 @@ -opentelemetry-collector: - mode: deployment - - service: - enabled: true - - image: - repository: "otel/opentelemetry-collector-contrib" - - ports: - otlp-http: - enabled: true - containerPort: 4318 - servicePort: 4318 - hostPort: 4318 - protocol: TCP - otel-metrics: - enabled: true - containerPort: 8888 - servicePort: 8888 - hostPort: 8888 - protocol: TCP - aztec-metrics: - enabled: true - containerPort: 8889 - servicePort: 8889 - hostPort: 8889 - protocol: TCP - - presets: - kubernetesAttributes: - enabled: true - config: - extensions: - health_check: - endpoint: ${env:MY_POD_IP}:13133 - processors: - resource: - attributes: - - action: preserve - key: k8s.namespace.name - batch: {} - receivers: - otlp: - protocols: - http: - endpoint: ${env:MY_POD_IP}:4318 - grpc: - endpoint: ${env:MY_POD_IP}:4317 - service: - extensions: [health_check] - telemetry: - metrics: - address: ${env:MY_POD_IP}:8888 - pipelines: - logs: - receivers: - - otlp - processors: - - batch - exporters: - - otlphttp/logs - traces: - receivers: - - otlp - processors: - - batch - exporters: - - otlp/tempo - metrics: - receivers: - - otlp - processors: - - batch - exporters: - - prometheus - # - debug - -# Enable and configure the Loki subchart -# https://artifacthub.io/packages/helm/grafana/loki -# loki: -# Nothing set here, because we need to use values from the values directory; -# otherwise, things don't get overridden correctly. - -# Enable and configure the Tempo subchart -# https://artifacthub.io/packages/helm/grafana/tempo -tempo: - minio: - enabled: true - mode: standalone - rootUser: grafana-tempo - rootPassword: supersecret - buckets: - # Default Tempo storage bucket - - name: tempo-traces - policy: none - purge: false - traces: - otlp: - grpc: - enabled: true - http: - enabled: true - zipkin: - enabled: false - jaeger: - thriftHttp: - enabled: false - opencensus: - enabled: false - -prometheus: - server: - global: - evaluation_interval: 15s - scrape_interval: 15s - serverFiles: - prometheus.yml: - scrape_configs: - - job_name: otel-collector - static_configs: - - targets: ["metrics-opentelemetry-collector.metrics:8888"] - - job_name: aztec - static_configs: - - targets: ["metrics-opentelemetry-collector.metrics:8889"] - - job_name: "kube-state-metrics" - static_configs: - - targets: - ["metrics-kube-state-metrics.metrics.svc.cluster.local:8080"] - -# Enable and configure Grafana -# https://artifacthub.io/packages/helm/grafana/grafana -grafana: - datasources: - datasources.yaml: - apiVersion: 1 - datasources: - - name: Loki - type: loki - url: http://metrics-loki.metrics:3100 - - name: Tempo - type: tempo - url: http://metrics-tempo.metrics:3100 - - name: Prometheus - type: prometheus - uid: spartan-metrics-prometheus - isDefault: true - url: http://metrics-prometheus-server.metrics:80 - dashboardProviders: - dashboardproviders.yaml: - apiVersion: 1 - providers: - - name: "default" - orgId: 1 - folder: "" - type: file - disableDeletion: false - editable: true - options: - path: /var/lib/grafana/dashboards/default - dashboards: - default: - # unfortunately, we can't use the `file` helper here, so we have to inline the dashboard - # json. This is a limitation of Helm. - # See https://github.com/helm/helm/issues/1892 - spartan-dashboard: - json: | - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 1, - "links": [], - "panels": [ - { - "datasource": { - "default": false, - "type": "prometheus", - "uid": "spartan-metrics-prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "series", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "spartan-metrics-prometheus" - }, - "editorMode": "code", - "expr": "aztec_archiver_block_height", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "L2 Block Height", - "type": "timeseries" - } - ], - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "smoke", - "value": "smoke" - }, - "datasource": { - "type": "prometheus", - "uid": "spartan-metrics-prometheus" - }, - "definition": "label_values(k8s_namespace_name)", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "Deployment", - "options": [], - "query": { - "qryType": 1, - "query": "label_values(k8s_namespace_name)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Spartan Deployments", - "uid": "ae01y5sn1bls0a", - "version": 1, - "weekStart": "" - } From 2ae8519e3136852ad87dfb8baedabfc2efbc324a Mon Sep 17 00:00:00 2001 From: Mitch Date: Fri, 3 Jan 2025 15:12:27 -0500 Subject: [PATCH 2/3] fix: use same version of yq as in CI --- spartan/metrics/.gitignore | 1 + spartan/metrics/install-kind.sh | 2 +- spartan/metrics/install-prod.sh | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 spartan/metrics/.gitignore diff --git a/spartan/metrics/.gitignore b/spartan/metrics/.gitignore new file mode 100644 index 00000000000..7d101009614 --- /dev/null +++ b/spartan/metrics/.gitignore @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/spartan/metrics/install-kind.sh b/spartan/metrics/install-kind.sh index 68d473002a2..9cd2a5126e4 100755 --- a/spartan/metrics/install-kind.sh +++ b/spartan/metrics/install-kind.sh @@ -12,7 +12,7 @@ fi # Inject the Aztec Networks dashboard into values.yaml DASHBOARD_JSON=$(jq -c '.' grafana_dashboards/aztec-dashboard-all-in-one.json) -yq -Y --arg dashboard "$DASHBOARD_JSON" '.grafana.dashboards.default."aztec-networks".json = $dashboard' values.tmp.yaml > values.yaml +DASHBOARD_JSON=$DASHBOARD_JSON ~/bin/yq e '.grafana.dashboards.default."aztec-networks".json = strenv(DASHBOARD_JSON)' values.tmp.yaml > values.yaml helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts helm repo add grafana https://grafana.github.io/helm-charts diff --git a/spartan/metrics/install-prod.sh b/spartan/metrics/install-prod.sh index 086631c64d5..9adb635b0d7 100755 --- a/spartan/metrics/install-prod.sh +++ b/spartan/metrics/install-prod.sh @@ -4,6 +4,6 @@ set -eu cd "$(dirname "${BASH_SOURCE[0]}")" DASHBOARD_JSON=$(jq -c '.' grafana_dashboards/aztec-dashboard-all-in-one.json) -yq -Y --arg dashboard "$DASHBOARD_JSON" '.grafana.dashboards.default."aztec-networks".json = $dashboard' values.tmp.yaml > values.yaml +DASHBOARD_JSON=$DASHBOARD_JSON ~/bin/yq e '.grafana.dashboards.default."aztec-networks".json = strenv(DASHBOARD_JSON)' values.tmp.yaml > values.yaml helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace $@ From 7e3a75a2cedb57604271728b99ade3915f221660 Mon Sep 17 00:00:00 2001 From: Mitch Date: Fri, 3 Jan 2025 15:12:54 -0500 Subject: [PATCH 3/3] fix: correct path --- spartan/metrics/install-kind.sh | 2 +- spartan/metrics/install-prod.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spartan/metrics/install-kind.sh b/spartan/metrics/install-kind.sh index 9cd2a5126e4..5b1c9ce7900 100755 --- a/spartan/metrics/install-kind.sh +++ b/spartan/metrics/install-kind.sh @@ -12,7 +12,7 @@ fi # Inject the Aztec Networks dashboard into values.yaml DASHBOARD_JSON=$(jq -c '.' grafana_dashboards/aztec-dashboard-all-in-one.json) -DASHBOARD_JSON=$DASHBOARD_JSON ~/bin/yq e '.grafana.dashboards.default."aztec-networks".json = strenv(DASHBOARD_JSON)' values.tmp.yaml > values.yaml +DASHBOARD_JSON=$DASHBOARD_JSON yq e '.grafana.dashboards.default."aztec-networks".json = strenv(DASHBOARD_JSON)' values.tmp.yaml > values.yaml helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts helm repo add grafana https://grafana.github.io/helm-charts diff --git a/spartan/metrics/install-prod.sh b/spartan/metrics/install-prod.sh index 9adb635b0d7..edb5af00aa7 100755 --- a/spartan/metrics/install-prod.sh +++ b/spartan/metrics/install-prod.sh @@ -4,6 +4,6 @@ set -eu cd "$(dirname "${BASH_SOURCE[0]}")" DASHBOARD_JSON=$(jq -c '.' grafana_dashboards/aztec-dashboard-all-in-one.json) -DASHBOARD_JSON=$DASHBOARD_JSON ~/bin/yq e '.grafana.dashboards.default."aztec-networks".json = strenv(DASHBOARD_JSON)' values.tmp.yaml > values.yaml +DASHBOARD_JSON=$DASHBOARD_JSON yq e '.grafana.dashboards.default."aztec-networks".json = strenv(DASHBOARD_JSON)' values.tmp.yaml > values.yaml helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace $@