From 9b2056b6bc8054de6fb397212eed973fedc101a3 Mon Sep 17 00:00:00 2001 From: vankichi Date: Mon, 4 Mar 2024 12:20:18 +0900 Subject: [PATCH 1/8] :chart_with_upwards_trend: Add base of benchmark operator dashboard Signed-off-by: vankichi --- .../crds/valdbenchmarkoperatorrelease.yaml | 9 +- .../templates/deployment.yaml | 7 +- charts/vald-benchmark-operator/values.yaml | 25 +- .../10-vald-benchmark-operator.yaml | 1581 +++++++++++++++++ k8s/metrics/grafana/deployment.yaml | 6 + .../crds/valdbenchmarkoperatorrelease.yaml | 9 +- k8s/tools/benchmark/operator/deployment.yaml | 12 + pkg/tools/benchmark/operator/config/config.go | 12 +- .../benchmark/operator/usecase/benchmarkd.go | 10 +- 9 files changed, 1646 insertions(+), 25 deletions(-) create mode 100644 k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkoperatorrelease.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkoperatorrelease.yaml index 8886ca72ea..92458c639c 100644 --- a/charts/vald-benchmark-operator/crds/valdbenchmarkoperatorrelease.yaml +++ b/charts/vald-benchmark-operator/crds/valdbenchmarkoperatorrelease.yaml @@ -40,7 +40,7 @@ spec: type: string schema: openAPIV3Schema: - description: ValdBenchmarkScenario is the Schema for the valdbenchmarkscenarios API + description: ValdBenchmarkOperator is the Schema for the valdbenchmarkoperator API type: object properties: apiVersion: @@ -52,7 +52,7 @@ spec: metadata: type: object status: - description: ValdBenchmarkScenarioStatus defines the observed state of ValdBenchmarkScenario + description: ValdBenchmarkOperatorStatus defines the observed state of ValdBenchmarkOperator enum: - NotReady - Completed @@ -68,6 +68,11 @@ spec: annotations: type: object x-kubernetes-preserve-unknown-fields: true + env: + type: array + items: + type: object + x-kubernetes-preserve-unknown-fields: true image: type: object properties: diff --git a/charts/vald-benchmark-operator/templates/deployment.yaml b/charts/vald-benchmark-operator/templates/deployment.yaml index a1dcf4ab59..14df9b4c2c 100644 --- a/charts/vald-benchmark-operator/templates/deployment.yaml +++ b/charts/vald-benchmark-operator/templates/deployment.yaml @@ -122,11 +122,10 @@ spec: volumeMounts: - name: {{ .Values.name }}-config mountPath: /etc/server + {{- if .Values.env }} env: - - name: JOB_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace + {{- toYaml .Values.env | nindent 12 }} + {{- end }} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File restartPolicy: Always diff --git a/charts/vald-benchmark-operator/values.yaml b/charts/vald-benchmark-operator/values.yaml index ea2ce647b7..3aebcabc36 100644 --- a/charts/vald-benchmark-operator/values.yaml +++ b/charts/vald-benchmark-operator/values.yaml @@ -454,11 +454,11 @@ observability: # @schema {"name": "observability.otlp.attribute", "type": "object"} attribute: # @schema {"name": "observability.otlp.attribute.namespace", "type": "string"} - namespace: "_MY_POD_NAMESPACE_" + namespace: _MY_POD_NAMESPACE_ # @schema {"name": "observability.otlp.attribute.pod_name", "type": "string"} - pod_name: "_MY_POD_NAME_" + pod_name: _MY_POD_NAME_ # @schema {"name": "observability.otlp.attribute.node_name", "type": "string"} - node_name: "_MY_NODE_NAME_" + node_name: _MY_NODE_NAME_ # @schema {"name": "observability.otlp.attribute.service_name", "type": "string"} service_name: "vald-benchmark-operator" # @schema {"name": "observability.otlp.attribute.metrics", "type": "object"} @@ -487,3 +487,22 @@ observability: enabled: false # @schema {"name": "observability.trace.sampling_rate", "type": "integer"} sampling_rate: 1 +# @schema {"name": "env", "type": "array", "items": {"type": "object"}, "anchor": "env"} +# env -- environment variables +env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: JOB_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace diff --git a/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml new file mode 100644 index 0000000000..4a27782453 --- /dev/null +++ b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml @@ -0,0 +1,1581 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards-vald-benchmark-operator +data: + vald-benchmark-operator.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 14, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^vald_version$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Vald Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 16, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^go_version$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "label_replace(app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}, \"go_version\", \"v$1\", \"go_version\", \"([^v].*)\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Go Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 34, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^go_os$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Go OS", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100 + }, + { + "color": "#d44a3a", + "value": 300 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 19, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "count(kube_pod_info{namespace=\"$Namespace\", pod=~\"$ReplicaSet.*\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pods ($ReplicaSet)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10000000000 + }, + { + "color": "#d44a3a", + "value": 1000000000000 + } + ] + }, + "unit": "decbytes", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 20, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", image!=\"\"})", + "format": "time_series", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total memory working set ($ReplicaSet)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 0, + "y": 3 + }, + "id": 17, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^git_commit$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Git Commit", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 3 + }, + "id": 18, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^build_time$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersin": "8.0.1", + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Build at", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100 + }, + { + "color": "#d44a3a", + "value": 300 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 3 + }, + "id": 39, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "vald_benchmark_operator_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=\"$PodName\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Benchmark Job Image", + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [], + "unitScale": true + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}[$interval])) by (pod) and on() count(kube_statefulset_created{statefulset=\"$ReplicaSet\"}) >= 1", + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}[$interval])) by (pod) and on() count(kube_deployment_created{deployment=\"$ReplicaSet\"}) >= 1", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}[$interval])) by (pod) and on() count(kube_daemonset_created{daemonset=\"$ReplicaSet\"}) >= 1", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:76", + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:77", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [], + "unitScale": true + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}) by (pod) and on() count(kube_statefulset_created{statefulset=\"$ReplicaSet\"}) >= 1", + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}) by (pod) and on() count(kube_deployment_created{deployment=\"$ReplicaSet\"}) >= 1", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}) by (pod) and on() count(kube_daemonset_created{daemonset=\"$ReplicaSet\"}) >= 1", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory working set", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:154", + "format": "decbytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:155", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [], + "unitScale": true + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 30, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(server_completed_rpcs{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (grpc_server_method, grpc_server_status)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} ({{grpc_server_status}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(server_completed_rpcs{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (grpc_server_status)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Total ({{grpc_server_status}})", + "range": true, + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Completed RPCs /s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [], + "unitScale": true + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 14 + }, + "hiddenSeries": false, + "id": 32, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} p99", + "range": true, + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [], + "unitScale": true + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "goroutine_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{target_pod}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "goroutine count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [], + "unitScale": true + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 + }, + "hiddenSeries": false, + "id": 38, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "increase(gc_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_node=~\".+\"}[$interval])", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{target_pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "GC count /s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(kube_pod_info, namespace)", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "Namespace", + "options": [], + "query": { + "query": "label_values(kube_pod_info, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "vald-benchmark-operator", + "value": "vald-benchmark-operator" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(app_version_info{server_name=~\"benchmark operator.*\"}, kubernetes_name)", + "hide": 0, + "includeAll": false, + "label": "name", + "multi": false, + "name": "ReplicaSet", + "options": [], + "query": { + "query": "label_values(app_version_info{server_name=~\"benchmark operator.*\"}, kubernetes_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(app_version_info{server_name=~\"benchmark operator.*\", kubernetes_name=~\"$ReplicaSet\"}, target_pod)", + "hide": 0, + "includeAll": true, + "label": "pod", + "multi": false, + "name": "PodName", + "options": [], + "query": { + "query": "label_values(app_version_info{server_name=~\"benchmark operator.*\", kubernetes_name=~\"$ReplicaSet\"}, target_pod)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "text": "5m", + "value": "5m" + }, + "hide": 0, + "label": "interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "2m", + "value": "2m" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + } + ], + "query": "1m,2m,5m,10m,30m,1h,6h,12h,1d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Vald Benchmark Operator", + "uid": "JkemcMB", + "version": 1, + "weekStart": "" + } diff --git a/k8s/metrics/grafana/deployment.yaml b/k8s/metrics/grafana/deployment.yaml index cfdee03814..21555cbe04 100644 --- a/k8s/metrics/grafana/deployment.yaml +++ b/k8s/metrics/grafana/deployment.yaml @@ -51,6 +51,8 @@ spec: mountPath: /var/lib/grafana/dashboards-vald/08 - name: grafana-dashboards-vald-index-correction mountPath: /var/lib/grafana/dashboards-vald/09 + - name: grafana-dashboards-vald-benchmark-operator + mountPath: /var/lib/grafana/dashboards-vald/10 - name: grafana-dashboards-vald-agent-memory mountPath: /var/lib/grafana/dashboards-vald/99 volumes: @@ -94,3 +96,7 @@ spec: configMap: defaultMode: 420 name: grafana-dashboards-vald-agent-memory + - name: grafana-dashboards-vald-benchmark-operator + configMap: + defaultMode: 420 + name: grafana-dashboards-vald-benchmark-operator diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkoperatorrelease.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkoperatorrelease.yaml index 8886ca72ea..92458c639c 100644 --- a/k8s/tools/benchmark/operator/crds/valdbenchmarkoperatorrelease.yaml +++ b/k8s/tools/benchmark/operator/crds/valdbenchmarkoperatorrelease.yaml @@ -40,7 +40,7 @@ spec: type: string schema: openAPIV3Schema: - description: ValdBenchmarkScenario is the Schema for the valdbenchmarkscenarios API + description: ValdBenchmarkOperator is the Schema for the valdbenchmarkoperator API type: object properties: apiVersion: @@ -52,7 +52,7 @@ spec: metadata: type: object status: - description: ValdBenchmarkScenarioStatus defines the observed state of ValdBenchmarkScenario + description: ValdBenchmarkOperatorStatus defines the observed state of ValdBenchmarkOperator enum: - NotReady - Completed @@ -68,6 +68,11 @@ spec: annotations: type: object x-kubernetes-preserve-unknown-fields: true + env: + type: array + items: + type: object + x-kubernetes-preserve-unknown-fields: true image: type: object properties: diff --git a/k8s/tools/benchmark/operator/deployment.yaml b/k8s/tools/benchmark/operator/deployment.yaml index 0d011c806f..a5d2394d3b 100644 --- a/k8s/tools/benchmark/operator/deployment.yaml +++ b/k8s/tools/benchmark/operator/deployment.yaml @@ -98,6 +98,18 @@ spec: - name: vald-benchmark-operator-config mountPath: /etc/server env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: JOB_NAMESPACE valueFrom: fieldRef: diff --git a/pkg/tools/benchmark/operator/config/config.go b/pkg/tools/benchmark/operator/config/config.go index 0fe9dadea0..4285973f7b 100644 --- a/pkg/tools/benchmark/operator/config/config.go +++ b/pkg/tools/benchmark/operator/config/config.go @@ -19,6 +19,7 @@ package config import ( "github.com/vdaas/vald/internal/config" + "github.com/vdaas/vald/internal/log" ) // GlobalConfig is type alias for config.GlobalConfig. @@ -35,9 +36,6 @@ type Config struct { // Observability represent observability configurations Observability *config.Observability `json:"observability" yaml:"observability"` - // Scenario represents benchmark scenario configurations - Scenario *config.BenchmarkScenario `json:"scenario" yaml:"scenario"` - // JobImage represents the location of Docker image for benchmark job and its ImagePullPolicy JobImage *config.BenchmarkJobImageInfo `json:"job_image" yaml:"job_image"` } @@ -66,13 +64,7 @@ func NewConfig(path string) (cfg *Config, err error) { } else { cfg.JobImage = new(config.BenchmarkJobImageInfo) } - - if cfg.Scenario != nil { - cfg.Scenario = cfg.Scenario.Bind() - } else { - cfg.Scenario = new(config.BenchmarkScenario) - } - + log.Error(cfg) return cfg, nil } diff --git a/pkg/tools/benchmark/operator/usecase/benchmarkd.go b/pkg/tools/benchmark/operator/usecase/benchmarkd.go index 448d99f36a..4ef2ad323e 100644 --- a/pkg/tools/benchmark/operator/usecase/benchmarkd.go +++ b/pkg/tools/benchmark/operator/usecase/benchmarkd.go @@ -27,6 +27,7 @@ import ( "github.com/vdaas/vald/internal/net/grpc" "github.com/vdaas/vald/internal/net/grpc/interceptor/server/recover" "github.com/vdaas/vald/internal/observability" + backoffmetrics "github.com/vdaas/vald/internal/observability/metrics/backoff" infometrics "github.com/vdaas/vald/internal/observability/metrics/info" "github.com/vdaas/vald/internal/runner" "github.com/vdaas/vald/internal/safety" @@ -52,11 +53,11 @@ type run struct { var JOB_NAMESPACE = os.Getenv("JOB_NAMESPACE") func New(cfg *config.Config) (r runner.Runner, err error) { - log.Info("pkg/tools/benchmark/scenario/cmd start") + log.Info("pkg/tools/benchmark/operator/cmd start") eg := errgroup.Get() - log.Info("pkg/tools/benchmark/scenario/cmd success d") + log.Info("pkg/tools/benchmark/operator/cmd success d") operator, err := service.New( service.WithErrGroup(eg), @@ -95,7 +96,8 @@ func New(cfg *config.Config) (r runner.Runner, err error) { if cfg.Observability.Enabled { obs, err = observability.NewWithConfig( cfg.Observability, - infometrics.New("vald_benchmark_scenario_info", "Benchmark Scenario info", *cfg.Scenario), + infometrics.New("benchmark_operator_info", "Benchmark Operator Info", *cfg.JobImage), + backoffmetrics.New(), ) if err != nil { return nil, err @@ -125,7 +127,7 @@ func New(cfg *config.Config) (r runner.Runner, err error) { if err != nil { return nil, err } - log.Info("pkg/tools/benchmark/scenario/cmd end") + log.Info("pkg/tools/benchmark/operator/cmd end") return &run{ eg: eg, From 7fee341ab684bbf103ba1a376c68e884e6b80b99 Mon Sep 17 00:00:00 2001 From: vankichi Date: Mon, 4 Mar 2024 12:25:59 +0900 Subject: [PATCH 2/8] :green_heart: Fix build benchmark images trigger Signed-off-by: vankichi --- .../workflows/dockers-benchmark-job-image.yml | 24 +++++++++---------- .../dockers-benchmark-operator-image.yaml | 24 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/dockers-benchmark-job-image.yml b/.github/workflows/dockers-benchmark-job-image.yml index 4a2d0479f6..4ecd0e4b60 100644 --- a/.github/workflows/dockers-benchmark-job-image.yml +++ b/.github/workflows/dockers-benchmark-job-image.yml @@ -33,10 +33,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/job/Dockerfile" - "versions/GO_VERSION" pull_request: @@ -50,10 +50,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/job/Dockerfile" - "versions/GO_VERSION" pull_request_target: @@ -67,10 +67,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/job/Dockerfile" - "versions/GO_VERSION" jobs: diff --git a/.github/workflows/dockers-benchmark-operator-image.yaml b/.github/workflows/dockers-benchmark-operator-image.yaml index 2424657297..d56d0cd76f 100644 --- a/.github/workflows/dockers-benchmark-operator-image.yaml +++ b/.github/workflows/dockers-benchmark-operator-image.yaml @@ -33,10 +33,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/operator/Dockerfile" - "versions/GO_VERSION" pull_request: @@ -50,10 +50,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/operator/Dockerfile" - "versions/GO_VERSION" pull_request_target: @@ -67,10 +67,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/operator/Dockerfile" - "versions/GO_VERSION" jobs: From 45276e2436669879485255221cd3e3489d2fd2c0 Mon Sep 17 00:00:00 2001 From: vankichi Date: Mon, 4 Mar 2024 17:47:38 +0900 Subject: [PATCH 3/8] :green_heart: Add info tag for info metrics Signed-off-by: vankichi --- internal/config/benchmark.go | 4 ++-- pkg/tools/benchmark/operator/config/config.go | 2 -- pkg/tools/benchmark/operator/usecase/benchmarkd.go | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/internal/config/benchmark.go b/internal/config/benchmark.go index 68272a58bc..cb7039dfb0 100644 --- a/internal/config/benchmark.go +++ b/internal/config/benchmark.go @@ -234,8 +234,8 @@ func (b *BenchmarkScenario) Bind() *BenchmarkScenario { // BenchmarkJobImageInfo represents the docker image information for benchmark job. type BenchmarkJobImageInfo struct { - Image string `json:"image,omitempty" yaml:"image"` - PullPolicy string `json:"pull_policy,omitempty" yaml:"pull_policy"` + Image string `info:"image" json:"image,omitempty" yaml:"image"` + PullPolicy string `info:"pull_policy" json:"pull_policy,omitempty" yaml:"pull_policy"` } // Bind binds the actual data from the BenchmarkJobImageInfo receiver fields. diff --git a/pkg/tools/benchmark/operator/config/config.go b/pkg/tools/benchmark/operator/config/config.go index 4285973f7b..f2c8f76b62 100644 --- a/pkg/tools/benchmark/operator/config/config.go +++ b/pkg/tools/benchmark/operator/config/config.go @@ -19,7 +19,6 @@ package config import ( "github.com/vdaas/vald/internal/config" - "github.com/vdaas/vald/internal/log" ) // GlobalConfig is type alias for config.GlobalConfig. @@ -64,7 +63,6 @@ func NewConfig(path string) (cfg *Config, err error) { } else { cfg.JobImage = new(config.BenchmarkJobImageInfo) } - log.Error(cfg) return cfg, nil } diff --git a/pkg/tools/benchmark/operator/usecase/benchmarkd.go b/pkg/tools/benchmark/operator/usecase/benchmarkd.go index 4ef2ad323e..134226967a 100644 --- a/pkg/tools/benchmark/operator/usecase/benchmarkd.go +++ b/pkg/tools/benchmark/operator/usecase/benchmarkd.go @@ -96,7 +96,7 @@ func New(cfg *config.Config) (r runner.Runner, err error) { if cfg.Observability.Enabled { obs, err = observability.NewWithConfig( cfg.Observability, - infometrics.New("benchmark_operator_info", "Benchmark Operator Info", *cfg.JobImage), + infometrics.New("benchmark_operator_info", "Benchmark Operator info", *cfg.JobImage), backoffmetrics.New(), ) if err != nil { From f0ebf37265e1dda7b000d862ca7b57f150a9b6b8 Mon Sep 17 00:00:00 2001 From: vankichi Date: Tue, 5 Mar 2024 11:46:08 +0900 Subject: [PATCH 4/8] :chart_with_upwards_trend: showing job image and tags Signed-off-by: vankichi --- .../10-vald-benchmark-operator.yaml | 71 +++++++++---------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml index 4a27782453..8a2f64bbde 100644 --- a/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml +++ b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml @@ -127,7 +127,7 @@ data: "refId": "A" } ], - "title": "Vald Version", + "title": "Operator Version", "type": "stat" }, { @@ -333,7 +333,7 @@ data: "overrides": [] }, "gridPos": { - "h": 3, + "h": 6, "w": 4, "x": 12, "y": 0 @@ -635,46 +635,32 @@ data: }, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", + "color": "green", "value": null }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 - }, - { - "color": "#d44a3a", - "value": 300 + "color": "red", + "value": 80 } ] }, - "unit": "none", + "unit": "string", "unitScale": true }, "overrides": [] }, "gridPos": { "h": 3, - "w": 4, - "x": 12, - "y": 3 + "w": 8, + "x": 0, + "y": 6 }, - "id": 39, + "id": 40, "links": [], "maxDataPoints": 100, "options": { @@ -686,7 +672,7 @@ data: "calcs": [ "lastNotNull" ], - "fields": "/.*/", + "fields": "/^image$/", "values": false }, "showPercentChange": false, @@ -702,18 +688,31 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "vald_benchmark_operator_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=\"$PodName\"}", + "expr": "benchmark_operator_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", "format": "table", - "hide": false, "instant": true, "interval": "", - "legendFormat": "", + "legendFormat": "__auto", "refId": "A" } ], - "title": "Benchmark Job Image", + "title": "Job Image", + "transformations": [], "type": "stat" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 39, + "panels": [], + "title": "Operator Metrics", + "type": "row" + }, { "aliasColors": {}, "bars": false, @@ -736,7 +735,7 @@ data: "h": 8, "w": 12, "x": 0, - "y": 6 + "y": 10 }, "hiddenSeries": false, "id": 22, @@ -862,7 +861,7 @@ data: "h": 8, "w": 12, "x": 12, - "y": 6 + "y": 10 }, "hiddenSeries": false, "id": 27, @@ -988,7 +987,7 @@ data: "h": 8, "w": 12, "x": 0, - "y": 14 + "y": 18 }, "hiddenSeries": false, "id": 30, @@ -1098,7 +1097,7 @@ data: "h": 8, "w": 12, "x": 12, - "y": 14 + "y": 18 }, "hiddenSeries": false, "id": 32, @@ -1221,7 +1220,7 @@ data: "h": 8, "w": 12, "x": 0, - "y": 22 + "y": 26 }, "hiddenSeries": false, "id": 36, @@ -1317,7 +1316,7 @@ data: "h": 8, "w": 12, "x": 12, - "y": 22 + "y": 26 }, "hiddenSeries": false, "id": 38, From cb0c47fffbf175754dc31ebb3015f995ccb08b62 Mon Sep 17 00:00:00 2001 From: vankichi Date: Fri, 8 Mar 2024 10:08:24 +0900 Subject: [PATCH 5/8] :chart_with_upwards_trend: add bench job status Signed-off-by: vankichi --- internal/config/benchmark.go | 2 +- .../metrics/tools/benchmark/benchmark.go | 194 ++++++++++++++++++ .../10-vald-benchmark-operator.yaml | 2 +- .../benchmark/operator/service/operator.go | 40 ++++ .../benchmark/operator/usecase/benchmarkd.go | 2 + 5 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 internal/observability/metrics/tools/benchmark/benchmark.go diff --git a/internal/config/benchmark.go b/internal/config/benchmark.go index cb7039dfb0..fd45f4f293 100644 --- a/internal/config/benchmark.go +++ b/internal/config/benchmark.go @@ -234,7 +234,7 @@ func (b *BenchmarkScenario) Bind() *BenchmarkScenario { // BenchmarkJobImageInfo represents the docker image information for benchmark job. type BenchmarkJobImageInfo struct { - Image string `info:"image" json:"image,omitempty" yaml:"image"` + Image string `info:"image" json:"image,omitempty" yaml:"image"` PullPolicy string `info:"pull_policy" json:"pull_policy,omitempty" yaml:"pull_policy"` } diff --git a/internal/observability/metrics/tools/benchmark/benchmark.go b/internal/observability/metrics/tools/benchmark/benchmark.go new file mode 100644 index 0000000000..ff64e8ddc5 --- /dev/null +++ b/internal/observability/metrics/tools/benchmark/benchmark.go @@ -0,0 +1,194 @@ +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package benchmark + +import ( + "context" + + v1 "github.com/vdaas/vald/internal/k8s/vald/benchmark/api/v1" + "github.com/vdaas/vald/internal/observability/metrics" + "github.com/vdaas/vald/pkg/tools/benchmark/operator/service" + api "go.opentelemetry.io/otel/metric" + view "go.opentelemetry.io/otel/sdk/metric" +) + +const ( + appliedScenarioCount = "benchmark_operator_applied_scenario" + appliedScenarioCountDescription = "Benchmark Operator applied scenario count" + + runningScenarioCount = "benchmark_operator_running_scenario" + runningScenarioCountDescription = "Benchmark Operator running scenario count" + + completeScenarioCount = "benchmark_operator_complete_scenario" + completeScenarioCountDescription = "Benchmark Operator complete scenario count" + + appliedBenchmarkJobCount = "benchmark_operator_applied_benchmark_job" + appliedBenchmarkJobCountDescription = "Benchmark Operator applied benchmark job count" + + runningBenchmarkJobCount = "benchmark_operator_running_benchmark_job" + runningBenchmarkJobCountDescription = "Benchmark Operator running benchmark job count" + + completeBenchmarkJobCount = "benchmark_operator_complete_benchmark_job" + completeBenchmarkJobCountDescription = "Benchmark Operator complete benchmark job count" +) + +const ( + applied = "applied" + running = "running" + complete = "complete" +) + +type operatorMetrics struct { + op service.Operator +} + +func New(om service.Operator) metrics.Metric { + return &operatorMetrics{ + op: om, + } +} + +// TODO: implement here +func (om *operatorMetrics) View() ([]metrics.View, error) { + return []metrics.View{ + view.NewView( + view.Instrument{ + Name: appliedScenarioCount, + Description: appliedScenarioCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: runningScenarioCount, + Description: runningScenarioCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: completeScenarioCount, + Description: completeScenarioCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + }, nil +} + +// TODO: implement here +func (om *operatorMetrics) Register(m metrics.Meter) error { + appliedScCount, err := m.Int64ObservableCounter( + appliedScenarioCount, + metrics.WithDescription(appliedScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + runningScCount, err := m.Int64ObservableCounter( + runningScenarioCount, + metrics.WithDescription(runningScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + completeScCount, err := m.Int64ObservableCounter( + completeScenarioCount, + metrics.WithDescription(completeScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + + appliedBjCount, err := m.Int64ObservableCounter( + appliedBenchmarkJobCount, + metrics.WithDescription(appliedScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + runningBjCount, err := m.Int64ObservableCounter( + runningBenchmarkJobCount, + metrics.WithDescription(runningScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + completeBjCount, err := m.Int64ObservableCounter( + completeBenchmarkJobCount, + metrics.WithDescription(completeScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + + _, err = m.RegisterCallback( + func(_ context.Context, o api.Observer) error { + // scenario status + sst := map[string]int64{ + applied: 0, + running: 0, + complete: 0, + } + for k, v := range om.op.LenBenchSC() { + sst[applied] += v + if k == v1.BenchmarkScenarioCompleted { + sst[complete] += v + } else { + sst[running] += v + } + } + o.ObserveInt64(appliedScCount, sst[applied]) + o.ObserveInt64(runningScCount, sst[running]) + o.ObserveInt64(completeScCount, sst[complete]) + + // benchmark job status + bst := map[string]int64{ + applied: 0, + running: 0, + complete: 0, + } + for k, v := range om.op.LenBenchBJ() { + sst[applied] += v + if k == v1.BenchmarkJobCompleted { + sst[complete] += v + } else { + sst[running] += v + } + } + o.ObserveInt64(appliedBjCount, bst[applied]) + o.ObserveInt64(runningBjCount, bst[running]) + o.ObserveInt64(completeBjCount, bst[complete]) + return nil + }, + appliedScCount, + runningScCount, + completeScCount, + appliedBjCount, + runningBjCount, + completeBjCount, + ) + return nil +} diff --git a/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml index 8a2f64bbde..5747988b61 100644 --- a/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml +++ b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml @@ -1574,7 +1574,7 @@ data: }, "timezone": "", "title": "Vald Benchmark Operator", - "uid": "JkemcMB", + "uid": "fdewjfx1jkxz4b", "version": 1, "weekStart": "" } diff --git a/pkg/tools/benchmark/operator/service/operator.go b/pkg/tools/benchmark/operator/service/operator.go index 9ddf86a82a..5100ec52f5 100644 --- a/pkg/tools/benchmark/operator/service/operator.go +++ b/pkg/tools/benchmark/operator/service/operator.go @@ -38,6 +38,8 @@ import ( type Operator interface { PreStart(context.Context) error Start(context.Context) (<-chan error, error) + LenBenchSC() map[v1.ValdBenchmarkScenarioStatus]int64 + LenBenchBJ() map[v1.BenchmarkJobStatus]int64 } type scenario struct { @@ -640,6 +642,44 @@ func (o *operator) checkAtomics() error { return nil } +func (o *operator) LenBenchSC() map[v1.ValdBenchmarkScenarioStatus]int64 { + m := map[v1.ValdBenchmarkScenarioStatus]int64{ + v1.BenchmarkScenarioAvailable: 0, + v1.BenchmarkScenarioHealthy: 0, + v1.BenchmarkScenarioNotReady: 0, + v1.BenchmarkScenarioCompleted: 0, + } + if sc := o.getAtomicScenario(); sc != nil { + for _, s := range sc { + if _, ok := m[s.Crd.Status]; ok { + m[s.Crd.Status] += 1 + } else { + m[s.Crd.Status] = 1 + } + } + } + return m +} + +func (o *operator) LenBenchBJ() map[v1.BenchmarkJobStatus]int64 { + m := map[v1.BenchmarkJobStatus]int64{ + v1.BenchmarkJobAvailable: 0, + v1.BenchmarkJobHealthy: 0, + v1.BenchmarkJobNotReady: 0, + v1.BenchmarkJobCompleted: 0, + } + if bjs := o.getAtomicBenchJob(); bjs != nil { + for _, bj := range bjs { + if _, ok := m[bj.Status]; ok { + m[bj.Status] += 1 + } else { + m[bj.Status] = 1 + } + } + } + return m +} + func (*operator) PreStart(context.Context) error { log.Infof("[benchmark scenario operator] start vald benchmark scenario operator") return nil diff --git a/pkg/tools/benchmark/operator/usecase/benchmarkd.go b/pkg/tools/benchmark/operator/usecase/benchmarkd.go index 134226967a..5f4343f615 100644 --- a/pkg/tools/benchmark/operator/usecase/benchmarkd.go +++ b/pkg/tools/benchmark/operator/usecase/benchmarkd.go @@ -29,6 +29,7 @@ import ( "github.com/vdaas/vald/internal/observability" backoffmetrics "github.com/vdaas/vald/internal/observability/metrics/backoff" infometrics "github.com/vdaas/vald/internal/observability/metrics/info" + benchmarkmetrics "github.com/vdaas/vald/internal/observability/metrics/tools/benchmark" "github.com/vdaas/vald/internal/runner" "github.com/vdaas/vald/internal/safety" "github.com/vdaas/vald/internal/servers/server" @@ -96,6 +97,7 @@ func New(cfg *config.Config) (r runner.Runner, err error) { if cfg.Observability.Enabled { obs, err = observability.NewWithConfig( cfg.Observability, + benchmarkmetrics.New(operator), infometrics.New("benchmark_operator_info", "Benchmark Operator info", *cfg.JobImage), backoffmetrics.New(), ) From 5cd7ba0a72f8a274e4dbd068328de2a6916fb88e Mon Sep 17 00:00:00 2001 From: vankichi Date: Wed, 13 Mar 2024 16:56:11 +0900 Subject: [PATCH 6/8] :recycle: Update dashboard and rename Signed-off-by: vankichi --- .../metrics/tools/benchmark/benchmark.go | 137 +++- .../metrics/tools/benchmark/benchmark_test.go | 299 +++++++++ .../10-vald-benchmark-operator.yaml | 595 ++++++++++++++++-- .../benchmark/operator/service/operator.go | 15 +- 4 files changed, 957 insertions(+), 89 deletions(-) create mode 100644 internal/observability/metrics/tools/benchmark/benchmark_test.go diff --git a/internal/observability/metrics/tools/benchmark/benchmark.go b/internal/observability/metrics/tools/benchmark/benchmark.go index ff64e8ddc5..6c45c71f2c 100644 --- a/internal/observability/metrics/tools/benchmark/benchmark.go +++ b/internal/observability/metrics/tools/benchmark/benchmark.go @@ -41,6 +41,15 @@ const ( completeBenchmarkJobCount = "benchmark_operator_complete_benchmark_job" completeBenchmarkJobCountDescription = "Benchmark Operator complete benchmark job count" + + appliedJobCount = "benchmark_operator_applied_job" + appliedJobCountDescription = "Benchmark Operator applied job count" + + runningJobCount = "benchmark_operator_running_job" + runningJobCountDescription = "Benchmark Operator running job count" + + completeJobCount = "benchmark_operator_complete_job" + completeJobCountDescription = "Benchmark Operator complete job count" ) const ( @@ -89,12 +98,66 @@ func (om *operatorMetrics) View() ([]metrics.View, error) { Aggregation: view.AggregationLastValue{}, }, ), + view.NewView( + view.Instrument{ + Name: appliedBenchmarkJobCount, + Description: appliedBenchmarkJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: runningBenchmarkJobCount, + Description: runningBenchmarkJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: completeBenchmarkJobCount, + Description: completeBenchmarkJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: appliedJobCount, + Description: appliedJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: runningJobCount, + Description: runningJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: completeJobCount, + Description: completeJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), }, nil } // TODO: implement here func (om *operatorMetrics) Register(m metrics.Meter) error { - appliedScCount, err := m.Int64ObservableCounter( + appliedScenarioCount, err := m.Int64ObservableCounter( appliedScenarioCount, metrics.WithDescription(appliedScenarioCountDescription), metrics.WithUnit(metrics.Dimensionless), @@ -102,7 +165,7 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { if err != nil { return err } - runningScCount, err := m.Int64ObservableCounter( + runningScenarioCount, err := m.Int64ObservableCounter( runningScenarioCount, metrics.WithDescription(runningScenarioCountDescription), metrics.WithUnit(metrics.Dimensionless), @@ -110,7 +173,7 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { if err != nil { return err } - completeScCount, err := m.Int64ObservableCounter( + completeScenarioCount, err := m.Int64ObservableCounter( completeScenarioCount, metrics.WithDescription(completeScenarioCountDescription), metrics.WithUnit(metrics.Dimensionless), @@ -119,7 +182,7 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { return err } - appliedBjCount, err := m.Int64ObservableCounter( + appliedBenchJobCount, err := m.Int64ObservableCounter( appliedBenchmarkJobCount, metrics.WithDescription(appliedScenarioCountDescription), metrics.WithUnit(metrics.Dimensionless), @@ -127,7 +190,7 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { if err != nil { return err } - runningBjCount, err := m.Int64ObservableCounter( + runningBenchJobCount, err := m.Int64ObservableCounter( runningBenchmarkJobCount, metrics.WithDescription(runningScenarioCountDescription), metrics.WithUnit(metrics.Dimensionless), @@ -135,7 +198,31 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { if err != nil { return err } - completeBjCount, err := m.Int64ObservableCounter( + completeBenchJobCount, err := m.Int64ObservableCounter( + completeBenchmarkJobCount, + metrics.WithDescription(completeScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + appliedJobCount, err := m.Int64ObservableCounter( + appliedJobCount, + metrics.WithDescription(appliedJobCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + runningJobCount, err := m.Int64ObservableCounter( + runningJobCount, + metrics.WithDescription(runningJobCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + completeJobCount, err := m.Int64ObservableCounter( completeBenchmarkJobCount, metrics.WithDescription(completeScenarioCountDescription), metrics.WithUnit(metrics.Dimensionless), @@ -143,7 +230,6 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { if err != nil { return err } - _, err = m.RegisterCallback( func(_ context.Context, o api.Observer) error { // scenario status @@ -152,7 +238,7 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { running: 0, complete: 0, } - for k, v := range om.op.LenBenchSC() { + for k, v := range om.op.GetScenarioStatus() { sst[applied] += v if k == v1.BenchmarkScenarioCompleted { sst[complete] += v @@ -160,9 +246,9 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { sst[running] += v } } - o.ObserveInt64(appliedScCount, sst[applied]) - o.ObserveInt64(runningScCount, sst[running]) - o.ObserveInt64(completeScCount, sst[complete]) + o.ObserveInt64(appliedScenarioCount, sst[applied]) + o.ObserveInt64(runningScenarioCount, sst[running]) + o.ObserveInt64(completeScenarioCount, sst[complete]) // benchmark job status bst := map[string]int64{ @@ -170,25 +256,28 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { running: 0, complete: 0, } - for k, v := range om.op.LenBenchBJ() { - sst[applied] += v + for k, v := range om.op.GetBenchmarkJobStatus() { + bst[applied] += v if k == v1.BenchmarkJobCompleted { - sst[complete] += v + bst[complete] += v } else { - sst[running] += v + bst[running] += v } } - o.ObserveInt64(appliedBjCount, bst[applied]) - o.ObserveInt64(runningBjCount, bst[running]) - o.ObserveInt64(completeBjCount, bst[complete]) + o.ObserveInt64(appliedBenchJobCount, bst[applied]) + o.ObserveInt64(runningBenchJobCount, bst[running]) + o.ObserveInt64(completeBenchJobCount, bst[complete]) return nil }, - appliedScCount, - runningScCount, - completeScCount, - appliedBjCount, - runningBjCount, - completeBjCount, + appliedScenarioCount, + runningScenarioCount, + completeScenarioCount, + appliedBenchJobCount, + runningBenchJobCount, + completeBenchJobCount, + appliedJobCount, + runningJobCount, + completeJobCount, ) return nil } diff --git a/internal/observability/metrics/tools/benchmark/benchmark_test.go b/internal/observability/metrics/tools/benchmark/benchmark_test.go new file mode 100644 index 0000000000..ba64190d5a --- /dev/null +++ b/internal/observability/metrics/tools/benchmark/benchmark_test.go @@ -0,0 +1,299 @@ +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package benchmark + +import ( + "reflect" + "testing" + + "github.com/pkg/errors" + "github.com/vdaas/vald/internal/observability/metrics" + "github.com/vdaas/vald/internal/test/goleak" + "github.com/vdaas/vald/pkg/tools/benchmark/operator/service" +) + +func TestNew(t *testing.T) { + type args struct { + om service.Operator + } + type want struct { + want metrics.Metric + } + type test struct { + name string + args args + want want + checkFunc func(want, metrics.Metric) error + beforeFunc func(*testing.T, args) + afterFunc func(*testing.T, args) + } + defaultCheckFunc := func(w want, got metrics.Metric) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + om:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T, args args) { + t.Helper() + }, + afterFunc: func(t *testing.T, args args) { + t.Helper() + }, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + om:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T, args args) { + t.Helper() + }, + afterFunc: func(t *testing.T, args args) { + t.Helper() + }, + } + }(), + */ + } + + for _, tc := range tests { + test := tc + t.Run(test.name, func(tt *testing.T) { + tt.Parallel() + defer goleak.VerifyNone(tt, goleak.IgnoreCurrent()) + if test.beforeFunc != nil { + test.beforeFunc(tt, test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(tt, test.args) + } + checkFunc := test.checkFunc + if test.checkFunc == nil { + checkFunc = defaultCheckFunc + } + + got := New(test.args.om) + if err := checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + }) + } +} + +func Test_operatorMetrics_View(t *testing.T) { + type fields struct { + op service.Operator + } + type want struct { + want []metrics.View + err error + } + type test struct { + name string + fields fields + want want + checkFunc func(want, []metrics.View, error) error + beforeFunc func(*testing.T) + afterFunc func(*testing.T) + } + defaultCheckFunc := func(w want, got []metrics.View, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + op:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T,) { + t.Helper() + }, + afterFunc: func(t *testing.T,) { + t.Helper() + }, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + op:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T,) { + t.Helper() + }, + afterFunc: func(t *testing.T,) { + t.Helper() + }, + } + }(), + */ + } + + for _, tc := range tests { + test := tc + t.Run(test.name, func(tt *testing.T) { + tt.Parallel() + defer goleak.VerifyNone(tt, goleak.IgnoreCurrent()) + if test.beforeFunc != nil { + test.beforeFunc(tt) + } + if test.afterFunc != nil { + defer test.afterFunc(tt) + } + checkFunc := test.checkFunc + if test.checkFunc == nil { + checkFunc = defaultCheckFunc + } + om := &operatorMetrics{ + op: test.fields.op, + } + + got, err := om.View() + if err := checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + }) + } +} + +func Test_operatorMetrics_Register(t *testing.T) { + type args struct { + m metrics.Meter + } + type fields struct { + op service.Operator + } + type want struct { + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, error) error + beforeFunc func(*testing.T, args) + afterFunc func(*testing.T, args) + } + defaultCheckFunc := func(w want, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + m:nil, + }, + fields: fields { + op:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T, args args) { + t.Helper() + }, + afterFunc: func(t *testing.T, args args) { + t.Helper() + }, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + m:nil, + }, + fields: fields { + op:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T, args args) { + t.Helper() + }, + afterFunc: func(t *testing.T, args args) { + t.Helper() + }, + } + }(), + */ + } + + for _, tc := range tests { + test := tc + t.Run(test.name, func(tt *testing.T) { + tt.Parallel() + defer goleak.VerifyNone(tt, goleak.IgnoreCurrent()) + if test.beforeFunc != nil { + test.beforeFunc(tt, test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(tt, test.args) + } + checkFunc := test.checkFunc + if test.checkFunc == nil { + checkFunc = defaultCheckFunc + } + om := &operatorMetrics{ + op: test.fields.op, + } + + err := om.Register(test.args.m) + if err := checkFunc(test.want, err); err != nil { + tt.Errorf("error = %v", err) + } + }) + } +} diff --git a/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml index 5747988b61..9ee9d26a62 100644 --- a/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml +++ b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml @@ -79,8 +79,7 @@ data: } ] }, - "unit": "none", - "unitScale": true + "unit": "none" }, "overrides": [] }, @@ -91,7 +90,6 @@ data: "y": 0 }, "id": 14, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -110,7 +108,7 @@ data: "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -161,8 +159,7 @@ data: } ] }, - "unit": "none", - "unitScale": true + "unit": "none" }, "overrides": [] }, @@ -173,7 +170,6 @@ data: "y": 0 }, "id": 16, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -192,7 +188,7 @@ data: "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -242,8 +238,7 @@ data: } ] }, - "unit": "none", - "unitScale": true + "unit": "none" }, "overrides": [] }, @@ -254,7 +249,6 @@ data: "y": 0 }, "id": 34, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -273,7 +267,7 @@ data: "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -327,8 +321,7 @@ data: } ] }, - "unit": "none", - "unitScale": true + "unit": "none" }, "overrides": [] }, @@ -339,7 +332,6 @@ data: "y": 0 }, "id": 19, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -358,7 +350,7 @@ data: "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -414,8 +406,7 @@ data: } ] }, - "unit": "decbytes", - "unitScale": true + "unit": "decbytes" }, "overrides": [] }, @@ -426,7 +417,6 @@ data: "y": 0 }, "id": 20, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -445,7 +435,7 @@ data: "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -496,8 +486,7 @@ data: } ] }, - "unit": "none", - "unitScale": true + "unit": "none" }, "overrides": [] }, @@ -508,7 +497,6 @@ data: "y": 3 }, "id": 17, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -527,7 +515,7 @@ data: "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -577,8 +565,7 @@ data: } ] }, - "unit": "none", - "unitScale": true + "unit": "none" }, "overrides": [] }, @@ -589,7 +576,6 @@ data: "y": 3 }, "id": 18, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -609,7 +595,7 @@ data: "wideLayout": true }, "pluginVersin": "8.0.1", - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -649,19 +635,305 @@ data: } ] }, - "unit": "string", - "unitScale": true + "unit": "string" }, "overrides": [] }, "gridPos": { "h": 3, - "w": 8, + "w": 4, "x": 0, "y": 6 }, + "id": 42, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_applied_scenario{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "All Scenario Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 6 + }, + "id": 43, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_running_scenario{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Running Scenario Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 6 + }, + "id": 44, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_complete_scenario{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Completed Scenario Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 12, + "y": 6 + }, + "id": 41, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^image$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "label_replace(benchmark_operator_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}, \"image\", \"$1\", \"image\", \"(.*):.*\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Job Image Name", + "transformations": [ + { + "id": "partitionByValues", + "options": { + "keepFields": true, + "naming": { + "asLabels": true + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 6 + }, "id": 40, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -680,7 +952,76 @@ data: "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "label_replace(benchmark_operator_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}, \"image\", \"$1\", \"image\", \".*:(.*)\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Job Image Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 9 + }, + "id": 45, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -688,7 +1029,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "benchmark_operator_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "expr": "sum(benchmark_operator_applied_benchmark_job{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", "format": "table", "instant": true, "interval": "", @@ -696,8 +1037,146 @@ data: "refId": "A" } ], - "title": "Job Image", - "transformations": [], + "title": "All Benchmark Job Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 9 + }, + "id": 46, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_running_benchmark_job{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Running Benchmark Job Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 9 + }, + "id": 47, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_complete_benchmark_job{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Completed Benchmark Job Count", "type": "stat" }, { @@ -706,7 +1185,7 @@ data: "h": 1, "w": 24, "x": 0, - "y": 9 + "y": 12 }, "id": 39, "panels": [], @@ -724,8 +1203,7 @@ data: }, "fieldConfig": { "defaults": { - "links": [], - "unitScale": true + "links": [] }, "overrides": [] }, @@ -735,7 +1213,7 @@ data: "h": 8, "w": 12, "x": 0, - "y": 10 + "y": 13 }, "hiddenSeries": false, "id": 22, @@ -755,7 +1233,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "pointradius": 2, "points": false, "renderer": "flot", @@ -850,8 +1328,7 @@ data: }, "fieldConfig": { "defaults": { - "links": [], - "unitScale": true + "links": [] }, "overrides": [] }, @@ -861,7 +1338,7 @@ data: "h": 8, "w": 12, "x": 12, - "y": 10 + "y": 13 }, "hiddenSeries": false, "id": 27, @@ -881,7 +1358,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "pointradius": 2, "points": false, "renderer": "flot", @@ -976,8 +1453,7 @@ data: }, "fieldConfig": { "defaults": { - "links": [], - "unitScale": true + "links": [] }, "overrides": [] }, @@ -987,7 +1463,7 @@ data: "h": 8, "w": 12, "x": 0, - "y": 18 + "y": 21 }, "hiddenSeries": false, "id": 30, @@ -1008,7 +1484,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "pointradius": 2, "points": false, "renderer": "flot", @@ -1086,8 +1562,7 @@ data: }, "fieldConfig": { "defaults": { - "links": [], - "unitScale": true + "links": [] }, "overrides": [] }, @@ -1097,7 +1572,7 @@ data: "h": 8, "w": 12, "x": 12, - "y": 18 + "y": 21 }, "hiddenSeries": false, "id": 32, @@ -1118,7 +1593,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "pointradius": 2, "points": false, "renderer": "flot", @@ -1209,8 +1684,7 @@ data: }, "fieldConfig": { "defaults": { - "links": [], - "unitScale": true + "links": [] }, "overrides": [] }, @@ -1220,7 +1694,7 @@ data: "h": 8, "w": 12, "x": 0, - "y": 26 + "y": 29 }, "hiddenSeries": false, "id": 36, @@ -1240,7 +1714,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "pointradius": 2, "points": false, "renderer": "flot", @@ -1305,8 +1779,7 @@ data: }, "fieldConfig": { "defaults": { - "links": [], - "unitScale": true + "links": [] }, "overrides": [] }, @@ -1316,7 +1789,7 @@ data: "h": 8, "w": 12, "x": 12, - "y": 26 + "y": 29 }, "hiddenSeries": false, "id": 38, @@ -1337,7 +1810,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.3.3", + "pluginVersion": "10.4.0", "pointradius": 2, "points": false, "renderer": "flot", diff --git a/pkg/tools/benchmark/operator/service/operator.go b/pkg/tools/benchmark/operator/service/operator.go index 5100ec52f5..89beca20b9 100644 --- a/pkg/tools/benchmark/operator/service/operator.go +++ b/pkg/tools/benchmark/operator/service/operator.go @@ -38,8 +38,9 @@ import ( type Operator interface { PreStart(context.Context) error Start(context.Context) (<-chan error, error) - LenBenchSC() map[v1.ValdBenchmarkScenarioStatus]int64 - LenBenchBJ() map[v1.BenchmarkJobStatus]int64 + GetScenarioStatus() map[v1.ValdBenchmarkScenarioStatus]int64 + GetBenchmarkJobStatus() map[v1.BenchmarkJobStatus]int64 + // GetJobStatus() map[v1.BenchmarkJobStatus]int64 } type scenario struct { @@ -642,7 +643,7 @@ func (o *operator) checkAtomics() error { return nil } -func (o *operator) LenBenchSC() map[v1.ValdBenchmarkScenarioStatus]int64 { +func (o *operator) GetScenarioStatus() map[v1.ValdBenchmarkScenarioStatus]int64 { m := map[v1.ValdBenchmarkScenarioStatus]int64{ v1.BenchmarkScenarioAvailable: 0, v1.BenchmarkScenarioHealthy: 0, @@ -661,7 +662,7 @@ func (o *operator) LenBenchSC() map[v1.ValdBenchmarkScenarioStatus]int64 { return m } -func (o *operator) LenBenchBJ() map[v1.BenchmarkJobStatus]int64 { +func (o *operator) GetBenchmarkJobStatus() map[v1.BenchmarkJobStatus]int64 { m := map[v1.BenchmarkJobStatus]int64{ v1.BenchmarkJobAvailable: 0, v1.BenchmarkJobHealthy: 0, @@ -680,6 +681,12 @@ func (o *operator) LenBenchBJ() map[v1.BenchmarkJobStatus]int64 { return m } +// func (o *operator) GetJobStatus() map[job.JobStatus]int64 { +// m := map[job.JobStatus]int64{} +// // if js := o.getAtomicJob() +// return m +// } + func (*operator) PreStart(context.Context) error { log.Infof("[benchmark scenario operator] start vald benchmark scenario operator") return nil From d3c4a02d642d6b99eae5de00d26812338222df06 Mon Sep 17 00:00:00 2001 From: vankichi Date: Thu, 14 Mar 2024 10:55:54 +0900 Subject: [PATCH 7/8] :recycle: Fix Signed-off-by: vankichi --- .../metrics/tools/benchmark/benchmark.go | 124 +++++++++--------- .../metrics/tools/benchmark/benchmark_test.go | 2 +- .../benchmark/operator/service/operator.go | 13 +- 3 files changed, 67 insertions(+), 72 deletions(-) diff --git a/internal/observability/metrics/tools/benchmark/benchmark.go b/internal/observability/metrics/tools/benchmark/benchmark.go index 6c45c71f2c..f53c95a06a 100644 --- a/internal/observability/metrics/tools/benchmark/benchmark.go +++ b/internal/observability/metrics/tools/benchmark/benchmark.go @@ -42,14 +42,14 @@ const ( completeBenchmarkJobCount = "benchmark_operator_complete_benchmark_job" completeBenchmarkJobCountDescription = "Benchmark Operator complete benchmark job count" - appliedJobCount = "benchmark_operator_applied_job" - appliedJobCountDescription = "Benchmark Operator applied job count" + // appliedJobCount = "benchmark_operator_applied_job" + // appliedJobCountDescription = "Benchmark Operator applied job count" - runningJobCount = "benchmark_operator_running_job" - runningJobCountDescription = "Benchmark Operator running job count" + // runningJobCount = "benchmark_operator_running_job" + // runningJobCountDescription = "Benchmark Operator running job count" - completeJobCount = "benchmark_operator_complete_job" - completeJobCountDescription = "Benchmark Operator complete job count" + // completeJobCount = "benchmark_operator_complete_job" + // completeJobCountDescription = "Benchmark Operator complete job count" ) const ( @@ -125,33 +125,33 @@ func (om *operatorMetrics) View() ([]metrics.View, error) { Aggregation: view.AggregationLastValue{}, }, ), - view.NewView( - view.Instrument{ - Name: appliedJobCount, - Description: appliedJobCountDescription, - }, - view.Stream{ - Aggregation: view.AggregationLastValue{}, - }, - ), - view.NewView( - view.Instrument{ - Name: runningJobCount, - Description: runningJobCountDescription, - }, - view.Stream{ - Aggregation: view.AggregationLastValue{}, - }, - ), - view.NewView( - view.Instrument{ - Name: completeJobCount, - Description: completeJobCountDescription, - }, - view.Stream{ - Aggregation: view.AggregationLastValue{}, - }, - ), + // view.NewView( + // view.Instrument{ + // Name: appliedJobCount, + // Description: appliedJobCountDescription, + // }, + // view.Stream{ + // Aggregation: view.AggregationLastValue{}, + // }, + // ), + // view.NewView( + // view.Instrument{ + // Name: runningJobCount, + // Description: runningJobCountDescription, + // }, + // view.Stream{ + // Aggregation: view.AggregationLastValue{}, + // }, + // ), + // view.NewView( + // view.Instrument{ + // Name: completeJobCount, + // Description: completeJobCountDescription, + // }, + // view.Stream{ + // Aggregation: view.AggregationLastValue{}, + // }, + // ), }, nil } @@ -206,30 +206,32 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { if err != nil { return err } - appliedJobCount, err := m.Int64ObservableCounter( - appliedJobCount, - metrics.WithDescription(appliedJobCountDescription), - metrics.WithUnit(metrics.Dimensionless), - ) - if err != nil { - return err - } - runningJobCount, err := m.Int64ObservableCounter( - runningJobCount, - metrics.WithDescription(runningJobCountDescription), - metrics.WithUnit(metrics.Dimensionless), - ) - if err != nil { - return err - } - completeJobCount, err := m.Int64ObservableCounter( - completeBenchmarkJobCount, - metrics.WithDescription(completeScenarioCountDescription), - metrics.WithUnit(metrics.Dimensionless), - ) - if err != nil { - return err - } + + // appliedJobCount, err := m.Int64ObservableCounter( + // appliedJobCount, + // metrics.WithDescription(appliedJobCountDescription), + // metrics.WithUnit(metrics.Dimensionless), + // ) + // if err != nil { + // return err + // } + // runningJobCount, err := m.Int64ObservableCounter( + // runningJobCount, + // metrics.WithDescription(runningJobCountDescription), + // metrics.WithUnit(metrics.Dimensionless), + // ) + // if err != nil { + // return err + // } + // completeJobCount, err := m.Int64ObservableCounter( + // completeBenchmarkJobCount, + // metrics.WithDescription(completeScenarioCountDescription), + // metrics.WithUnit(metrics.Dimensionless), + // ) + // if err != nil { + // return err + // } + _, err = m.RegisterCallback( func(_ context.Context, o api.Observer) error { // scenario status @@ -275,9 +277,9 @@ func (om *operatorMetrics) Register(m metrics.Meter) error { appliedBenchJobCount, runningBenchJobCount, completeBenchJobCount, - appliedJobCount, - runningJobCount, - completeJobCount, + // appliedJobCount, + // runningJobCount, + // completeJobCount, ) - return nil + return err } diff --git a/internal/observability/metrics/tools/benchmark/benchmark_test.go b/internal/observability/metrics/tools/benchmark/benchmark_test.go index ba64190d5a..b444083232 100644 --- a/internal/observability/metrics/tools/benchmark/benchmark_test.go +++ b/internal/observability/metrics/tools/benchmark/benchmark_test.go @@ -17,7 +17,7 @@ import ( "reflect" "testing" - "github.com/pkg/errors" + "github.com/vdaas/vald/internal/errors" "github.com/vdaas/vald/internal/observability/metrics" "github.com/vdaas/vald/internal/test/goleak" "github.com/vdaas/vald/pkg/tools/benchmark/operator/service" diff --git a/pkg/tools/benchmark/operator/service/operator.go b/pkg/tools/benchmark/operator/service/operator.go index 89beca20b9..fbd4a05ab8 100644 --- a/pkg/tools/benchmark/operator/service/operator.go +++ b/pkg/tools/benchmark/operator/service/operator.go @@ -450,6 +450,7 @@ func (o *operator) createBenchmarkJob(ctx context.Context, scenario v1.ValdBench } // set status bj.Status = v1.BenchmarkJobNotReady + // TODO: set metrics // create benchmark job resource c := o.ctrl.GetManager().GetClient() if err := c.Create(ctx, bj); err != nil { @@ -652,11 +653,7 @@ func (o *operator) GetScenarioStatus() map[v1.ValdBenchmarkScenarioStatus]int64 } if sc := o.getAtomicScenario(); sc != nil { for _, s := range sc { - if _, ok := m[s.Crd.Status]; ok { - m[s.Crd.Status] += 1 - } else { - m[s.Crd.Status] = 1 - } + m[s.Crd.Status] += 1 } } return m @@ -671,11 +668,7 @@ func (o *operator) GetBenchmarkJobStatus() map[v1.BenchmarkJobStatus]int64 { } if bjs := o.getAtomicBenchJob(); bjs != nil { for _, bj := range bjs { - if _, ok := m[bj.Status]; ok { - m[bj.Status] += 1 - } else { - m[bj.Status] = 1 - } + m[bj.Status] += 1 } } return m From 00a95885dbea3d9485a189c916b2a2a15bf7252a Mon Sep 17 00:00:00 2001 From: Kiichiro YUKAWA Date: Thu, 14 Mar 2024 15:54:16 +0900 Subject: [PATCH 8/8] Update internal/observability/metrics/tools/benchmark/benchmark.go Co-authored-by: Hiroto Funakoshi Signed-off-by: Kiichiro YUKAWA --- .../metrics/tools/benchmark/benchmark.go | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/internal/observability/metrics/tools/benchmark/benchmark.go b/internal/observability/metrics/tools/benchmark/benchmark.go index f53c95a06a..370e07cbff 100644 --- a/internal/observability/metrics/tools/benchmark/benchmark.go +++ b/internal/observability/metrics/tools/benchmark/benchmark.go @@ -125,33 +125,6 @@ func (om *operatorMetrics) View() ([]metrics.View, error) { Aggregation: view.AggregationLastValue{}, }, ), - // view.NewView( - // view.Instrument{ - // Name: appliedJobCount, - // Description: appliedJobCountDescription, - // }, - // view.Stream{ - // Aggregation: view.AggregationLastValue{}, - // }, - // ), - // view.NewView( - // view.Instrument{ - // Name: runningJobCount, - // Description: runningJobCountDescription, - // }, - // view.Stream{ - // Aggregation: view.AggregationLastValue{}, - // }, - // ), - // view.NewView( - // view.Instrument{ - // Name: completeJobCount, - // Description: completeJobCountDescription, - // }, - // view.Stream{ - // Aggregation: view.AggregationLastValue{}, - // }, - // ), }, nil }