Skip to content

Commit

Permalink
Make agent export index metrics to Pod k8s resource (#2319)
Browse files Browse the repository at this point in the history
* Add config

* Add clusterRole and serviceAccount for agent

* Add more metrics

* Format

* Add export index info duration configuration

* Refactor internal/k8s

* Move apply function to internal/k8s

* Add more config

* Remove unused import and variable

* Use vald errors

* Add pod name and pod namespace to hack e2e agent

* Apply tagalign

* Revert unnecessary specification of PodName

* Enable export index info to Kubernetes in agent templates only when the config is enabled

* Fix agent clusterrole name

* Update key name

* Update comments

* Add index count as export metrics

* Ignore gomnd

* Add index count entry to exportMetricsOnCreateIndex function

* Update values schema

* Fix formatter job

* Remove unused defines
  • Loading branch information
ykadowak authored Feb 16, 2024
1 parent b906fd2 commit 699ba22
Show file tree
Hide file tree
Showing 18 changed files with 607 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ jobs:
run: |
make deps/install
make format
git checkout go.mod go.sum ./rust/Cargo.lock
git checkout go.mod go.sum ./example/client/go.mod ./example/client/go.sum ./rust/Cargo.lock
- name: Check format and deps difference
run: |
if git diff --quiet --exit-code; then
Expand Down
29 changes: 29 additions & 0 deletions charts/vald-helm-operator/crds/valdrelease.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,20 @@ spec:
annotations:
type: object
x-kubernetes-preserve-unknown-fields: true
clusterRole:
type: object
properties:
enabled:
type: boolean
name:
type: string
clusterRoleBinding:
type: object
properties:
enabled:
type: boolean
name:
type: string
enabled:
type: boolean
env:
Expand Down Expand Up @@ -251,13 +265,17 @@ spec:
- normalizedcosine
enable_copy_on_write:
type: boolean
enable_export_index_info_to_k8s:
type: boolean
enable_in_memory_mode:
type: boolean
enable_proactive_gc:
type: boolean
error_buffer_limit:
type: integer
minimum: 1
export_index_info_duration:
type: string
index_path:
type: string
initial_delay_max_duration:
Expand All @@ -273,12 +291,16 @@ spec:
type: string
min_load_index_timeout:
type: string
namespace:
type: string
object_type:
type: string
enum:
- float
- float16
- uint8
pod_name:
type: string
search_edge_size:
type: integer
vqueue:
Expand Down Expand Up @@ -959,6 +981,13 @@ spec:
labels:
type: object
x-kubernetes-preserve-unknown-fields: true
serviceAccount:
type: object
properties:
enabled:
type: boolean
name:
type: string
serviceType:
type: string
enum:
Expand Down
37 changes: 37 additions & 0 deletions charts/vald/templates/agent/clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#
# Copyright (C) 2019-2024 vdaas.org vald team <vald@vdaas.org>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
{{- $agent := .Values.agent -}}
{{- if and $agent.enabled $agent.clusterRole.enabled $agent.ngt.enable_export_index_info_to_k8s }}
# ClusterRole for the agent. Rendered only when the agent, its clusterRole and
# agent.ngt.enable_export_index_info_to_k8s are all enabled.
# It grants `list` and `patch` on core-group Pods.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  # Templated scalars are quoted so that empty or number-like values
  # always render as YAML strings.
  name: {{ $agent.clusterRole.name | quote }}
  labels:
    app.kubernetes.io/name: {{ include "vald.name" . | quote }}
    helm.sh/chart: {{ include "vald.chart" . | quote }}
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    app.kubernetes.io/version: {{ .Chart.Version | quote }}
    app.kubernetes.io/component: agent
rules:
  - apiGroups:
      # "" selects the core API group.
      - ""
    resources:
      - pods
    verbs:
      - list
      - patch
{{- end }}
37 changes: 37 additions & 0 deletions charts/vald/templates/agent/clusterrolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#
# Copyright (C) 2019-2024 vdaas.org vald team <vald@vdaas.org>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
{{- $agent := .Values.agent -}}
{{- if and $agent.enabled $agent.clusterRoleBinding.enabled $agent.ngt.enable_export_index_info_to_k8s }}
# ClusterRoleBinding for the agent. Rendered only when the agent, its
# clusterRoleBinding and agent.ngt.enable_export_index_info_to_k8s are all enabled.
# It binds the ClusterRole named by agent.clusterRole.name to the ServiceAccount
# named by agent.serviceAccount.name in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # Templated scalars are quoted so that empty or number-like values
  # always render as YAML strings.
  name: {{ $agent.clusterRoleBinding.name | quote }}
  labels:
    app.kubernetes.io/name: {{ include "vald.name" . | quote }}
    helm.sh/chart: {{ include "vald.chart" . | quote }}
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    app.kubernetes.io/version: {{ .Chart.Version | quote }}
    app.kubernetes.io/component: agent
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ $agent.clusterRole.name | quote }}
subjects:
  - kind: ServiceAccount
    name: {{ $agent.serviceAccount.name | quote }}
    namespace: {{ .Release.Namespace | quote }}
{{- end }}
3 changes: 3 additions & 0 deletions charts/vald/templates/agent/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ spec:
{{- toYaml $agent.podSecurityContext | nindent 8 }}
{{- end }}
terminationGracePeriodSeconds: {{ $agent.terminationGracePeriodSeconds }}
{{- if and $agent.serviceAccount.enabled $agent.ngt.enable_export_index_info_to_k8s }}
serviceAccountName: {{ $agent.serviceAccount.name }}
{{- end }}
volumes:
- name: {{ $agent.name }}-config
configMap:
Expand Down
3 changes: 3 additions & 0 deletions charts/vald/templates/agent/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ spec:
{{- toYaml $agent.podSecurityContext | nindent 8 }}
{{- end }}
terminationGracePeriodSeconds: {{ $agent.terminationGracePeriodSeconds }}
{{- if and $agent.serviceAccount.enabled $agent.ngt.enable_export_index_info_to_k8s }}
serviceAccountName: {{ $agent.serviceAccount.name }}
{{- end }}
volumes:
- name: {{ $agent.name }}-config
configMap:
Expand Down
29 changes: 29 additions & 0 deletions charts/vald/templates/agent/serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#
# Copyright (C) 2019-2024 vdaas.org vald team <vald@vdaas.org>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
{{- $agent := .Values.agent -}}
{{- if and $agent.enabled $agent.serviceAccount.enabled $agent.ngt.enable_export_index_info_to_k8s }}
# ServiceAccount for the agent. Rendered only when the agent, its serviceAccount
# and agent.ngt.enable_export_index_info_to_k8s are all enabled.
apiVersion: v1
kind: ServiceAccount
metadata:
  # Templated scalars are quoted so that empty or number-like values
  # always render as YAML strings.
  name: {{ $agent.serviceAccount.name | quote }}
  labels:
    app.kubernetes.io/name: {{ include "vald.name" . | quote }}
    helm.sh/chart: {{ include "vald.chart" . | quote }}
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    app.kubernetes.io/version: {{ .Chart.Version | quote }}
    app.kubernetes.io/component: agent
{{- end }}
3 changes: 3 additions & 0 deletions charts/vald/templates/agent/statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ spec:
{{- toYaml $agent.podSecurityContext | nindent 8 }}
{{- end }}
terminationGracePeriodSeconds: {{ $agent.terminationGracePeriodSeconds }}
{{- if and $agent.serviceAccount.enabled $agent.ngt.enable_export_index_info_to_k8s }}
serviceAccountName: {{ $agent.serviceAccount.name }}
{{- end }}
volumes:
- name: {{ $agent.name }}-config
configMap:
Expand Down
52 changes: 52 additions & 0 deletions charts/vald/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,29 @@
"type": "object",
"description": "deployment annotations"
},
"clusterRole": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean",
"description": "creates clusterRole resource"
},
"name": { "type": "string", "description": "name of clusterRole" }
}
},
"clusterRoleBinding": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean",
"description": "creates clusterRoleBinding resource"
},
"name": {
"type": "string",
"description": "name of clusterRoleBinding"
}
}
},
"enabled": { "type": "boolean", "description": "agent enabled" },
"env": {
"type": "array",
Expand Down Expand Up @@ -238,6 +261,10 @@
"type": "boolean",
"description": "enable copy on write saving for more stable backup"
},
"enable_export_index_info_to_k8s": {
"type": "boolean",
"description": "enable export index info to k8s"
},
"enable_in_memory_mode": {
"type": "boolean",
"description": "in-memory mode enabled"
Expand All @@ -251,6 +278,10 @@
"description": "maximum number of core ngt error buffer pool size limit",
"minimum": 1
},
"export_index_info_duration": {
"type": "string",
"description": "duration of exporting index info"
},
"index_path": {
"type": "string",
"description": "path to index data"
Expand Down Expand Up @@ -280,11 +311,19 @@
"type": "string",
"description": "minimum duration of load index timeout"
},
"namespace": {
"type": "string",
"description": "namespace of myself"
},
"object_type": {
"type": "string",
"description": "object type. it should be `float` or `uint8` or `float16`. for further details: https://github.com/yahoojapan/NGT/wiki/Command-Quick-Reference",
"enum": ["float", "float16", "uint8"]
},
"pod_name": {
"type": "string",
"description": "pod name of myself"
},
"search_edge_size": {
"type": "integer",
"description": "search edge size"
Expand Down Expand Up @@ -1459,6 +1498,19 @@
"labels": { "type": "object", "description": "service labels" }
}
},
"serviceAccount": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean",
"description": "creates service account"
},
"name": {
"type": "string",
"description": "name of service account"
}
}
},
"serviceType": {
"type": "string",
"description": "service type: ClusterIP, LoadBalancer or NodePort",
Expand Down
36 changes: 36 additions & 0 deletions charts/vald/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1904,6 +1904,30 @@ agent:
# @schema {"name": "agent.initContainers", "alias": "initContainers"}
# agent.initContainers -- init containers
initContainers: []
# @schema {"name": "agent.clusterRole", "type": "object"}
clusterRole:
# @schema {"name": "agent.clusterRole.enabled", "type": "boolean"}
# agent.clusterRole.enabled -- creates clusterRole resource
enabled: true
# @schema {"name": "agent.clusterRole.name", "type": "string"}
# agent.clusterRole.name -- name of clusterRole
name: agent
# @schema {"name": "agent.clusterRoleBinding", "type": "object"}
clusterRoleBinding:
# @schema {"name": "agent.clusterRoleBinding.enabled", "type": "boolean"}
# agent.clusterRoleBinding.enabled -- creates clusterRoleBinding resource
enabled: true
# @schema {"name": "agent.clusterRoleBinding.name", "type": "string"}
# agent.clusterRoleBinding.name -- name of clusterRoleBinding
name: agent
# @schema {"name": "agent.serviceAccount", "type": "object"}
serviceAccount:
# @schema {"name": "agent.serviceAccount.enabled", "type": "boolean"}
# agent.serviceAccount.enabled -- creates service account
enabled: true
# @schema {"name": "agent.serviceAccount.name", "type": "string"}
# agent.serviceAccount.name -- name of service account
name: agent-ngt
# @schema {"name": "agent.env", "alias": "env"}
# agent.env -- environment variables
env:
Expand Down Expand Up @@ -2035,6 +2059,12 @@ agent:
annotations: {}
# @schema {"name": "agent.ngt", "type": "object"}
ngt:
# @schema {"name": "agent.ngt.pod_name", "type": "string"}
# agent.ngt.pod_name -- pod name of myself
pod_name: _MY_POD_NAME_
# @schema {"name": "agent.ngt.namespace", "type": "string"}
# agent.ngt.namespace -- namespace of myself
namespace: _MY_POD_NAMESPACE_
# @schema {"name": "agent.ngt.index_path", "type": "string"}
# agent.ngt.index_path -- path to index data
index_path: ""
Expand Down Expand Up @@ -2107,6 +2137,12 @@ agent:
# @schema {"name": "agent.ngt.enable_copy_on_write", "type": "boolean"}
# agent.ngt.enable_copy_on_write -- enable copy on write saving for more stable backup
enable_copy_on_write: false
# @schema {"name": "agent.ngt.enable_export_index_info_to_k8s", "type": "boolean"}
# agent.ngt.enable_export_index_info_to_k8s -- enable export index info to k8s
enable_export_index_info_to_k8s: false
# @schema {"name": "agent.ngt.export_index_info_duration", "type": "string"}
# agent.ngt.export_index_info_duration -- duration of exporting index info
export_index_info_duration: 1m
# @schema {"name": "agent.ngt.vqueue", "type": "object"}
vqueue:
# @schema {"name": "agent.ngt.vqueue.insert_buffer_pool_size", "type": "integer"}
Expand Down
2 changes: 2 additions & 0 deletions cmd/agent/core/ngt/sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ observability:
service_name: "vald-agent-ngt"
buffer_max_count: 10
ngt:
pod_name: "vald-agent-ngt-0" # this might overwrite k8s resource of agent pod 0
namespace: "default"
auto_create_index_pool_size: 10000
auto_index_check_duration: 30m
auto_index_duration_limit: 24h
Expand Down
Loading

0 comments on commit 699ba22

Please sign in to comment.