From ebb5efa0051b954e3154d43f1b3f5bc720c0f2d9 Mon Sep 17 00:00:00 2001 From: Sebastian Daberdaku Date: Wed, 25 Sep 2024 21:31:35 +0200 Subject: [PATCH] Add option to enable worker graceful shutdown --- charts/trino/README.md | 27 +++++++++- ...configmap-access-control-coordinator.yaml} | 0 .../configmap-access-control-worker.yaml | 25 +++++++++ .../templates/configmap-coordinator.yaml | 3 ++ charts/trino/templates/configmap-worker.yaml | 9 ++++ .../templates/deployment-coordinator.yaml | 2 +- charts/trino/templates/deployment-worker.yaml | 26 ++++++++++ .../tests/test-graceful-shutdown.yaml | 51 +++++++++++++++++++ charts/trino/values.yaml | 42 +++++++++++---- test-graceful-shutdown-values.yaml | 9 ++++ test.sh | 3 +- 11 files changed, 184 insertions(+), 13 deletions(-) rename charts/trino/templates/{configmap-access-control.yaml => configmap-access-control-coordinator.yaml} (100%) create mode 100644 charts/trino/templates/configmap-access-control-worker.yaml create mode 100644 charts/trino/templates/tests/test-graceful-shutdown.yaml create mode 100644 test-graceful-shutdown-values.yaml diff --git a/charts/trino/README.md b/charts/trino/README.md index d7995c25..5c131e00 100644 --- a/charts/trino/README.md +++ b/charts/trino/README.md @@ -494,9 +494,11 @@ Fast distributed SQL query engine for big data analytics that helps you explore Allows mounting additional Trino configuration files from Kubernetes secrets on the coordinator node. Example: + ```yaml - name: sample-secret secretName: sample-secret path: /secrets/sample.json + ``` * `worker.jvm.maxHeapSize` - string, default: `"8G"` * `worker.jvm.gcMethod.type` - string, default: `"UseG1GC"` * `worker.jvm.gcMethod.g1.heapRegionSize` - string, default: `"32M"` @@ -550,13 +552,34 @@ Fast distributed SQL query engine for big data analytics that helps you explore ``` * `worker.lifecycle` - object, default: `{}` - To enable [graceful shutdown](https://trino.io/docs/current/admin/graceful-shutdown.html), define a lifecycle preStop like bellow, Set the `terminationGracePeriodSeconds` to a value greater than or equal to the configured `shutdown.grace-period`. Configure `shutdown.grace-period` in `additionalConfigProperties` as `shutdown.grace-period=2m` (default is 2 minutes). Also configure `accessControl` because the `default` system access control does not allow graceful shutdowns. + Worker container [lifecycle events](https://kubernetes.io/docs/tasks/configure-pod-container/attach-handler-lifecycle-event/) Example: ```yaml preStop: exec: - command: ["/bin/sh", "-c", "curl -v -X PUT -d '\"SHUTTING_DOWN\"' -H \"Content-type: application/json\" http://localhost:8081/v1/info/state"] + command: ["/bin/sh", "-c", "sleep 120"] + ``` + If provided, it will override the `preStop` lifecycle event configured by `gracefulShutdown`. +* `worker.gracefulShutdown` - object, default: `{"accessControl":{"configFile":"graceful-shutdown-rules.json","user":"admin"},"enabled":false,"gracePeriod":"2m"}` + + Configure [graceful shutdown](https://trino.io/docs/current/admin/graceful-shutdown.html) + Example: + ```yaml + gracefulShutdown: + enabled: true + gracePeriod: 2m + accessControl: + user: admin + configFile: graceful-shutdown-rules.json ``` + Enabling this feature will: + 1) Add a `preStop` lifecycle event to all worker Pods; + 2) Set the `shutdown.grace-period` configuration property to `gracePeriod`; + 3) Configure the workers' `accessControl` since the `default` system access control [does not allow graceful + shutdowns](https://trino.io/docs/current/admin/graceful-shutdown.html). + The user must set the `terminationGracePeriodSeconds` to a value of at least two times the configured `gracePeriod`. + The worker that receives the graceful shutdown request [will sleep for `gracePeriod` twice](https://trino.io/docs/current/admin/graceful-shutdown.html#shutdown-behavior). + Setting `worker.lifecycle` will override the `preStop` event set by this configuration. * `worker.terminationGracePeriodSeconds` - int, default: `30` * `worker.nodeSelector` - object, default: `{}` * `worker.tolerations` - list, default: `[]` diff --git a/charts/trino/templates/configmap-access-control.yaml b/charts/trino/templates/configmap-access-control-coordinator.yaml similarity index 100% rename from charts/trino/templates/configmap-access-control.yaml rename to charts/trino/templates/configmap-access-control-coordinator.yaml diff --git a/charts/trino/templates/configmap-access-control-worker.yaml b/charts/trino/templates/configmap-access-control-worker.yaml new file mode 100644 index 00000000..ac792fe7 --- /dev/null +++ b/charts/trino/templates/configmap-access-control-worker.yaml @@ -0,0 +1,25 @@ +{{- if .Values.worker.gracefulShutdown.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "trino.fullname" . }}-access-control-volume-worker + namespace: {{ .Release.Namespace }} + labels: + {{- include "trino.labels" . | nindent 4 }} + app.kubernetes.io/component: worker +data: + {{- with .Values.worker.gracefulShutdown.accessControl }} + {{ .configFile }}: >- + { + "system_information": [ + { + "allow": [ + "read", + "write" + ], + "user": "{{ .user }}" + } + ] + } + {{- end }} +{{- end }} diff --git a/charts/trino/templates/configmap-coordinator.yaml b/charts/trino/templates/configmap-coordinator.yaml index 667ab0e3..ada68613 100644 --- a/charts/trino/templates/configmap-coordinator.yaml +++ b/charts/trino/templates/configmap-coordinator.yaml @@ -76,6 +76,9 @@ data: jmx.rmiregistry.port={{ .Values.jmx.registryPort }} jmx.rmiserver.port={{ .Values.jmx.serverPort }} {{- end }} + {{- if .Values.worker.gracefulShutdown.enabled }} + shutdown.grace-period={{ .Values.worker.gracefulShutdown.gracePeriod }} + {{- end }} {{- if .Values.server.coordinatorExtraConfig }} {{- .Values.server.coordinatorExtraConfig | nindent 4 }} {{- end }} diff --git a/charts/trino/templates/configmap-worker.yaml b/charts/trino/templates/configmap-worker.yaml index 23fb7432..86baf78f 100644 --- a/charts/trino/templates/configmap-worker.yaml +++ b/charts/trino/templates/configmap-worker.yaml @@ -57,10 +57,19 @@ data: {{- range $configValue := .Values.additionalConfigProperties }} {{ $configValue }} {{- end }} + {{- if .Values.worker.gracefulShutdown.enabled }} + shutdown.grace-period={{ .Values.worker.gracefulShutdown.gracePeriod }} + {{- end }} {{- if .Values.server.workerExtraConfig }} {{- .Values.server.workerExtraConfig | nindent 4 }} {{- end }} +{{- if .Values.worker.gracefulShutdown.enabled }} + access-control.properties: | + access-control.name=file + security.config-file={{ .Values.server.config.path }}/access-control/{{ .Values.worker.gracefulShutdown.accessControl.configFile }} +{{- end }} + {{- if .Values.server.exchangeManager }} exchange-manager.properties: | exchange-manager.name={{ .Values.server.exchangeManager.name }} diff --git a/charts/trino/templates/deployment-coordinator.yaml b/charts/trino/templates/deployment-coordinator.yaml index ee28f32d..99434193 100644 --- a/charts/trino/templates/deployment-coordinator.yaml +++ b/charts/trino/templates/deployment-coordinator.yaml @@ -18,7 +18,7 @@ spec: metadata: annotations: {{- if and (eq .Values.accessControl.type "configmap") (not .Values.accessControl.refreshPeriod) }} - checksum/access-control-config: {{ include (print $.Template.BasePath "/configmap-access-control.yaml") . | sha256sum }} + checksum/access-control-config: {{ include (print $.Template.BasePath "/configmap-access-control-coordinator.yaml") . | sha256sum }} {{- end }} checksum/catalog-config: {{ include (print $.Template.BasePath "/configmap-catalog.yaml") . | sha256sum }} checksum/coordinator-config: {{ include (print $.Template.BasePath "/configmap-coordinator.yaml") . | sha256sum }} diff --git a/charts/trino/templates/deployment-worker.yaml b/charts/trino/templates/deployment-worker.yaml index 521e7990..adbeebc0 100644 --- a/charts/trino/templates/deployment-worker.yaml +++ b/charts/trino/templates/deployment-worker.yaml @@ -23,6 +23,9 @@ spec: annotations: checksum/catalog-config: {{ include (print $.Template.BasePath "/configmap-catalog.yaml") . | sha256sum }} checksum/worker-config: {{ include (print $.Template.BasePath "/configmap-worker.yaml") . | sha256sum }} + {{- if .Values.worker.gracefulShutdown.enabled }} + checksum/access-control-config: {{ include (print $.Template.BasePath "/configmap-access-control-worker.yaml") . | sha256sum }} + {{- end }} {{- if .Values.worker.annotations }} {{- tpl (toYaml .Values.worker.annotations) . | nindent 8 }} {{- end }} @@ -51,6 +54,11 @@ spec: - name: schemas-volume configMap: name: {{ template "trino.fullname" . }}-schemas-volume-worker + {{- if .Values.worker.gracefulShutdown.enabled }} + - name: access-control-volume + configMap: + name: {{ template "trino.fullname" . }}-access-control-volume-worker + {{- end }} {{- range .Values.configMounts }} - name: {{ .name }} configMap: @@ -98,6 +106,10 @@ spec: name: catalog-volume - mountPath: {{ .Values.kafka.mountPath }} name: schemas-volume + {{- if .Values.worker.gracefulShutdown.enabled }} + - mountPath: {{ .Values.server.config.path }}/access-control + name: access-control-volume + {{- end }} {{- range .Values.configMounts }} - name: {{ .name }} mountPath: {{ .path }} @@ -144,7 +156,21 @@ spec: failureThreshold: {{ .Values.worker.readinessProbe.failureThreshold | default 6 }} successThreshold: {{ .Values.worker.readinessProbe.successThreshold | default 1 }} lifecycle: + {{- if .Values.worker.lifecycle }} {{- toYaml .Values.worker.lifecycle | nindent 12 }} + {{- else if .Values.worker.gracefulShutdown.enabled }} + preStop: + exec: + command: + - "/bin/sh" + - "-c" + - >- + curl -v -X PUT + -d '"SHUTTING_DOWN"' + -H 'Content-type: application/json' + -H 'X-Trino-User: {{ .Values.worker.gracefulShutdown.accessControl.user }}' + http://localhost:{{- .Values.service.port -}}/v1/info/state + {{- end }} resources: {{- toYaml .Values.worker.resources | nindent 12 }} {{- if .Values.sidecarContainers.worker }} diff --git a/charts/trino/templates/tests/test-graceful-shutdown.yaml b/charts/trino/templates/tests/test-graceful-shutdown.yaml new file mode 100644 index 00000000..ed9c4529 --- /dev/null +++ b/charts/trino/templates/tests/test-graceful-shutdown.yaml @@ -0,0 +1,51 @@ +{{- if .Values.worker.gracefulShutdown.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "trino.fullname" . }}-workers + labels: + {{- include "trino.labels" . | nindent 4 }} + app.kubernetes.io/component: test + test: graceful-shutdown + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "1" + "helm.sh/hook-delete-policy": hook-succeeded +spec: + type: ClusterIP + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "trino.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: worker +--- +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "trino.fullname" . }}-test-graceful-shutdown + labels: + {{- include "trino.labels" . | nindent 4 }} + app.kubernetes.io/component: test + test: graceful-shutdown + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "2" + "helm.sh/hook-delete-policy": hook-succeeded +spec: + containers: + - name: graceful-shutdown + image: curlimages/curl:latest + command: ["sh", "-c"] + args: + - >- + curl -v -X PUT + -d '"SHUTTING_DOWN"' + -H 'Content-type: application/json' + -H 'X-Trino-User: {{ .Values.worker.gracefulShutdown.accessControl.user }}' + --fail-with-body + http://{{ include "trino.fullname" . }}-workers:{{- .Values.service.port -}}/v1/info/state + restartPolicy: Never +{{- end }} diff --git a/charts/trino/values.yaml b/charts/trino/values.yaml index 8892ca74..2cbb3165 100644 --- a/charts/trino/values.yaml +++ b/charts/trino/values.yaml @@ -574,9 +574,11 @@ coordinator: # files from Kubernetes secrets on the coordinator node. # @raw # Example: + # ```yaml # - name: sample-secret # secretName: sample-secret # path: /secrets/sample.json + # ``` worker: jvm: @@ -649,21 +651,43 @@ worker: # ``` lifecycle: {} - # worker.lifecycle -- To enable [graceful - # shutdown](https://trino.io/docs/current/admin/graceful-shutdown.html), - # define a lifecycle preStop like bellow, Set the - # `terminationGracePeriodSeconds` to a value greater than or equal to the - # configured `shutdown.grace-period`. Configure `shutdown.grace-period` in - # `additionalConfigProperties` as `shutdown.grace-period=2m` (default is 2 - # minutes). Also configure `accessControl` because the `default` system - # access control does not allow graceful shutdowns. + # worker.lifecycle -- Worker container [lifecycle + # events](https://kubernetes.io/docs/tasks/configure-pod-container/attach-handler-lifecycle-event/) # @raw # Example: # ```yaml # preStop: # exec: - # command: ["/bin/sh", "-c", "curl -v -X PUT -d '\"SHUTTING_DOWN\"' -H \"Content-type: application/json\" http://localhost:8081/v1/info/state"] + # command: ["/bin/sh", "-c", "sleep 120"] + # ``` + # If provided, it will override the `preStop` lifecycle event configured by `gracefulShutdown`. + + gracefulShutdown: + enabled: false + gracePeriod: 2m + accessControl: + user: admin + configFile: graceful-shutdown-rules.json + # worker.gracefulShutdown -- Configure [graceful + # shutdown](https://trino.io/docs/current/admin/graceful-shutdown.html) + # @raw + # Example: + # ```yaml + # gracefulShutdown: + # enabled: true + # gracePeriod: 2m + # accessControl: + # user: admin + # configFile: graceful-shutdown-rules.json # ``` + # Enabling this feature will: + # 1) Add a `preStop` lifecycle event to all worker Pods; + # 2) Set the `shutdown.grace-period` configuration property to `gracePeriod`; + # 3) Configure the workers' `accessControl` since the `default` system access control [does not allow graceful + # shutdowns](https://trino.io/docs/current/admin/graceful-shutdown.html). + # The user must set the `terminationGracePeriodSeconds` to a value of at least two times the configured `gracePeriod`. + # The worker that receives the graceful shutdown request [will sleep for `gracePeriod` twice](https://trino.io/docs/current/admin/graceful-shutdown.html#shutdown-behavior). + # Setting `worker.lifecycle` will override the `preStop` event set by this configuration. terminationGracePeriodSeconds: 30 diff --git a/test-graceful-shutdown-values.yaml b/test-graceful-shutdown-values.yaml new file mode 100644 index 00000000..2ef79019 --- /dev/null +++ b/test-graceful-shutdown-values.yaml @@ -0,0 +1,9 @@ +worker: + gracefulShutdown: + enabled: true + gracePeriod: 1m + accessControl: + user: admin + configFile: graceful-shutdown-rules.json + + terminationGracePeriodSeconds: 130 diff --git a/test.sh b/test.sh index 6f039413..470625bc 100755 --- a/test.sh +++ b/test.sh @@ -9,6 +9,7 @@ declare -A testCases=( [overrides]="--set coordinatorNameOverride=coordinator-overridden,workerNameOverride=worker-overridden,nameOverride=overridden" [access_control_properties_values]="--values test-access-control-properties-values.yaml" [exchange_manager_values]="--values test-exchange-manager-values.yaml" + [graceful_shutdown]="--values test-graceful-shutdown-values.yaml" ) function join_by { @@ -23,7 +24,7 @@ NAMESPACE=trino-$(LC_ALL=C tr -dc 'a-z0-9' &2