-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy pathdeployment.yaml
168 lines (167 loc) · 5.62 KB
/
deployment.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Deployment for the chart's serving container. The pod has:
#   - an optional "model-downloader" init container that fetches the model
#     with huggingface-cli into the shared /data volume,
#   - the main container, started with --model/--host/--port/--download-dir,
#   - three volumes: model-volume (/data), shm (/dev/shm) and tmp (/tmp).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "vllm.fullname" . }}
  labels:
    {{- include "vllm.labels" . | nindent 4 }}
spec:
  {{- if ne (int .Values.replicaCount) 1 }}
  # remove if replica count should not be reset on pod update (e.g. with HPA)
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "vllm.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "vllm.selectorLabels" . | nindent 8 }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "vllm.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      {{- if not (hasPrefix "/data/" .Values.LLM_MODEL_ID) }}
      # Only rendered when LLM_MODEL_ID does not start with "/data/"
      # (presumably such an ID is a path already present on the model
      # volume, so nothing needs downloading - confirm with chart docs).
      initContainers:
        - name: model-downloader
          envFrom:
            - configMapRef:
                name: {{ include "vllm.fullname" . }}-config
          securityContext:
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
              add:
                - DAC_OVERRIDE
                # To be able to make data model directory group writable for
                # previously downloaded model by old versions of helm chart
                - FOWNER
            seccompProfile:
              type: RuntimeDefault
          image: huggingface/downloader:0.17.3
          # "sh -ec" aborts the script on the first failing command.
          command: ['sh', '-ec']
          # NOTE(review): $(HF_TOKEN) is Kubernetes' variable-reference
          # syntax for container args; HF_TOKEN is presumably supplied by
          # the ConfigMap referenced in envFrom above - confirm.
          args:
            - |
              echo "Huggingface log in ...";
              huggingface-cli login --token $(HF_TOKEN);
              echo "Download model {{ .Values.LLM_MODEL_ID }} ... ";
              huggingface-cli download --cache-dir /data {{ .Values.LLM_MODEL_ID | quote }};
              echo "Change model files mode ...";
              chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }}
              # NOTE: Buggy logout command;
              # huggingface-cli logout;
          volumeMounts:
            - mountPath: /data
              name: model-volume
            # Root filesystem is read-only, so /tmp is a separate volume.
            - mountPath: /tmp
              name: tmp
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          envFrom:
            - configMapRef:
                name: {{ include "vllm.fullname" . }}-config
            {{- if .Values.global.extraEnvConfig }}
            # Extra, optional ConfigMap; the pod still starts if it is absent.
            - configMapRef:
                name: {{ .Values.global.extraEnvConfig | quote }}
                optional: true
            {{- end }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          {{- if .Values.image.pullPolicy }}
          imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
          {{- end }}
          # User-supplied extraCmdArgs come first, followed by the fixed
          # model/host/port/download-dir arguments.
          args:
            {{- if .Values.extraCmdArgs }}
            {{- range .Values.extraCmdArgs }}
            - {{ . | quote }}
            {{- end }}
            {{- end }}
            - "--model"
            - {{ .Values.LLM_MODEL_ID | quote }}
            - "--host"
            - "0.0.0.0"
            - "--port"
            - {{ .Values.port | quote }}
            - "--download-dir"
            - "/data"
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
            - mountPath: /tmp
              name: tmp
          ports:
            - name: http
              containerPort: {{ .Values.port }}
              protocol: TCP
          {{- if .Values.livenessProbe }}
          livenessProbe:
            {{- toYaml .Values.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.readinessProbe }}
          readinessProbe:
            {{- toYaml .Values.readinessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.startupProbe }}
          startupProbe:
            {{- toYaml .Values.startupProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
      volumes:
        # Model storage precedence: PVC, then hostPath, else an emptyDir.
        - name: model-volume
          {{- if .Values.global.modelUsePVC }}
          persistentVolumeClaim:
            claimName: {{ .Values.global.modelUsePVC | quote }}
          {{- else if .Values.global.modelUseHostPath }}
          hostPath:
            path: {{ .Values.global.modelUseHostPath | quote }}
            type: Directory
          {{- else }}
          emptyDir: {}
          {{- end }}
        # Memory-backed volume mounted at /dev/shm, capped at shmSize.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.shmSize | quote }}
        - name: tmp
          emptyDir: {}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if .Values.evenly_distributed }}
      # Soft constraint (ScheduleAnyway): prefer spreading pods across nodes.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              {{- include "vllm.selectorLabels" . | nindent 14 }}
      {{- end }}
      {{- if not .Values.accelDevice }}
      # extra time to finish processing buffered requests on CPU before pod is forcibly terminated
      terminationGracePeriodSeconds: 120
      {{- end }}