Skip to content

Commit

Permalink
Update GMC manifest changes and misc fixes (#564)
Browse files Browse the repository at this point in the history
* Adapt to latest upstream vllm changes

- Run as the default user of the upstream vllm image, since the upstream
  image doesn't support running as a non-default user

- Adapt llm-uservice's vllm-values.yaml to ci-vllm-gaudi-values.yaml to
  enable CI on Gaudi instead of Xeon because of test speed

* Update GMC manifest changes

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
  • Loading branch information
lianhao authored Nov 15, 2024
1 parent 823ce22 commit 87dc673
Show file tree
Hide file tree
Showing 37 changed files with 4,052 additions and 31 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ repos:
- id: requirements-txt-fixer
- id: trailing-whitespace
files: (.*\.(py|rst|cmake|yaml|yml|json|ts|js|html|svelte|sh))$
exclude: (microservices-connector/config/manifests/.*\.yaml)$

- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.5.5
hooks:
- id: insert-license
files: (.*\.(py|yaml|yml|sh))$
exclude: (microservices-connector/config/manifests/.*\.yaml)$
args:
[
--license-filepath=.github/license_template.txt,
Expand Down
4 changes: 2 additions & 2 deletions helm-charts/common/gpt-sovits/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,6 @@ tolerations: []
affinity: {}

global:
gpt-sovits_proxy: ""
gpt-sovitss_proxy: ""
http_proxy: ""
https_proxy: ""
no_proxy: ""
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,15 @@ tgi:
enabled: false
vllm:
enabled: true
image:
repository: opea/vllm-hpu
tag: "latest"
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
OMPI_MCA_btl_vader_single_copy_mechanism: none
extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"]
resources:
limits:
habana.ai/gaudi: 1

vLLM_ENDPOINT: ""
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
Expand Down
2 changes: 1 addition & 1 deletion helm-charts/common/vllm/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ spec:
- |
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "vllm.fullname" . }}/v1/completions \
curl http://{{ include "vllm.fullname" . }}/v1/completions -sS --fail-with-body \
-H "Content-Type: application/json" \
-d '{"model": {{ .Values.LLM_MODEL_ID | quote }},"prompt": "What is Deep Learning?","max_tokens": 32,"temperature": 0}' && break;
curlcode=$?
Expand Down
20 changes: 10 additions & 10 deletions helm-charts/common/vllm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@ podAnnotations: {}
podSecurityContext: {}
# fsGroup: 2000

securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
runAsNonRoot: true
runAsUser: 1000
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault
securityContext: {}
# readOnlyRootFilesystem: true
# allowPrivilegeEscalation: false
# runAsNonRoot: true
# runAsUser: 1000
# capabilities:
# drop:
# - ALL
# seccompProfile:
# type: RuntimeDefault

service:
type: ClusterIP
Expand Down
2 changes: 1 addition & 1 deletion helm-charts/update_manifests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ function generate_yaml {
if [[ "$filename" =~ ^variant_.*-values.yaml ]]; then
ext=$(echo $filename | sed 's/^variant_//' | sed 's/-values.yaml$//')
outputfile="$ext-${chart}.yaml"
releasename=$ext-$chart
releasename=$(echo "${ext}-${chart}" | sed 's/_/-/g')
else
ext=$(echo $filename | sed 's/-values.yaml$//')
outputfile="${chart}_${ext}.yaml"
Expand Down
143 changes: 143 additions & 0 deletions microservices-connector/config/manifests/agent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
---
# Source: agent/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# ConfigMap "agent-config": string key/value settings for the agent.
# The agent Deployment in this manifest references it through
# envFrom/configMapRef, so each key below becomes a container
# environment variable.
apiVersion: v1
kind: ConfigMap
metadata:
name: agent-config
labels:
helm.sh/chart: agent-1.0.0
app.kubernetes.io/name: agent
app.kubernetes.io/instance: agent
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
# Every value is quoted so that boolean- and number-looking settings
# ("false", "15", "4096", "True") stay strings.
#
# The hostnames used in the URLs below (agent-tgi, agent-docretriever,
# agent-crag, agent-worker) are not defined in the visible part of this
# manifest - presumably sibling services; verify they exist in the
# target namespace.
llm_endpoint_url: "http://agent-tgi"
#
model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
RETRIEVAL_TOOL_URL: "http://agent-docretriever:8889/v1/retrievaltool"
CRAG_SERVER: "http://agent-crag:8080"
WORKER_AGENT_URL: "http://agent-worker:9095/v1/chat/completions"
require_human_feedback: "false"
recursion_limit: "15"
llm_engine: "tgi"
strategy: "react_langchain"
max_new_tokens: "4096"
# Placeholder text, not a real token; it has to be replaced before use.
# NOTE(review): a ConfigMap stores this value in plain text - consider
# whether a Secret is more appropriate.
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# Located under /tmp, which the agent Deployment mounts as an emptyDir
# volume.
HF_HOME: "/tmp/.cache/huggingface"
# Proxy variables are present but set to empty strings.
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: "True"
---
# Source: agent/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Service "agent": ClusterIP service that exposes TCP port 9090 and
# forwards it to port 9090 on the pods matched by the selector below.
apiVersion: v1
kind: Service
metadata:
name: agent
labels:
helm.sh/chart: agent-1.0.0
app.kubernetes.io/name: agent
app.kubernetes.io/instance: agent
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
# 9090 matches the containerPort declared by the agent Deployment in
# this manifest.
- port: 9090
targetPort: 9090
protocol: TCP
name: agent
# Same two labels that the agent Deployment puts on its pod template.
selector:
app.kubernetes.io/name: agent
app.kubernetes.io/instance: agent
---
# Source: agent/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Deployment "agent": one replica of the opea/agent-langchain container,
# configured from the agent-config ConfigMap and listening on port 9090.
apiVersion: apps/v1
kind: Deployment
metadata:
name: agent
labels:
helm.sh/chart: agent-1.0.0
app.kubernetes.io/name: agent
app.kubernetes.io/instance: agent
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
# The selector must match the pod template labels a few lines below.
selector:
matchLabels:
app.kubernetes.io/name: agent
app.kubernetes.io/instance: agent
template:
metadata:
labels:
app.kubernetes.io/name: agent
app.kubernetes.io/instance: agent
spec:
# Pod-level security context is left empty; restrictions are set on the
# container instead.
securityContext:
{}
containers:
- name: agent
# Environment comes from agent-config plus an optional
# extra-env-config ConfigMap; "optional: true" lets the pod start when
# that second ConfigMap does not exist.
envFrom:
- configMapRef:
name: agent-config
- configMapRef:
name: extra-env-config
optional: true
# Runs as non-root UID 1000 with all capabilities dropped and no
# privilege escalation. The root filesystem is left writable
# (readOnlyRootFilesystem: false).
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
# NOTE(review): the "latest" tag combined with IfNotPresent means a node
# that already has the image cached will not pull a newer build.
image: "opea/agent-langchain:latest"
imagePullPolicy: IfNotPresent
ports:
# The port name "agent" is what the three probes below refer to.
- name: agent
containerPort: 9090
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
# All three probes issue an HTTP GET for v1/health_check on the "agent"
# port every 5 s after a 5 s initial delay.
# NOTE(review): the probe paths have no leading "/" - confirm this is
# intended.
# Liveness: allows 24 consecutive failures (about 2 minutes at 5 s).
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: agent
initialDelaySeconds: 5
periodSeconds: 5
# Readiness: no failureThreshold is set here, so the Kubernetes default
# applies.
readinessProbe:
httpGet:
path: v1/health_check
port: agent
initialDelaySeconds: 5
periodSeconds: 5
# Startup: allows 120 consecutive failures (about 10 minutes at 5 s).
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: agent
initialDelaySeconds: 5
periodSeconds: 5
# No resource requests or limits are set.
resources:
{}
volumes:
# emptyDir volume backing the /tmp mount above.
- name: tmp
emptyDir: {}
---
# Source: agent/templates/servicemonitor.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
Loading

0 comments on commit 87dc673

Please sign in to comment.