Skip to content

Commit

Permalink
ChatQnA: accelerate also teirerank with Gaudi (#475)
Browse files Browse the repository at this point in the history
ChatQnA: accelerate also teirerank

Signed-off-by: Eero Tamminen <eero.t.tamminen@intel.com>
  • Loading branch information
eero-t authored Oct 25, 2024
1 parent bdb9af9 commit 620963f
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 15 deletions.
51 changes: 36 additions & 15 deletions helm-charts/chatqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,10 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# NOTE(review): a second `tei:` stanza with the same values appears further
# down in this capture. This one looks like the pre-change side of the diff;
# confirm only one `tei:` key survives in the real file, because most YAML
# parsers silently keep the last duplicate key.
tei:
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tei-gaudi
tag: synapse_1.16
resources:
limits:
habana.ai/gaudi: 1
securityContext:
readOnlyRootFilesystem: false
livenessProbe:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1
# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values

# To override values in subchart tgi
# TGI: largest bottleneck for ChatQnA
tgi:
accelDevice: "gaudi"
image:
Expand All @@ -41,3 +29,36 @@ tgi:
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120

# Reranking: second largest bottleneck when reranking is in use
# (i.e. query context docs have been uploaded with data-prep)
# Overrides for the teirerank subchart so it runs on a Gaudi device.
# NOTE(review): nesting reconstructed from the conventional Helm values
# layout (image/resources/securityContext/probes) — confirm against the chart.
teirerank:
  accelDevice: "gaudi"
  image:
    repository: opea/tei-gaudi
    # NOTE(review): "latest" is a mutable tag; consider pinning a release.
    tag: "latest"
  resources:
    limits:
      # Limit of one habana.ai/gaudi extended resource for this workload.
      habana.ai/gaudi: 1
  securityContext:
    readOnlyRootFilesystem: false
  livenessProbe:
    timeoutSeconds: 1
  readinessProbe:
    timeoutSeconds: 1

# Embedding: second largest bottleneck when reranking is not in use
# Overrides for the tei (embedding) subchart so it runs on a Gaudi device.
# NOTE(review): nesting reconstructed from the conventional Helm values
# layout (image/resources/securityContext/probes) — confirm against the chart.
tei:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tei-gaudi
    # Quoted so the version-like tag is always read as a string.
    tag: "synapse_1.16"
  resources:
    limits:
      # Limit of one habana.ai/gaudi extended resource for this workload.
      habana.ai/gaudi: 1
  securityContext:
    readOnlyRootFilesystem: false
  livenessProbe:
    timeoutSeconds: 1
  readinessProbe:
    timeoutSeconds: 1
24 changes: 24 additions & 0 deletions helm-charts/common/teirerank/gaudi-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Gaudi accelerator overrides for the teirerank chart's default values.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Select the Gaudi accelerator variant of the chart.
# NOTE(review): nesting below is reconstructed from the conventional Helm
# values layout — confirm against the chart's values.yaml.
accelDevice: "gaudi"

image:
  repository: opea/tei-gaudi
  # NOTE(review): "latest" is a mutable tag; consider pinning a release.
  tag: "latest"

securityContext:
  readOnlyRootFilesystem: false

resources:
  limits:
    # Limit of one habana.ai/gaudi extended resource for this workload.
    habana.ai/gaudi: 1

livenessProbe:
  timeoutSeconds: 1
readinessProbe:
  timeoutSeconds: 1
1 change: 1 addition & 0 deletions helm-charts/common/teirerank/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ data:
{{- if .Values.global.HF_ENDPOINT }}
HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
{{- end }}
MAX_WARMUP_SEQUENCE_LENGTH: "512"

0 comments on commit 620963f

Please sign in to comment.