Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add env variables for overriding embedding batch size #2395

Merged
merged 3 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/danswer/configs/app_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
except ValueError:
INDEX_BATCH_SIZE = 16


# Below are intended to match the env variables names used by the official postgres docker image
# https://hub.docker.com/_/postgres
POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres"
Expand Down
8 changes: 6 additions & 2 deletions backend/danswer/configs/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,13 @@
ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "search_query: ")
ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "search_document: ")
# Purely an optimization, memory limitation consideration
BATCH_SIZE_ENCODE_CHUNKS = 8

# User's set embedding batch size overrides the default encoding batch sizes
EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None

BATCH_SIZE_ENCODE_CHUNKS = EMBEDDING_BATCH_SIZE or 8
# don't send over too many chunks at once, as sending too many could cause timeouts
BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = 512
BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = EMBEDDING_BATCH_SIZE or 512
# For score display purposes, only way is to know the expected ranges
CROSS_ENCODER_RANGE_MAX = 1
CROSS_ENCODER_RANGE_MIN = 0
Expand Down
4 changes: 3 additions & 1 deletion deployment/docker_compose/docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ services:
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
# Don't change the NLP model configs unless you know what you're doing
- EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
Expand Down Expand Up @@ -93,7 +94,6 @@ services:
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
# Seeding configuration
- ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
extra_hosts:
- "host.docker.internal:host-gateway"
logging:
Expand Down Expand Up @@ -167,6 +167,7 @@ services:
- GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
- NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
- GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}

# Danswer SlackBot Configs
- DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-}
- DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-}
Expand Down Expand Up @@ -269,6 +270,7 @@ services:
fi"
restart: on-failure
environment:
- INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
- INDEXING_ONLY=True
# Set to debug to get more fine-grained logs
Expand Down
2 changes: 2 additions & 0 deletions deployment/docker_compose/docker-compose.gpu-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ services:
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
# Don't change the NLP model configs unless you know what you're doing
- EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
Expand Down Expand Up @@ -240,6 +241,7 @@ services:
fi"
restart: on-failure
environment:
- INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
Expand Down
1 change: 1 addition & 0 deletions deployment/helm/charts/danswer/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ configMap:
# Internet Search Tool
BING_API_KEY: ""
# Don't change the NLP models unless you know what you're doing
EMBEDDING_BATCH_SIZE: ""
DOCUMENT_ENCODER_MODEL: ""
NORMALIZE_EMBEDDINGS: ""
ASYM_QUERY_PREFIX: ""
Expand Down
1 change: 1 addition & 0 deletions deployment/kubernetes/env-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ data:
# Internet Search Tool
BING_API_KEY: ""
# Don't change the NLP models unless you know what you're doing
EMBEDDING_BATCH_SIZE: ""
DOCUMENT_ENCODER_MODEL: ""
NORMALIZE_EMBEDDINGS: ""
ASYM_QUERY_PREFIX: ""
Expand Down
Loading