diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index d7733fdc0ab..35fcb8903e4 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -126,6 +126,7 @@ except ValueError: INDEX_BATCH_SIZE = 16 + # Below are intended to match the env variables names used by the official postgres docker image # https://hub.docker.com/_/postgres POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres" diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py index bfd62357304..c9668cd8136 100644 --- a/backend/danswer/configs/model_configs.py +++ b/backend/danswer/configs/model_configs.py @@ -39,9 +39,13 @@ ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "search_query: ") ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "search_document: ") # Purely an optimization, memory limitation consideration -BATCH_SIZE_ENCODE_CHUNKS = 8 + +# A user-set embedding batch size overrides the default encoding batch sizes (unset or 0 keeps the defaults) +EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None + +BATCH_SIZE_ENCODE_CHUNKS = EMBEDDING_BATCH_SIZE or 8 # don't send over too many chunks at once, as sending too many could cause timeouts -BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = 512 +BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = EMBEDDING_BATCH_SIZE or 512 # For score display purposes, only way is to know the expected ranges CROSS_ENCODER_RANGE_MAX = 1 CROSS_ENCODER_RANGE_MIN = 0 diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 06bb3fe6e2f..69d7b105552 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -65,6 +65,7 @@ services: - REDIS_HOST=cache - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose # Don't change the NLP model configs unless you know what you're doing + - 
EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-} - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-} - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-} - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} @@ -93,7 +94,6 @@ services: - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false} - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-} # Seeding configuration - - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-} extra_hosts: - "host.docker.internal:host-gateway" logging: @@ -167,6 +167,7 @@ services: - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-} - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-} - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-} + # Danswer SlackBot Configs - DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-} - DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-} @@ -269,6 +270,7 @@ services: fi" restart: on-failure environment: + - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-} - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} - INDEXING_ONLY=True # Set to debug to get more fine-grained logs diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index d46cc6ff015..f906d9cd1e1 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -61,6 +61,7 @@ services: - REDIS_HOST=cache - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose # Don't change the NLP model configs unless you know what you're doing + - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-} - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-} - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-} - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} @@ -240,6 +241,7 @@ services: fi" restart: on-failure environment: + - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-} - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} # Set to debug to get more fine-grained logs - 
LOG_LEVEL=${LOG_LEVEL:-info} diff --git a/deployment/helm/charts/danswer/values.yaml b/deployment/helm/charts/danswer/values.yaml index 9d869f126bf..954838c168e 100644 --- a/deployment/helm/charts/danswer/values.yaml +++ b/deployment/helm/charts/danswer/values.yaml @@ -412,6 +412,7 @@ configMap: # Internet Search Tool BING_API_KEY: "" # Don't change the NLP models unless you know what you're doing + EMBEDDING_BATCH_SIZE: "" DOCUMENT_ENCODER_MODEL: "" NORMALIZE_EMBEDDINGS: "" ASYM_QUERY_PREFIX: "" diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml index 95c663ad5e1..6e915d5886e 100644 --- a/deployment/kubernetes/env-configmap.yaml +++ b/deployment/kubernetes/env-configmap.yaml @@ -35,6 +35,7 @@ data: # Internet Search Tool BING_API_KEY: "" # Don't change the NLP models unless you know what you're doing + EMBEDDING_BATCH_SIZE: "" DOCUMENT_ENCODER_MODEL: "" NORMALIZE_EMBEDDINGS: "" ASYM_QUERY_PREFIX: ""