From d5efa73bcca7685743829e53a104ebf883e6f540 Mon Sep 17 00:00:00 2001
From: pablodanswer
Date: Wed, 11 Sep 2024 10:15:50 -0700
Subject: [PATCH 1/3] add env variables for overriding

---
 backend/danswer/configs/app_configs.py | 1 +
 backend/danswer/configs/model_configs.py | 2 ++
 .../natural_language_processing/search_nlp_models.py | 7 +++++--
 deployment/docker_compose/docker-compose.dev.yml | 6 ++++--
 deployment/docker_compose/docker-compose.gpu-dev.yml | 2 +-
 .../docker_compose/docker-compose.search-testing.yml | 2 +-
 6 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index d7733fdc0ab..35fcb8903e4 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -126,6 +126,7 @@
 except ValueError:
     INDEX_BATCH_SIZE = 16
 
+
 # Below are intended to match the env variables names used by the official postgres docker image
 # https://hub.docker.com/_/postgres
 POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres"
diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py
index bfd62357304..16355b5578c 100644
--- a/backend/danswer/configs/model_configs.py
+++ b/backend/danswer/configs/model_configs.py
@@ -46,6 +46,8 @@
 CROSS_ENCODER_RANGE_MAX = 1
 CROSS_ENCODER_RANGE_MIN = 0
 
+EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None
+
 
 #####
 # Generative AI Model Configs
diff --git a/backend/danswer/natural_language_processing/search_nlp_models.py b/backend/danswer/natural_language_processing/search_nlp_models.py
index 6dcec724345..54c2b56454f 100644
--- a/backend/danswer/natural_language_processing/search_nlp_models.py
+++ b/backend/danswer/natural_language_processing/search_nlp_models.py
@@ -15,6 +15,7 @@
     BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
 )
 from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
+from danswer.configs.model_configs import EMBEDDING_BATCH_SIZE
 from danswer.db.models import SearchSettings
 from danswer.natural_language_processing.utils import get_tokenizer
 from danswer.natural_language_processing.utils import tokenizer_trim_content
@@ -173,8 +174,10 @@ def encode(
         texts: list[str],
         text_type: EmbedTextType,
         large_chunks_present: bool = False,
-        local_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS,
-        api_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
+        local_embedding_batch_size: int = EMBEDDING_BATCH_SIZE
+        or BATCH_SIZE_ENCODE_CHUNKS,
+        api_embedding_batch_size: int = EMBEDDING_BATCH_SIZE
+        or BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
         max_seq_length: int = DOC_EMBEDDING_CONTEXT_SIZE,
     ) -> list[Embedding]:
         if not texts or not all(texts):
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 06bb3fe6e2f..3537d2fb8ee 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -93,7 +93,7 @@ services:
       - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
       - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
       # Seeding configuration
-      - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
+      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
     extra_hosts:
       - "host.docker.internal:host-gateway"
     logging:
@@ -167,6 +167,7 @@ services:
       - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
       - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
       - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
+
       # Danswer SlackBot Configs
       - DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-}
       - DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-}
@@ -269,6 +270,7 @@ services:
       fi"
     restart: on-failure
    environment:
+      - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
       - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
       - INDEXING_ONLY=True
       # Set to debug to get more fine-grained logs
@@ -290,7 +292,7 @@ services:
       - POSTGRES_USER=${POSTGRES_USER:-postgres}
       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
     ports:
-      - "5432:5432"
+      - "5433:5432"
     volumes:
       - db_volume:/var/lib/postgresql/data
 
diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml
index d46cc6ff015..6acf18dcbeb 100644
--- a/deployment/docker_compose/docker-compose.gpu-dev.yml
+++ b/deployment/docker_compose/docker-compose.gpu-dev.yml
@@ -298,7 +298,7 @@ services:
       - POSTGRES_USER=${POSTGRES_USER:-postgres}
       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
     ports:
-      - "5432:5432"
+      - "5433:5432"
     volumes:
       - db_volume:/var/lib/postgresql/data
 
diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml
index ecd796f6716..c477e85512d 100644
--- a/deployment/docker_compose/docker-compose.search-testing.yml
+++ b/deployment/docker_compose/docker-compose.search-testing.yml
@@ -153,7 +153,7 @@ services:
       - POSTGRES_USER=${POSTGRES_USER:-postgres}
       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
     ports:
-      - "5432"
+      - "5433"
     volumes:
       - db_volume:/var/lib/postgresql/data
 

From ffea7eb38754bf506ba6ba7b50fa0c2f626b7ad5 Mon Sep 17 00:00:00 2001
From: pablodanswer
Date: Wed, 11 Sep 2024 10:20:35 -0700
Subject: [PATCH 2/3] proper ports

---
 deployment/docker_compose/docker-compose.dev.yml | 2 +-
 deployment/docker_compose/docker-compose.gpu-dev.yml | 2 +-
 deployment/docker_compose/docker-compose.search-testing.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 3537d2fb8ee..f741b9c2129 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -292,7 +292,7 @@ services:
       - POSTGRES_USER=${POSTGRES_USER:-postgres}
       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
     ports:
-      - "5433:5432"
+      - "5432:5432"
     volumes:
       - db_volume:/var/lib/postgresql/data
 
diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml
index 6acf18dcbeb..d46cc6ff015 100644
--- a/deployment/docker_compose/docker-compose.gpu-dev.yml
+++ b/deployment/docker_compose/docker-compose.gpu-dev.yml
@@ -298,7 +298,7 @@ services:
       - POSTGRES_USER=${POSTGRES_USER:-postgres}
       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
     ports:
-      - "5433:5432"
+      - "5432:5432"
     volumes:
       - db_volume:/var/lib/postgresql/data
 
diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml
index c477e85512d..ecd796f6716 100644
--- a/deployment/docker_compose/docker-compose.search-testing.yml
+++ b/deployment/docker_compose/docker-compose.search-testing.yml
@@ -153,7 +153,7 @@ services:
       - POSTGRES_USER=${POSTGRES_USER:-postgres}
       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
     ports:
-      - "5433"
+      - "5432"
     volumes:
       - db_volume:/var/lib/postgresql/data
 

From 10a49463fe7335cb77f497ec65df68ca3a88a695 Mon Sep 17 00:00:00 2001
From: pablodanswer
Date: Wed, 11 Sep 2024 12:15:45 -0700
Subject: [PATCH 3/3] proper overrides

---
 backend/danswer/configs/model_configs.py | 10 ++++++----
 .../natural_language_processing/search_nlp_models.py | 7 ++-----
 deployment/docker_compose/docker-compose.dev.yml | 2 +-
 deployment/docker_compose/docker-compose.gpu-dev.yml | 2 ++
 deployment/helm/charts/danswer/values.yaml | 1 +
 deployment/kubernetes/env-configmap.yaml | 1 +
 6 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py
index 16355b5578c..c9668cd8136 100644
--- a/backend/danswer/configs/model_configs.py
+++ b/backend/danswer/configs/model_configs.py
@@ -39,15 +39,17 @@
 ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "search_query: ")
 ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "search_document: ")
 # Purely an optimization, memory limitation consideration
-BATCH_SIZE_ENCODE_CHUNKS = 8
+
+# A user-set embedding batch size overrides the default encoding batch sizes
+EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None
+
+BATCH_SIZE_ENCODE_CHUNKS = EMBEDDING_BATCH_SIZE or 8
 # don't send over too many chunks at once, as sending too many could cause timeouts
-BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = 512
+BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = EMBEDDING_BATCH_SIZE or 512
 # For score display purposes, only way is to know the expected ranges
 CROSS_ENCODER_RANGE_MAX = 1
 CROSS_ENCODER_RANGE_MIN = 0
 
-EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None
-
 
 #####
 # Generative AI Model Configs
diff --git a/backend/danswer/natural_language_processing/search_nlp_models.py b/backend/danswer/natural_language_processing/search_nlp_models.py
index 54c2b56454f..6dcec724345 100644
--- a/backend/danswer/natural_language_processing/search_nlp_models.py
+++ b/backend/danswer/natural_language_processing/search_nlp_models.py
@@ -15,7 +15,6 @@
     BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
 )
 from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
-from danswer.configs.model_configs import EMBEDDING_BATCH_SIZE
 from danswer.db.models import SearchSettings
 from danswer.natural_language_processing.utils import get_tokenizer
 from danswer.natural_language_processing.utils import tokenizer_trim_content
@@ -174,10 +173,8 @@ def encode(
         texts: list[str],
         text_type: EmbedTextType,
         large_chunks_present: bool = False,
-        local_embedding_batch_size: int = EMBEDDING_BATCH_SIZE
-        or BATCH_SIZE_ENCODE_CHUNKS,
-        api_embedding_batch_size: int = EMBEDDING_BATCH_SIZE
-        or BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
+        local_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS,
+        api_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
         max_seq_length: int = DOC_EMBEDDING_CONTEXT_SIZE,
     ) -> list[Embedding]:
         if not texts or not all(texts):
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index f741b9c2129..69d7b105552 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -65,6 +65,7 @@ services:
       - REDIS_HOST=cache
       - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
       # Don't change the NLP model configs unless you know what you're doing
+      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
       - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
       - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
       - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
@@ -93,7 +94,6 @@ services:
       - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
       - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
       # Seeding configuration
-      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
     extra_hosts:
       - "host.docker.internal:host-gateway"
     logging:
diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml
index d46cc6ff015..f906d9cd1e1 100644
--- a/deployment/docker_compose/docker-compose.gpu-dev.yml
+++ b/deployment/docker_compose/docker-compose.gpu-dev.yml
@@ -61,6 +61,7 @@ services:
       - REDIS_HOST=cache
       - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
       # Don't change the NLP model configs unless you know what you're doing
+      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
       - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
       - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
       - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
@@ -240,6 +241,7 @@ services:
       fi"
     restart: on-failure
     environment:
+      - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
       - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
diff --git a/deployment/helm/charts/danswer/values.yaml b/deployment/helm/charts/danswer/values.yaml
index 9d869f126bf..954838c168e 100644
--- a/deployment/helm/charts/danswer/values.yaml
+++ b/deployment/helm/charts/danswer/values.yaml
@@ -412,6 +412,7 @@ configMap:
   # Internet Search Tool
   BING_API_KEY: ""
   # Don't change the NLP models unless you know what you're doing
+  EMBEDDING_BATCH_SIZE: ""
   DOCUMENT_ENCODER_MODEL: ""
   NORMALIZE_EMBEDDINGS: ""
   ASYM_QUERY_PREFIX: ""
diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml
index 95c663ad5e1..6e915d5886e 100644
--- a/deployment/kubernetes/env-configmap.yaml
+++ b/deployment/kubernetes/env-configmap.yaml
@@ -35,6 +35,7 @@ data:
   # Internet Search Tool
   BING_API_KEY: ""
   # Don't change the NLP models unless you know what you're doing
+  EMBEDDING_BATCH_SIZE: ""
   DOCUMENT_ENCODER_MODEL: ""
   NORMALIZE_EMBEDDINGS: ""
   ASYM_QUERY_PREFIX: ""
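
Reviewer note (not part of the patches): the override landed in
backend/danswer/configs/model_configs.py by PATCH 3/3 works purely through
Python truthiness. Below is a minimal, self-contained sketch of that idiom;
the three assignments are copied from the patch, while the worked examples
in the comments are our own reading of its behavior:

    import os

    # Unset or empty env var collapses to None: "" -> ("" or 0) -> int(0) -> (0 or None) -> None.
    # A non-integer value such as "abc" would still raise ValueError inside int().
    EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None

    # None falls through to the hard-coded defaults; any positive integer wins for both paths.
    BATCH_SIZE_ENCODE_CHUNKS = EMBEDDING_BATCH_SIZE or 8
    BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = EMBEDDING_BATCH_SIZE or 512

    # EMBEDDING_BATCH_SIZE unset -> 8 local / 512 API (defaults)
    # EMBEDDING_BATCH_SIZE=""    -> 8 / 512 (empty string is folded back to None)
    # EMBEDDING_BATCH_SIZE="32"  -> 32 / 32 (one override applies to both local and API batching)
    # EMBEDDING_BATCH_SIZE="0"   -> 8 / 512 (0 is falsy, so it is treated as unset)

This is also why the compose files export EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}:
a variable left unset on the host reaches the container as an empty string, which the
"or 0" guard folds back to None instead of crashing int() at import time.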