Add environment variable for setting vespa search threads (#2400)

onyx-dot-app · Sep 12, 2024 · f69922f · f69922f
1 parent d4d37c9
commit f69922f
Show file tree

Hide file tree

Showing 11 changed files with 29 additions and 1 deletion.
diff --git a/backend/danswer/configs/chat_configs.py b/backend/danswer/configs/chat_configs.py
@@ -93,3 +93,5 @@
 
 # Enable in-house model for detecting connector-based filtering in queries
 ENABLE_CONNECTOR_CLASSIFIER = os.environ.get("ENABLE_CONNECTOR_CLASSIFIER", False)
+
+VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2)
diff --git a/backend/danswer/document_index/vespa/app_config/services.xml b/backend/danswer/document_index/vespa/app_config/services.xml
@@ -26,11 +26,22 @@
                 <disk>0.75</disk>
             </resource-limits>
         </tuning>
+        <engine>    
+            <proton>
+                <tuning>
+                    <searchnode>
+                        <requestthreads>
+                            <persearch>SEARCH_THREAD_NUMBER</persearch>
+                        </requestthreads>
+                    </searchnode>
+                </tuning>
+            </proton>
+        </engine>
         <config name="vespa.config.search.summary.juniperrc">
             <max_matches>3</max_matches>
             <length>750</length>
             <surround_max>350</surround_max>
             <min_length>300</min_length>
         </config>
     </content>
-</services>
+</services>
diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py
@@ -16,6 +16,7 @@
 from danswer.configs.chat_configs import DOC_TIME_DECAY
 from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.chat_configs import TITLE_CONTENT_RATIO
+from danswer.configs.chat_configs import VESPA_SEARCHER_THREADS
 from danswer.configs.constants import KV_REINDEX_KEY
 from danswer.document_index.interfaces import DocumentIndex
 from danswer.document_index.interfaces import DocumentInsertionRecord
@@ -52,6 +53,7 @@
 from danswer.document_index.vespa_constants import DOCUMENT_SETS
 from danswer.document_index.vespa_constants import HIDDEN
 from danswer.document_index.vespa_constants import NUM_THREADS
+from danswer.document_index.vespa_constants import SEARCH_THREAD_NUMBER_PAT
 from danswer.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT
 from danswer.document_index.vespa_constants import VESPA_DIM_REPLACEMENT_PAT
 from danswer.document_index.vespa_constants import VESPA_TIMEOUT
@@ -134,6 +136,10 @@ def ensure_indices_exist(
 
         doc_lines = _create_document_xml_lines(schema_names)
         services = services_template.replace(DOCUMENT_REPLACEMENT_PAT, doc_lines)
+        services = services.replace(
+            SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS)
+        )
+
         kv_store = get_dynamic_config_store()
 
         needs_reindexing = False

diff --git a/backend/danswer/document_index/vespa_constants.py b/backend/danswer/document_index/vespa_constants.py
@@ -7,6 +7,7 @@
 VESPA_DIM_REPLACEMENT_PAT = "VARIABLE_DIM"
 DANSWER_CHUNK_REPLACEMENT_PAT = "DANSWER_CHUNK_NAME"
 DOCUMENT_REPLACEMENT_PAT = "DOCUMENT_REPLACEMENT"
+SEARCH_THREAD_NUMBER_PAT = "SEARCH_THREAD_NUMBER"
 DATE_REPLACEMENT = "DATE_REPLACEMENT"
 
 # config server

diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
@@ -154,6 +154,7 @@ services:
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
       - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
       # Indexing Configs
+      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-}
       - NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-}
       - ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}
       - DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}

diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml
@@ -147,6 +147,7 @@ services:
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
       - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
       # Indexing Configs
+      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-}
       - NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-}
       - ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}
       - DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}
@@ -282,6 +283,7 @@ services:
       - INDEXING_ONLY=True
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - indexing_huggingface_model_cache:/root/.cache/huggingface/

diff --git a/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml b/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml
@@ -134,6 +134,7 @@ services:
       - INDEXING_ONLY=True
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - indexing_huggingface_model_cache:/root/.cache/huggingface/

diff --git a/deployment/docker_compose/docker-compose.prod.yml b/deployment/docker_compose/docker-compose.prod.yml
@@ -148,6 +148,7 @@ services:
       - INDEXING_ONLY=True
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - indexing_huggingface_model_cache:/root/.cache/huggingface/

diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml
@@ -138,6 +138,7 @@ services:
       - INDEXING_ONLY=True
       - LOG_LEVEL=${LOG_LEVEL:-debug}
       - index_model_cache_huggingface:/root/.cache/huggingface/
+      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
     logging:
       driver: json-file
       options:

diff --git a/deployment/helm/charts/danswer/values.yaml b/deployment/helm/charts/danswer/values.yaml
@@ -421,6 +421,7 @@ configMap:
   MODEL_SERVER_PORT: ""
   MIN_THREADS_ML_MODELS: ""
   # Indexing Configs
+  VESPA_SEARCHER_THREADS: ""
   NUM_INDEXING_WORKERS: ""
   DISABLE_INDEX_UPDATE_ON_SWAP: ""
   DASK_JOB_CLIENT_ENABLED: ""

diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml
@@ -46,6 +46,7 @@ data:
   INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service"
   MIN_THREADS_ML_MODELS: ""
   # Indexing Configs
+  VESPA_SEARCHER_THREADS: ""
   NUM_INDEXING_WORKERS: ""
   ENABLED_CONNECTOR_TYPES: ""
   DISABLE_INDEX_UPDATE_ON_SWAP: ""
Original file line number	Diff line number	Diff line change
Expand Up		@@ -93,3 +93,5 @@

		# Enable in-house model for detecting connector-based filtering in queries
		# NOTE(review): os.environ.get returns the raw string when the variable is
		# set, so any non-empty value (including "false") is truthy here; confirm
		# how consumers of this flag interpret it.
		ENABLE_CONNECTOR_CLASSIFIER = os.environ.get("ENABLE_CONNECTOR_CLASSIFIER", False)

		# Value substituted for the SEARCH_THREAD_NUMBER placeholder in Vespa's
		# services.xml (<requestthreads><persearch>). Falls back to 2 when the
		# variable is unset or empty; a non-integer value raises ValueError when
		# this module is loaded.
		VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2)