diff --git a/backend/danswer/main.py b/backend/danswer/main.py index 41dcc59d76c..8b660f5a9e3 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -206,15 +206,15 @@ def update_default_multipass_indexing(db_session: Session) -> None: logger.info(f"GPU availability: {gpu_available}") current_settings = get_current_search_settings(db_session) - if current_settings: - logger.notice(f"Updating multipass indexing setting to: {gpu_available}") - updated_settings = SavedSearchSettings.from_db_model(current_settings) - updated_settings.multipass_indexing = gpu_available - update_current_search_settings(db_session, updated_settings) - else: - logger.warning( - "No current search settings found. Skipping multipass indexing update." - ) + + logger.notice(f"Updating multipass indexing setting to: {gpu_available}") + updated_settings = SavedSearchSettings.from_db_model(current_settings) + # Enable multipass indexing if GPU is available or if using a cloud provider + updated_settings.multipass_indexing = ( + gpu_available or current_settings.cloud_provider is not None + ) + update_current_search_settings(db_session, updated_settings) + else: logger.debug( "Existing docs or connectors found. Skipping multipass indexing update." @@ -370,9 +370,6 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: translate_saved_search_settings(db_session) - # update multipass indexing setting based on GPU availability - update_default_multipass_indexing(db_session) - # Does the user need to trigger a reindexing to bring the document index # into a good state, marked in the kv store mark_reindex_flag(db_session) @@ -403,6 +400,9 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: ), ) + # update multipass indexing setting based on GPU availability + update_default_multipass_indexing(db_session) + optional_telemetry(record_type=RecordType.VERSION, data={"version": __version__}) yield diff --git a/backend/danswer/utils/gpu_utils.py b/backend/danswer/utils/gpu_utils.py index 5b872ffddb4..70a3dbc2c95 100644 --- a/backend/danswer/utils/gpu_utils.py +++ b/backend/danswer/utils/gpu_utils.py @@ -1,28 +1,30 @@ import requests +from retry import retry from danswer.utils.logger import setup_logger +from shared_configs.configs import INDEXING_MODEL_SERVER_HOST +from shared_configs.configs import INDEXING_MODEL_SERVER_PORT from shared_configs.configs import MODEL_SERVER_HOST from shared_configs.configs import MODEL_SERVER_PORT logger = setup_logger() -def gpu_status_request(indexing: bool = False) -> bool: - model_server_url = f"{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}" +@retry(tries=5, delay=5) +def gpu_status_request(indexing: bool = True) -> bool: + if indexing: + model_server_url = f"{INDEXING_MODEL_SERVER_HOST}:{INDEXING_MODEL_SERVER_PORT}" + else: + model_server_url = f"{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}" if "http" not in model_server_url: model_server_url = f"http://{model_server_url}" - response = requests.get(f"{model_server_url}/api/gpu-status") - - if response.status_code == 200: + try: + response = requests.get(f"{model_server_url}/api/gpu-status", timeout=10) + response.raise_for_status() gpu_status = response.json() - if gpu_status["gpu_available"]: - return True - else: - return False - else: - logger.warning( - f"Error: Unable to fetch GPU status. Status code: {response.status_code}" - ) - return False + return gpu_status["gpu_available"] + except requests.RequestException as e: + logger.error(f"Error: Unable to fetch GPU status. Error: {str(e)}") + raise # Re-raise exception to trigger a retry