From 5be02bcd8814c7f2bdb7252eff5a0b95fd5b4559 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 9 Oct 2024 09:38:27 -0700 Subject: [PATCH 1/3] convert to jpeg --- backend/danswer/configs/app_configs.py | 2 +- backend/danswer/llm/answering/answer.py | 1 + .../server/query_and_chat/chat_backend.py | 31 ++++++++++++++++--- .../docker_compose/docker-compose.dev.yml | 2 +- .../docker_compose/docker-compose.gpu-dev.yml | 2 +- .../docker-compose.search-testing.yml | 2 +- 6 files changed, 32 insertions(+), 8 deletions(-) diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index eaa231e88b7..f31b968d54c 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -135,7 +135,7 @@ os.environ.get("POSTGRES_PASSWORD") or "password" ) POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost" -POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" +POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5433" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" POSTGRES_API_SERVER_POOL_SIZE = int( diff --git a/backend/danswer/llm/answering/answer.py b/backend/danswer/llm/answering/answer.py index 4648e0fe821..e35c02f2dfd 100644 --- a/backend/danswer/llm/answering/answer.py +++ b/backend/danswer/llm/answering/answer.py @@ -330,6 +330,7 @@ def _process_llm_stream( tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, ) -> Iterator[str | StreamStopInfo]: + print("prompt", prompt) for message in self.llm.stream( prompt=prompt, tools=tools, tool_choice=tool_choice ): diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index 36a09afde19..31574b51936 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -3,6 +3,7 @@ import uuid from collections.abc import Callable from collections.abc import Generator +from typing import Tuple from fastapi import APIRouter from fastapi import Depends @@ -11,6 +12,7 @@ from fastapi import Response from fastapi import UploadFile from fastapi.responses import StreamingResponse +from PIL import Image from pydantic import BaseModel from sqlalchemy.orm import Session @@ -508,6 +510,21 @@ def seed_chat( """File upload""" +def convert_to_jpeg(file: UploadFile) -> Tuple[io.BytesIO, str]: + try: + img = Image.open(file.file) + if img.mode != "RGB": + img = img.convert("RGB") + jpeg_io = io.BytesIO() + img.save(jpeg_io, format="JPEG", quality=85) + jpeg_io.seek(0) + return jpeg_io, "image/jpeg" + except Exception as e: + raise HTTPException( + status_code=400, detail=f"Failed to convert image: {str(e)}" + ) + + @router.post("/file") def upload_files_for_chat( files: list[UploadFile], @@ -570,19 +587,25 @@ def upload_files_for_chat( for file in files: if file.content_type in image_content_types: file_type = ChatFileType.IMAGE + # Convert image to JPEG + file_content, new_content_type = convert_to_jpeg(file) elif file.content_type in document_content_types: file_type = ChatFileType.DOC + file_content = file.file + new_content_type = file.content_type else: file_type = ChatFileType.PLAIN_TEXT + file_content = file.file + new_content_type = file.content_type - # store the raw file + # store the file (now JPEG for images) file_id = str(uuid.uuid4()) file_store.save_file( file_name=file_id, - content=file.file, + content=file_content, display_name=file.filename, file_origin=FileOrigin.CHAT_UPLOAD, - file_type=file.content_type or file_type.value, + file_type=new_content_type or file_type.value, ) # if the file is a doc, extract text and store that so we don't need @@ -604,7 +627,7 @@ def upload_files_for_chat( # as we would always use this as the ID to attach to the # message file_info.append((text_file_id, file.filename, ChatFileType.PLAIN_TEXT)) - else: + elif file_type != ChatFileType.DOC: file_info.append((file_id, file.filename, file_type)) return { diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 4d0eff8612d..cdc2575ad2e 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -299,7 +299,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5432:5432" + - "5433:5432" volumes: - db_volume:/var/lib/postgresql/data diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index 6397f657c19..03e436a2eb2 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -312,7 +312,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5432:5432" + - "5433:5432" volumes: - db_volume:/var/lib/postgresql/data diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml index fab950c064e..2afd54e029c 100644 --- a/deployment/docker_compose/docker-compose.search-testing.yml +++ b/deployment/docker_compose/docker-compose.search-testing.yml @@ -157,7 +157,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5432" + - "5433" volumes: - db_volume:/var/lib/postgresql/data From 279138a9957a385f6c88682966986bd804259e2b Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 9 Oct 2024 09:39:35 -0700 Subject: [PATCH 2/3] k --- backend/danswer/configs/app_configs.py | 2 +- backend/danswer/llm/answering/answer.py | 1 - backend/danswer/server/query_and_chat/chat_backend.py | 2 +- deployment/docker_compose/docker-compose.dev.yml | 2 +- deployment/docker_compose/docker-compose.gpu-dev.yml | 2 +- deployment/docker_compose/docker-compose.search-testing.yml | 2 +- 6 files changed, 5 insertions(+), 6 deletions(-) diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index f31b968d54c..eaa231e88b7 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -135,7 +135,7 @@ os.environ.get("POSTGRES_PASSWORD") or "password" ) POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost" -POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5433" +POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" POSTGRES_API_SERVER_POOL_SIZE = int( diff --git a/backend/danswer/llm/answering/answer.py b/backend/danswer/llm/answering/answer.py index e35c02f2dfd..4648e0fe821 100644 --- a/backend/danswer/llm/answering/answer.py +++ b/backend/danswer/llm/answering/answer.py @@ -330,7 +330,6 @@ def _process_llm_stream( tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, ) -> Iterator[str | StreamStopInfo]: - print("prompt", prompt) for message in self.llm.stream( prompt=prompt, tools=tools, tool_choice=tool_choice ): diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index 31574b51936..3b553287353 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -627,7 +627,7 @@ def upload_files_for_chat( # as we would always use this as the ID to attach to the # message file_info.append((text_file_id, file.filename, ChatFileType.PLAIN_TEXT)) - elif file_type != ChatFileType.DOC: + else: file_info.append((file_id, file.filename, file_type)) return { diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index cdc2575ad2e..4d0eff8612d 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -299,7 +299,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5433:5432" + - "5432:5432" volumes: - db_volume:/var/lib/postgresql/data diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index 03e436a2eb2..6397f657c19 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -312,7 +312,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5433:5432" + - "5432:5432" volumes: - db_volume:/var/lib/postgresql/data diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml index 2afd54e029c..fab950c064e 100644 --- a/deployment/docker_compose/docker-compose.search-testing.yml +++ b/deployment/docker_compose/docker-compose.search-testing.yml @@ -157,7 +157,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5433" + - "5432" volumes: - db_volume:/var/lib/postgresql/data From a5fe1efbda9178e4e6318788c34dd1d649a69ae1 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 9 Oct 2024 11:22:01 -0700 Subject: [PATCH 3/3] typing --- .../server/query_and_chat/chat_backend.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index 3b553287353..91efe6cb874 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -512,12 +512,12 @@ def seed_chat( def convert_to_jpeg(file: UploadFile) -> Tuple[io.BytesIO, str]: try: - img = Image.open(file.file) - if img.mode != "RGB": - img = img.convert("RGB") - jpeg_io = io.BytesIO() - img.save(jpeg_io, format="JPEG", quality=85) - jpeg_io.seek(0) + with Image.open(file.file) as img: + if img.mode != "RGB": + img = img.convert("RGB") + jpeg_io = io.BytesIO() + img.save(jpeg_io, format="JPEG", quality=85) + jpeg_io.seek(0) return jpeg_io, "image/jpeg" except Exception as e: raise HTTPException( @@ -591,12 +591,12 @@ def upload_files_for_chat( file_content, new_content_type = convert_to_jpeg(file) elif file.content_type in document_content_types: file_type = ChatFileType.DOC - file_content = file.file - new_content_type = file.content_type + file_content = io.BytesIO(file.file.read()) + new_content_type = file.content_type or "" else: file_type = ChatFileType.PLAIN_TEXT - file_content = file.file - new_content_type = file.content_type + file_content = io.BytesIO(file.file.read()) + new_content_type = file.content_type or "" # store the file (now JPEG for images) file_id = str(uuid.uuid4())