Skip to content

Commit

Permalink
Merge pull request #1436 from ManishMadan2882/main
Browse files Browse the repository at this point in the history
React Widget: Search bar component
  • Loading branch information
dartpain authored Nov 22, 2024
2 parents a0a05b6 + 1595e02 commit 92d9086
Show file tree
Hide file tree
Showing 17 changed files with 842 additions and 243 deletions.
6 changes: 6 additions & 0 deletions application/api/user/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,9 @@ def post(self):
".json",
".xlsx",
".pptx",
".png",
".jpg",
".jpeg",
],
job_name,
final_filename,
Expand All @@ -365,6 +368,9 @@ def post(self):
".json",
".xlsx",
".pptx",
".png",
".jpg",
".jpeg",
],
job_name,
final_filename,
Expand Down
1 change: 1 addition & 0 deletions application/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Settings(BaseSettings):
DEFAULT_MAX_HISTORY: int = 150
MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
UPLOAD_FOLDER: str = "inputs"
PARSE_PDF_AS_IMAGE: bool = False
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search

Expand Down
4 changes: 4 additions & 0 deletions application/parser/file/bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
from application.parser.file.json_parser import JSONParser
from application.parser.file.pptx_parser import PPTXParser
from application.parser.file.image_parser import ImageParser
from application.parser.schema.base import Document

DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
Expand All @@ -27,6 +28,9 @@
".mdx": MarkdownParser(),
".json":JSONParser(),
".pptx":PPTXParser(),
".png": ImageParser(),
".jpg": ImageParser(),
".jpeg": ImageParser(),
}


Expand Down
12 changes: 11 additions & 1 deletion application/parser/file/docs_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from typing import Dict

from application.parser.file.base_parser import BaseParser

from application.core.settings import settings
import requests

class PDFParser(BaseParser):
"""PDF parser."""
Expand All @@ -18,6 +19,15 @@ def _init_parser(self) -> Dict:

def parse_file(self, file: Path, errors: str = "ignore") -> str:
"""Parse file."""
if settings.PARSE_PDF_AS_IMAGE:
doc2md_service = "https://llm.arc53.com/doc2md"
# alternatively you can use local vision capable LLM
with open(file, "rb") as file_loaded:
files = {'file': file_loaded}
response = requests.post(doc2md_service, files=files)
data = response.json()["markdown"]
return data

try:
import PyPDF2
except ImportError:
Expand Down
27 changes: 27 additions & 0 deletions application/parser/file/image_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Image parser.
Contains parser for .png, .jpg, .jpeg files.
"""
from pathlib import Path
import requests
from typing import Dict, Union

from application.parser.file.base_parser import BaseParser


class ImageParser(BaseParser):
    """Image parser.

    Uploads an image file to a remote "doc2md" HTTP service and returns the
    ``markdown`` field of the service's JSON response.
    """

    # Endpoint the file is uploaded to. A class attribute so deployments can
    # point a subclass (or the class itself) at a different service.
    # Alternatively, a local vision-capable LLM can be used instead.
    DOC2MD_SERVICE = "https://llm.arc53.com/doc2md"

    # (connect, read) timeouts in seconds. Without a timeout ``requests``
    # waits forever if the service stalls. The read timeout is generous
    # because the conversion is presumably slow (assumed model-backed;
    # adjust to the real latency of the service).
    REQUEST_TIMEOUT = (10, 600)

    def _init_parser(self) -> Dict:
        """Init parser.

        Returns:
            An empty dict; this parser keeps no configuration.
        """
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
        """Parse file.

        Args:
            file: Path of the image to upload.
            errors: Unused; kept for signature compatibility with other
                parsers.

        Returns:
            The value stored under the ``"markdown"`` key of the service's
            JSON response.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-2xx HTTP status.
            KeyError: If the response JSON has no ``"markdown"`` key.
        """
        with open(file, "rb") as file_loaded:
            response = requests.post(
                self.DOC2MD_SERVICE,
                files={"file": file_loaded},
                timeout=self.REQUEST_TIMEOUT,
            )
        # Fail loudly on 4xx/5xx instead of trying to decode an error page.
        response.raise_for_status()
        return response.json()["markdown"]
9 changes: 5 additions & 4 deletions extensions/react-widget/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion extensions/react-widget/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
"scripts": {
"build": "parcel build src/main.tsx --public-url ./",
"build:react": "parcel build src/index.ts",
"dev": "parcel src/index.html -p 3000",
"serve": "parcel serve -p 3000",
"dev": "parcel -p 3000",
"test": "jest",
"lint": "eslint",
"check": "tsc --noEmit",
Expand Down
8 changes: 4 additions & 4 deletions extensions/react-widget/src/App.tsx
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import React from "react"
import {DocsGPTWidget} from "./components/DocsGPTWidget"
const App = () => {
import {SearchBar} from "./components/SearchBar"
export const App = () => {
return (
<div>
<SearchBar/>
<DocsGPTWidget/>
</div>
)
}

export default App
}
Loading

0 comments on commit 92d9086

Please sign in to comment.