-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Switch DB to postgres #10
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,14 @@ | ||
# Ignore everything | ||
** | ||
|
||
# Include (don't ignore) the application code | ||
!app | ||
!./pyproject.toml | ||
!./poetry.lock | ||
**/__pycache__ | ||
|
||
# Re-ignore pycache within `app`. | ||
**/__pycache__ | ||
|
||
# Include (don't ignore) the migrations. | ||
!migrations/*.sql | ||
!yoyo.ini |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,14 +6,14 @@ | |
|
||
from app.ingest.store import StoreDep | ||
|
||
from .models import ImageStatement, RetrieveRequest, RetrieveResponse, TextStatement | ||
from .models import ImageChunk, RetrieveRequest, RetrieveResponse, TextChunk | ||
|
||
router = APIRouter(tags=["statements"], prefix="/statements") | ||
router = APIRouter(prefix="/chunks") | ||
|
||
|
||
@router.post("/retrieve") | ||
async def retrieve(store: StoreDep, request: RetrieveRequest) -> RetrieveResponse: | ||
"""Retrieve statements based on a given query.""" | ||
async def retrieve_chunks(store: StoreDep, request: RetrieveRequest) -> RetrieveResponse: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Resource Naming: I think it makes sense for this to be Method Naming: I can see There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It will be easier (in the UI) to treat this as a |
||
"""Retrieve chunks based on a given query.""" | ||
|
||
from llama_index.response_synthesizers import ResponseMode | ||
|
||
|
@@ -30,23 +30,23 @@ async def retrieve(store: StoreDep, request: RetrieveRequest) -> RetrieveRespons | |
|
||
return RetrieveResponse( | ||
summary=results.response, | ||
statements=statements if request.include_statements else [], | ||
chunks=statements if request.include_statements else [], | ||
) | ||
|
||
|
||
def node_to_statement(node: NodeWithScore) -> Union[TextStatement, ImageStatement]: | ||
def node_to_statement(node: NodeWithScore) -> Union[TextChunk, ImageChunk]: | ||
from llama_index.schema import ImageNode, TextNode | ||
|
||
if isinstance(node.node, TextNode): | ||
return TextStatement( | ||
return TextChunk( | ||
raw=True, | ||
score=node.score, | ||
text=node.node.text, | ||
start_char_idx=node.node.start_char_idx, | ||
end_char_idx=node.node.end_char_idx, | ||
) | ||
elif isinstance(node.node, ImageNode): | ||
return ImageStatement( | ||
return ImageChunk( | ||
score=node.score, | ||
text=node.node.text if node.node.text else None, | ||
image=node.node.image, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
|
||
from pydantic import BaseModel, ConfigDict, TypeAdapter | ||
|
||
class Collection(BaseModel):
    """A collection of indexed documents."""

    # `from_attributes=True` lets `model_validate` read fields from object
    # attributes in addition to mappings.
    model_config = ConfigDict(from_attributes=True)

    id: int
    """The ID of the collection."""

    name: str
    """The name of the collection."""
|
||
collection_validator = TypeAdapter(Collection) | ||
|
||
|
||
class CollectionCreate(BaseModel): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure how we want to standardize naming. Basically, I have:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like Litestar at least goes with
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I usually ended up with something like |
||
"""The request to create a collection.""" | ||
|
||
name: str | ||
"""The name of the collection.""" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,37 @@ | ||
from typing import Annotated, List | ||
|
||
from fastapi import APIRouter, Path | ||
from sqlmodel import Session, select | ||
from pydantic import parse_obj_as | ||
|
||
from app.common.schema import Collection, EngineDep | ||
from app.common.db import PgConnectionDep | ||
from app.collections.models import * | ||
|
||
router = APIRouter(tags=["collections"], prefix="/collections") | ||
router = APIRouter(prefix="/collections") | ||
|
||
|
||
@router.put("/") | ||
async def add(engine: EngineDep, collection: Collection) -> Collection: | ||
async def add_collection(conn: PgConnectionDep, collection: CollectionCreate) -> Collection: | ||
"""Create a collection.""" | ||
with Session(engine) as session: | ||
session.add(collection) | ||
session.commit() | ||
session.refresh(collection) | ||
return collection | ||
result = await conn.fetchrow(""" | ||
INSERT INTO collection (name) VALUES ($1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think |
||
RETURNING id, name | ||
""", | ||
collection.name) | ||
return Collection.model_validate(dict(result)) | ||
|
||
|
||
@router.get("/") | ||
async def list(engine: EngineDep) -> List[Collection]: | ||
async def list_collections(conn: PgConnectionDep) -> List[Collection]: | ||
"""List collections.""" | ||
with Session(engine) as session: | ||
return session.exec(select(Collection)).all() | ||
results = await conn.fetch("SELECT id, name FROM collection") | ||
return [Collection.model_validate(dict(result)) for result in results] | ||
|
||
|
||
PathCollectionId = Annotated[int, Path(..., description="The collection ID.")] | ||
|
||
|
||
@router.get("/{id}") | ||
async def get(id: PathCollectionId, engine: EngineDep) -> Collection: | ||
async def get_collection(id: PathCollectionId, conn: PgConnectionDep) -> Collection: | ||
"""Get a specific collection.""" | ||
with Session(engine) as session: | ||
return session.get(Collection, id) | ||
result = await conn.fetchrow("SELECT id, name FROM collection WHERE id = $1", id) | ||
return Collection.model_validate(dict(result)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import contextlib | ||
from enum import Enum | ||
from typing import Annotated, AsyncIterator, Optional | ||
from uuid import UUID | ||
import asyncpg | ||
|
||
from fastapi import Depends, Request | ||
|
||
@contextlib.asynccontextmanager
async def create_pool(dsn: str) -> AsyncIterator[asyncpg.Pool]:
    """
    Create a postgres connection pool and close it on exit.

    Arguments:
    - dsn: Connection arguments specified as a single string in
      the following format:
      `postgres://user:pass@host:port/database?option=value`.

    Yields:
    - The open `asyncpg.Pool`. It is closed when the `async with` block
      exits, including when the block raises.
    """
    pool = await asyncpg.create_pool(dsn)
    try:
        yield pool
    finally:
        # `Pool.close()` is a coroutine; without `await` it never runs and
        # the pool's connections are left open.
        await pool.close()
|
||
def _pg_pool(request: Request) -> asyncpg.Pool:
    """Return the connection pool stored as `pg_pool` on the request state."""
    state = request.state
    return state.pg_pool
|
||
PgPoolDep = Annotated[asyncpg.Pool, Depends(_pg_pool)] | ||
|
||
async def _pg_connection(pool: PgPoolDep) -> AsyncIterator[asyncpg.Connection]:
    """Yield a connection acquired from the pool.

    This is an async generator, so the return annotation is an iterator of
    connections rather than a bare connection. The connection is released
    back to the pool when the `async with` block exits, i.e. once the
    consumer of the generator is finished with it.
    """
    async with pool.acquire() as connection:
        yield connection
|
||
PgConnectionDep = Annotated[asyncpg.Connection, Depends(_pg_connection)] |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from enum import Enum | ||
from typing import Optional | ||
|
||
from pydantic import BaseModel | ||
|
||
|
||
class IngestState(Enum):
    """Ingestion status of a document."""

    PENDING = "pending"
    """Document is pending ingestion."""

    INGESTED = "ingested"
    """Document has been ingested."""

    FAILED = "failed"
    """Document failed to be ingested. See `ingest_error` for details."""
|
||
class Document(BaseModel):
    """A document row as stored in the SQL database."""

    # Identifier of the document; optional and defaults to None.
    id: Optional[int] = None
    # Identifier of the collection this document belongs to (required).
    collection_id: int

    # URL of the document (required).
    url: str

    # Ingestion status and error text; both optional and default to None.
    ingest_state: Optional[IngestState] = None
    ingest_error: Optional[str] = None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
attempting to s/Statements/Chunk