Skip to content

Commit

Permalink
Merge pull request #13 from DewyKB/rm/flatten
Browse files Browse the repository at this point in the history
Flatten documents to be top-level
  • Loading branch information
kerinin authored Jan 24, 2024
2 parents 2c2d540 + bea6d86 commit e8c7e3d
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 19 deletions.
6 changes: 6 additions & 0 deletions app/documents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@

from pydantic import BaseModel

class CreateRequest(BaseModel):
    """Request body for adding a document."""

    # The ID of the collection the document should be added to.
    collection_id: int

    # The URL of the document to add.
    url: str

class IngestState(Enum):
PENDING = "pending"
Expand Down
43 changes: 24 additions & 19 deletions app/documents/router.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
from typing import Annotated, List

import asyncpg
from fastapi import APIRouter, BackgroundTasks, Body, HTTPException, Path, status
from fastapi import APIRouter, BackgroundTasks, Body, HTTPException, Path, status, Query
from loguru import logger

from app.collections.router import PathCollectionId
from app.common.db import PgConnectionDep, PgPoolDep
from app.documents.models import Document
from app.ingest.extract import extract
from app.ingest.extract.source import ExtractSource
from app.ingest.store import Store, StoreDep

# TODO: Move this to `/documents`. Will require figuring out
# how to specify the collection for create, list, etc.
router = APIRouter(prefix="/collections/{collection_id}/documents")
from .models import CreateRequest

router = APIRouter(prefix="/documents")

# We can't use the session from the request because it ends as soon
# as the request completes. So we need to pass the engine and start
Expand Down Expand Up @@ -53,11 +51,10 @@ async def ingest_document(id: int, store: Store, pg_pool: asyncpg.Pool):

@router.put("/")
async def add_document(
collection_id: PathCollectionId,
store: StoreDep,
pg_pool: PgPoolDep,
background: BackgroundTasks,
url: Annotated[str, Body(..., description="The URL of the document to add.")],
req: CreateRequest,
) -> Document:
"""Add a document."""

Expand All @@ -69,8 +66,8 @@ async def add_document(
VALUES ($1, $2, 'pending')
RETURNING id, collection_id, url, ingest_state, ingest_error
""",
collection_id,
url,
req.collection_id,
req.url,
)

document = Document.model_validate(dict(row))
Expand All @@ -83,31 +80,39 @@ async def add_document(

@router.get("/")
async def list_documents(
    conn: PgConnectionDep,
    collection_id: Annotated[
        int | None,
        Query(description="Limit to documents associated with this collection"),
    ] = None,
) -> List[Document]:
    """List documents, optionally restricted to a single collection.

    Args:
        conn: Database connection used to run the query.
        collection_id: When given, only documents whose ``collection_id``
            matches are returned; when omitted, every document is returned.

    Returns:
        One ``Document`` per row returned by the query.
    """
    # TODO: Test
    # Use an identity check: ``None`` is a singleton, and ``== None`` can be
    # fooled by objects that override ``__eq__``.
    if collection_id is None:
        results = await conn.fetch(
            """
            SELECT id, collection_id, url, ingest_state, ingest_error
            FROM document
            """
        )
    else:
        results = await conn.fetch(
            """
            SELECT id, collection_id, url, ingest_state, ingest_error
            FROM document WHERE collection_id = $1
            """,
            collection_id,
        )
    return [Document.model_validate(dict(result)) for result in results]


@router.get("/{id}")
async def get_document(
    conn: PgConnectionDep, id: PathDocumentId
) -> Document:
    """Fetch a single document by its ID.

    Args:
        conn: Database connection used to run the query.
        id: ID of the document to look up.

    Returns:
        The matching ``Document``.

    Raises:
        HTTPException: 404 when no document row has the given ID.
    """
    # TODO: Test
    result = await conn.fetchrow(
        """
        SELECT id, collection_id, url, ingest_state, ingest_error
        FROM document WHERE id = $1
        """,
        id,
    )
    # No matching row yields None; without this guard ``dict(None)`` would
    # raise TypeError and surface as a 500 instead of a 404.
    if result is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Document {id} not found",
        )
    return Document.model_validate(dict(result))

0 comments on commit e8c7e3d

Please sign in to comment.