Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flatten documents to be top-level #13

Merged
merged 2 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions app/documents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@

from pydantic import BaseModel

class CreateRequest(BaseModel):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DocumentCreate?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a couple of different naming patterns - I may try to normalize them in future PRs

"""The name of the collection the document should be added to."""
collection_id: int

"""The URL of the document to add."""
url: str

class IngestState(Enum):
PENDING = "pending"
Expand Down
43 changes: 24 additions & 19 deletions app/documents/router.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
from typing import Annotated, List

import asyncpg
from fastapi import APIRouter, BackgroundTasks, Body, HTTPException, Path, status
from fastapi import APIRouter, BackgroundTasks, Body, HTTPException, Path, status, Query
from loguru import logger

from app.collections.router import PathCollectionId
from app.common.db import PgConnectionDep, PgPoolDep
from app.documents.models import Document
from app.ingest.extract import extract
from app.ingest.extract.source import ExtractSource
from app.ingest.store import Store, StoreDep

# TODO: Move this to `/documents`. Will require figuring out
# how to specify the collection for create, list, etc.
router = APIRouter(prefix="/collections/{collection_id}/documents")
from .models import CreateRequest

router = APIRouter(prefix="/documents")

# We can't use the session from the request because it ends as soon
# as the request completes. So we need to pass the engine and start
Expand Down Expand Up @@ -53,11 +51,10 @@ async def ingest_document(id: int, store: Store, pg_pool: asyncpg.Pool):

@router.put("/")
async def add_document(
collection_id: PathCollectionId,
store: StoreDep,
pg_pool: PgPoolDep,
background: BackgroundTasks,
url: Annotated[str, Body(..., description="The URL of the document to add.")],
req: CreateRequest,
) -> Document:
"""Add a document."""

Expand All @@ -69,8 +66,8 @@ async def add_document(
VALUES ($1, $2, 'pending')
RETURNING id, collection_id, url, ingest_state, ingest_error
""",
collection_id,
url,
req.collection_id,
req.url,
)

document = Document.model_validate(dict(row))
Expand All @@ -83,31 +80,39 @@ async def add_document(

@router.get("/")
async def list_documents(
collection_id: PathCollectionId, conn: PgConnectionDep
conn: PgConnectionDep,
collection_id: Annotated[int | None, Query(description="Limit to documents associated with this collection")] = None,
) -> List[Document]:
"""List documents."""
# TODO: Test
results = await conn.fetch(
if collection_id == None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential concern here around auth when no collection is specified -- maybe require `collection_id` for now?

results = await conn.fetch(
"""
SELECT id, collection_id, url, ingest_state, ingest_error
FROM document
"""
SELECT id, collection_id, url, ingest_state, ingest_error
FROM document WHERE collection_id = $1
""",
collection_id,
)
)
else:
results = await conn.fetch(
"""
SELECT id, collection_id, url, ingest_state, ingest_error
FROM document WHERE collection_id = $1
""",
collection_id,
)
return [Document.model_validate(dict(result)) for result in results]


@router.get("/{id}")
async def get_document(
conn: PgConnectionDep, collection_id: PathCollectionId, id: PathDocumentId
conn: PgConnectionDep, id: PathDocumentId
) -> Document:
# TODO: Test / return not found?
result = await conn.fetchrow(
"""
SELECT id, collection_id, url, ingest_state, ingest_error
FROM document WHERE id = $1 AND collection_id = $2
FROM document WHERE id = $1
""",
id,
collection_id,
)
return Document.model_validate(dict(result))