Skip to content

Commit

Permalink
Add content too long (#106)
Browse files Browse the repository at this point in the history
Add content too long
  • Loading branch information
eyurtsev authored Mar 23, 2024
1 parent 6a6087b commit d5231a4
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 11 deletions.
17 changes: 13 additions & 4 deletions backend/server/extraction_runnable.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,13 @@ def validate_schema(cls, v: Any) -> Dict[str, Any]:
return v


class ExtractResponse(TypedDict):
class ExtractResponse(TypedDict, total=False):
"""Response body for the extract endpoint."""

data: List[Any]
# content_too_long is set to True if the content was too long
# and had to be truncated before extraction
content_too_long: Optional[bool]


def _cast_example_to_dict(example: Example) -> Dict[str, Any]:
Expand Down Expand Up @@ -203,13 +206,19 @@ async def extract_entire_document(
for text in texts
]

if settings.MAX_CHUNKS >= 1:
# Limit the number of chunks to process
# Limit the number of chunks to process
if len(extraction_requests) > settings.MAX_CHUNKS and settings.MAX_CHUNKS > 0:
content_too_long = True
extraction_requests = extraction_requests[: settings.MAX_CHUNKS]
else:
content_too_long = False

# Run extractions which may potentially yield duplicate results
extract_responses: List[ExtractResponse] = await extraction_runnable.abatch(
extraction_requests, {"max_concurrency": settings.MAX_CONCURRENCY}
)
# Deduplicate the results
return deduplicate(extract_responses)
return {
"data": deduplicate(extract_responses)["data"],
"content_too_long": content_too_long,
}
21 changes: 16 additions & 5 deletions backend/tests/unit_tests/api/test_api_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ async def test_extract_from_file() -> None:
headers=headers,
)
assert response.status_code == 200
assert response.json() == {"data": ["Test Conte"]}
assert response.json() == {
"data": ["Test Conte"],
"content_too_long": False,
}

# Vary chat model
response = await client.post(
Expand All @@ -97,7 +100,10 @@ async def test_extract_from_file() -> None:
headers=headers,
)
assert response.status_code == 200
assert response.json() == {"data": ["Test Conte"]}
assert response.json() == {
"data": ["Test Conte"],
"content_too_long": False,
}

# Test retrieval
response = await client.post(
Expand All @@ -110,7 +116,9 @@ async def test_extract_from_file() -> None:
headers=headers,
)
assert response.status_code == 200
assert response.json() == {"data": ["Test Conte"]}
assert response.json() == {
"data": ["Test Conte"],
}

# We'll use multi-form data here.
# Create a named temporary file
Expand All @@ -129,7 +137,7 @@ async def test_extract_from_file() -> None:
)

assert response.status_code == 200, response.text
assert response.json() == {"data": ["This is a "]}
assert response.json() == {"data": ["This is a "], "content_too_long": False}


@patch(
Expand Down Expand Up @@ -191,4 +199,7 @@ async def test_extract_from_large_file() -> None:
headers=headers,
)
assert response.status_code == 200
assert response.json() == {"data": ["a"]}
assert response.json() == {
"data": ["a"],
"content_too_long": True,
}
6 changes: 6 additions & 0 deletions frontend/app/components/Playground.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ export const Playground = (props: PlaygroundProps) => {
</form>
</div>
<div className="m-auto">
{data?.content_too_long && (
<Text color={"red"} margin={5}>
The content was too long to be processed. Extraction was run on a
truncated version of the content.
</Text>
)}
<Tabs variant={"enclosed"} colorScheme="blue" size="sm">
<TabList>
<Tab>Table</Tab>
Expand Down
4 changes: 3 additions & 1 deletion frontend/app/components/ResultsTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import {
Tr,
} from "@chakra-ui/react";

import { ExtractionResponse } from "../utils/api";

function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null;
}
Expand Down Expand Up @@ -62,7 +64,7 @@ export const ResultsTable = ({
data,
isPending,
}: {
data: { data: unknown[] } | undefined;
data: ExtractionResponse | undefined;
isPending: boolean;
}) => {
// scan all the results to determine the columns
Expand Down
3 changes: 2 additions & 1 deletion frontend/app/utils/api.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,9 @@ type ExtractionRequest = {
file?: File;
};

type ExtractionResponse = {
export type ExtractionResponse = {
data: unknown[];
content_too_long?: boolean;
};

export const runExtraction: MutationFunction<
Expand Down

0 comments on commit d5231a4

Please sign in to comment.