Skip to content

Commit

Permalink
Rewrite reservation history fetch logic
Browse files Browse the repository at this point in the history
  • Loading branch information
ttuovinen committed May 8, 2024
1 parent 467bc2c commit 14c72eb
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 74 deletions.
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
POSTGRES_URL = "postgresql://palace:test@localhost:5432/circ"
OPENSEARCH_URL = "http://localhost:9200"
OPENSEARCH_EVENT_INDEX = "circulation-events-v1"
OPENSEARCH_WORK_INDEX = "circulation-works-v5"
ROOT_PATH = ""
1 change: 1 addition & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class Settings(BaseSettings):
POSTGRES_URL: str = ""
OPENSEARCH_URL: str = ""
OPENSEARCH_EVENT_INDEX: str = ""
OPENSEARCH_WORK_INDEX: str = ""
ROOT_PATH: str = ""

model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
Expand Down
12 changes: 0 additions & 12 deletions lib/models.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,13 @@
from pydantic import BaseModel


class LibMeta(BaseModel):
collection: str
page: int
page_size: int
total: int


class Reservation(BaseModel):
count: int
identifier: str
title: str
author: str


class Reservations(BaseModel):
meta: LibMeta
data: list[Reservation]


class TokenData(BaseModel):
id: int
label: str
Expand Down
103 changes: 63 additions & 40 deletions lib/opensearch.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import datetime
from collections import Counter
from fastapi import HTTPException
from opensearchpy import OpenSearch

from config import settings
from lib.models import LibMeta, Reservation, Reservations
from lib.models import Reservation


use_ssl = settings.OPENSEARCH_URL.startswith("https://")
Expand All @@ -30,19 +29,16 @@ def field(hit, field, default=""):
def get_reservation_events(
os_client: OpenSearch,
collection_name: str,
page: int,
size: int,
from_date: datetime.date | None = None,
to_date: datetime.date | None = None,
):
"""
Retrieves reservation events from OpenSearch on a given (or not given) date frame
Parameters:
- os_client: OpenSearch client
- collection_name: (str): the name of the collection to filter by
(NOTE: events unfortunately don't have collection ids so we use name here)
- page (int): the page number to retrieve
- size (int): the number of items per page
- from_date (datetime.date, optional): the start date for filtering
- to_date (datetime.date, optional): the end date for filtering
Expand All @@ -53,7 +49,9 @@ def get_reservation_events(
if not collection_name:
raise HTTPException(status_code=404, detail="Invalid collection configuration")

must: list = [
# 1) Fetch identifier counts from hold events as aggregations

event_must: list = [
{"term": {"type": "circulation_manager_hold_place"}},
{"term": {"collection": collection_name}},
]
Expand All @@ -64,40 +62,65 @@ def get_reservation_events(
range["gte"] = from_date
if to_date:
range["lte"] = to_date
must.append({"range": {"start": range}})
event_must.append({"range": {"start": range}})

query = {
"size": size,
"from": (page - 1) * size,
"query": {"bool": {"must": must}},
event_query = {
"size": 0,
"query": {"bool": {"must": event_must}},
"aggs": {"identifier": {"terms": {"field": "identifier", "size": 1000000}}},
}

result = os_client.search(index=settings.OPENSEARCH_EVENT_INDEX, body=query)
total = result.get("hits", {}).get("total", {}).get("value", 0)
hits = result.get("hits", {}).get("hits", [])

# Extract identifiers from hits and count their occurrences
identifiers = (field(hit, "identifier") for hit in hits)
identifier_counts = Counter(identifiers)
data = []
seen_identifiers = set() # We need only one item per identifier

for hit in hits:
identifier = field(hit, "identifier")
if identifier not in seen_identifiers:
data.append(
Reservation(
identifier=identifier,
title=field(hit, "title"),
author=field(hit, "author"),
count=identifier_counts[identifier],
)
)
seen_identifiers.add(identifier)

meta = LibMeta(collection=collection_name, page=page, page_size=size, total=total)

return Reservations(
meta=meta,
data=data,
event_result = os_client.search(
index=settings.OPENSEARCH_EVENT_INDEX, body=event_query
)

identifier_buckets = event_result["aggregations"]["identifier"]["buckets"]
identifiers = [bucket["key"] for bucket in identifier_buckets]

# 2) Fetch work data for each identifier from works index

source_fields = [
"identifiers",
"title",
"author",
]
BATCH_SIZE = 10000
works_map = {}

while len(identifiers) > 0:
identifiers_to_fetch = identifiers[:BATCH_SIZE]
identifiers = identifiers[BATCH_SIZE:]
work_query = {
"size": BATCH_SIZE,
"_source": source_fields,
"query": {
"nested": {
"path": "identifiers",
"query": {
"terms": {"identifiers.identifier": identifiers_to_fetch}
},
}
},
}
work_result = os_client.search(
index=settings.OPENSEARCH_WORK_INDEX, body=work_query
)

for hit in work_result.get("hits", {}).get("hits", []):
for identifier in hit.get("_source", {}).get("identifiers", []):
works_map[identifier["identifier"]] = hit["_source"]

# 3) Combine identifier counts with work data

def make_reservation_info(bucket):
work = works_map.get(bucket.get("key"), {})
return Reservation(
identifier=bucket.get("key"),
title=work.get("title", ""),
author=work.get("author", ""),
count=bucket.get("doc_count"),
)

data = [make_reservation_info(bucket) for bucket in identifier_buckets]

return data
8 changes: 2 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
get_holds_with_edition_data,
get_reservations_for_identifier,
)
from lib.models import Reservation, Reservations, TokenData
from lib.models import Reservation, TokenData
from lib.opensearch import get_os_client, get_reservation_events

app = FastAPI(
Expand Down Expand Up @@ -80,8 +80,6 @@ def read_active_reservations_for_license_pool(
@app.get("/reservation-history")
def read_reservation_history(
os_client: OpenSearch = Depends(get_os_client),
page: int = 1,
size: int = 100,
from_date: datetime.date | None = Query(
default=None,
alias="from",
Expand All @@ -93,12 +91,10 @@ def read_reservation_history(
description="Format: YYYY-MM-DD",
),
token_data: TokenData = Depends(get_token_data),
) -> Reservations:
) -> list[Reservation]:
return get_reservation_events(
os_client=os_client,
collection_name=token_data.collection_name,
page=page,
size=size,
from_date=from_date,
to_date=to_date,
)
70 changes: 59 additions & 11 deletions tests/opensearch_testsetup.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,88 @@
from unittest.mock import MagicMock
from config import settings

mock_event_response = {
"hits": {"total": {"value": 7, "relation": "eq"}, "hits": []},
"aggregations": {
"identifier": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "111",
"doc_count": 3,
},
{
"key": "222",
"doc_count": 2,
},
{
"key": "333",
"doc_count": 1,
},
],
}
},
}


mock_search_response = {
mock_works_response = {
"hits": {
"total": {"value": 3, "relation": "eq"},
"hits": [
{
"_source": {
"identifier": "111",
"identifiers": [
{
"type": "ISBN",
"identifier": "111",
}
],
"title": "Book 1",
"author": "Author 1",
"start": "2024-02-01T10:00:00.148927+00:00",
}
},
{
"_source": {
"identifier": "111",
"title": "Book 1",
"author": "Author 1",
"start": "2024-03-01T06:27:16.148927+00:00",
"identifiers": [
{
"type": "ISBN",
"identifier": "222",
}
],
"title": "Book 2",
"author": "Author 2",
}
},
{
"_source": {
"identifier": "222",
"title": "Book 2",
"identifiers": [
{
"type": "ISBN",
"identifier": "333",
}
],
"title": "Book 3",
"author": "Author 2",
"start": "2024-04-01T06:27:16.148927+00:00",
}
},
],
}
}


def mock_search_side_effect(*args, **kwargs):
if kwargs["index"] == settings.OPENSEARCH_EVENT_INDEX:
return mock_event_response
elif kwargs["index"] == settings.OPENSEARCH_WORK_INDEX:
return mock_works_response
else:
raise ValueError("Unexpected index")


# Create and configure the mock
mock_os_client = MagicMock()
mock_os_client.search.return_value = mock_search_response
mock_os_client.search.side_effect = mock_search_side_effect


def override_get_os_client():
Expand Down
24 changes: 19 additions & 5 deletions tests/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,22 @@ def test_get_reservation_history(self):
assert response.status_code == 200

output = response.json()
assert len(output["data"]) == 2
# First book has two reservations
assert output["data"][0]["count"] == 2
# Second book has one reservation
assert output["data"][1]["count"] == 1
assert len(output) == 3
# Get the books by identifiers
book_map = {item["identifier"]: item for item in output}
first_book = book_map.get("111", {})
second_book = book_map.get("222", {})
third_book = book_map.get("333", {})
# Test counts
assert first_book["count"] == 3
assert second_book["count"] == 2
assert third_book["count"] == 1
# Test identifiers
assert first_book["identifier"] == "111"
assert second_book["identifier"] == "222"
assert third_book["identifier"] == "333"
# Test title and author fields
assert first_book["title"] == "Book 1"
assert first_book["author"] == "Author 1"
assert second_book["title"] == "Book 2"
assert second_book["author"] == "Author 2"

0 comments on commit 14c72eb

Please sign in to comment.