Skip to content

Commit

Permalink
Merge pull request #8560 from cdrini/refactor/subjects-py
Browse files Browse the repository at this point in the history
Add more types to subjects.py
  • Loading branch information
mekarpeles authored Nov 30, 2023
2 parents 17982d9 + 3b3b9c2 commit cb6c053
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 89 deletions.
17 changes: 9 additions & 8 deletions openlibrary/plugins/worksearch/languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,13 @@ def get_ebook_count(self, name, value, publish_year):


def setup():
d = web.storage(
name="language",
key="languages",
prefix="/languages/",
facet="language",
facet_key="language",
engine=LanguageEngine,
subjects.SUBJECTS.append(
subjects.SubjectMeta(
name="language",
key="languages",
prefix="/languages/",
facet="language",
facet_key="language",
Engine=LanguageEngine,
)
)
subjects.SUBJECTS.append(d)
17 changes: 9 additions & 8 deletions openlibrary/plugins/worksearch/publishers.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,13 @@ def get_ebook_count(self, name, value, publish_year):


def setup():
d = web.storage(
name="publisher",
key="publishers",
prefix="/publishers/",
facet="publisher_facet",
facet_key="publisher_facet",
engine=PublisherEngine,
subjects.SUBJECTS.append(
subjects.SubjectMeta(
name="publisher",
key="publishers",
prefix="/publishers/",
facet="publisher_facet",
facet_key="publisher_facet",
Engine=PublisherEngine,
)
)
subjects.SUBJECTS.append(d)
129 changes: 74 additions & 55 deletions openlibrary/plugins/worksearch/subjects.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Subject pages.
"""
from dataclasses import dataclass
import web
import json
import datetime
Expand All @@ -11,41 +12,11 @@
from openlibrary.core.models import Subject
from openlibrary.core.lending import add_availability
from openlibrary.solr.query_utils import query_dict_to_str
from openlibrary.utils import str_to_key, finddict
from openlibrary.utils import str_to_key


__all__ = ["SubjectEngine", "get_subject"]
__all__ = ["SubjectEngine", "get_subject", "SubjectMeta"]

SUBJECTS = [
web.storage(
name="person",
key="people",
prefix="/subjects/person:",
facet="person_facet",
facet_key="person_key",
),
web.storage(
name="place",
key="places",
prefix="/subjects/place:",
facet="place_facet",
facet_key="place_key",
),
web.storage(
name="time",
key="times",
prefix="/subjects/time:",
facet="time_facet",
facet_key="time_key",
),
web.storage(
name="subject",
key="subjects",
prefix="/subjects/",
facet="subject_facet",
facet_key="subject_key",
),
]

DEFAULT_RESULTS = 12
MAX_RESULTS = 1000
Expand Down Expand Up @@ -151,14 +122,22 @@ def process_key(self, key):
return key


SubjectPseudoKey = str
"""
The key-like paths for a subject, eg:
- `/subjects/foo`
- `/subjects/person:harry_potter`
"""


def get_subject(
key: str,
key: SubjectPseudoKey,
details=False,
offset=0,
sort='editions',
limit=DEFAULT_RESULTS,
**filters,
):
) -> Subject:
"""Returns data related to a subject.
By default, it returns a storage object with key, name, work_count and works.
Expand Down Expand Up @@ -214,19 +193,17 @@ def get_subject(
Optional arguments has_fulltext and published_in can be passed to filter the results.
"""

def create_engine():
for d in SUBJECTS:
if key.startswith(d.prefix):
Engine = d.get("engine") or SubjectEngine
return Engine()
return SubjectEngine()

engine = create_engine()
subject_results = engine.get_subject(
key, details=details, offset=offset, sort=sort, limit=limit, **filters
EngineClass = next(
(d.Engine for d in SUBJECTS if key.startswith(d.prefix)), SubjectEngine
)
return EngineClass().get_subject(
key,
details=details,
offset=offset,
sort=sort,
limit=limit,
**filters,
)
return subject_results


class SubjectEngine:
Expand All @@ -244,7 +221,8 @@ def get_subject(

meta = self.get_meta(key)
subject_type = meta.name
name = meta.path.replace("_", " ")
path = web.lstrips(key, meta.prefix)
name = path.replace("_", " ")

unescaped_filters = {}
if 'publish_year' in filters:
Expand All @@ -254,7 +232,7 @@ def get_subject(
WorkSearchScheme(),
{
'q': query_dict_to_str(
{meta.facet_key: self.normalize_key(meta.path)},
{meta.facet_key: self.normalize_key(path)},
unescaped=unescaped_filters,
phrase=True,
),
Expand Down Expand Up @@ -369,12 +347,10 @@ def get_subject(

return subject

def get_meta(self, key):
def get_meta(self, key) -> 'SubjectMeta':
prefix = self.parse_key(key)[0]
meta = finddict(SUBJECTS, prefix=prefix)

meta = web.storage(meta)
meta.path = web.lstrips(key, meta.prefix)
meta = next((d for d in SUBJECTS if d.prefix == prefix), None)
assert meta is not None, "Invalid subject key: {key}"
return meta

def parse_key(self, key):
Expand All @@ -397,9 +373,10 @@ def facet_wrapper(self, facet: str, value: str, label: str, count: int):
elif facet == "author_key":
return web.storage(name=label, key=f"/authors/{value}", count=count)
elif facet in ["subject_facet", "person_facet", "place_facet", "time_facet"]:
meta = next((d for d in SUBJECTS if d.facet == facet), None)
assert meta is not None, "Invalid subject facet: {facet}"
return web.storage(
key=finddict(SUBJECTS, facet=facet).prefix
+ str_to_key(value).replace(" ", "_"),
key=meta.prefix + str_to_key(value).replace(" ", "_"),
name=value,
count=count,
)
Expand Down Expand Up @@ -439,6 +416,48 @@ def work_wrapper(w: dict) -> web.storage:
)


@dataclass
class SubjectMeta:
name: str
key: str
prefix: str
facet: str
facet_key: str
Engine: type['SubjectEngine'] = SubjectEngine


SUBJECTS = [
SubjectMeta(
name="person",
key="people",
prefix="/subjects/person:",
facet="person_facet",
facet_key="person_key",
),
SubjectMeta(
name="place",
key="places",
prefix="/subjects/place:",
facet="place_facet",
facet_key="place_key",
),
SubjectMeta(
name="time",
key="times",
prefix="/subjects/time:",
facet="time_facet",
facet_key="time_key",
),
SubjectMeta(
name="subject",
key="subjects",
prefix="/subjects/",
facet="subject_facet",
facet_key="subject_key",
),
]


def setup():
"""Placeholder for doing any setup required.
Expand Down
12 changes: 0 additions & 12 deletions openlibrary/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,6 @@ def str_to_key(s: str) -> str:
return ''.join(c if c != ' ' else '_' for c in s.lower() if c not in to_drop)


def finddict(dicts, **filters):
"""Find a dictionary that matches given filter conditions.
>>> dicts = [{"x": 1, "y": 2}, {"x": 3, "y": 4}]
>>> sorted(finddict(dicts, x=1).items())
[('x', 1), ('y', 2)]
"""
for d in dicts:
if all(d.get(k) == v for k, v in filters.items()):
return d


T = TypeVar('T')


Expand Down
6 changes: 0 additions & 6 deletions openlibrary/utils/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from openlibrary.utils import (
extract_numeric_id_from_olid,
finddict,
str_to_key,
)

Expand All @@ -13,11 +12,6 @@ def test_str_to_key():
assert str_to_key('!@(X);:') == '!(x)'


def test_finddict():
dicts = [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}]
assert finddict(dicts, x=1) == {'x': 1, 'y': 2}


def test_extract_numeric_id_from_olid():
assert extract_numeric_id_from_olid('/works/OL123W') == '123'
assert extract_numeric_id_from_olid('OL123W') == '123'

0 comments on commit cb6c053

Please sign in to comment.