Skip to content

Commit

Permalink
Merge pull request #6038 from agmckee/support-iso-8601-date-for-first-published-year-when-indexing
Browse files Browse the repository at this point in the history

Use SolrProcessor.get_pub_year to get edition years
  • Loading branch information
cdrini authored Mar 8, 2022
2 parents 34093b7 + b7214e7 commit 47b9461
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 8 deletions.
9 changes: 3 additions & 6 deletions openlibrary/solr/update_work.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@
re_author_key = re.compile(r'^/(?:a|authors)/(OL\d+A)')
re_bad_char = re.compile('[\x01\x0b\x1a-\x1e]')
re_edition_key = re.compile(r"/books/([^/]+)")
re_iso_date = re.compile(r'^(\d{4})-\d\d-\d\d$')
re_solr_field = re.compile(r'^[-\w]+$', re.U)
re_year = re.compile(r'(\d{4})$')
re_year = re.compile(r'\b(\d{4})\b')

# This will be set to a data provider; have faith, mypy!
data_provider = cast(DataProvider, None)
Expand Down Expand Up @@ -491,9 +490,6 @@ def get_pub_year(self, e):
"""
pub_date = e.get('publish_date', None)
if pub_date:
m = re_iso_date.match(pub_date)
if m:
return m.group(1)
m = re_year.search(pub_date)
if m:
return m.group(1)
Expand Down Expand Up @@ -595,8 +591,9 @@ def add_list(name, values):
pub_dates = {e[k] for e in editions if e.get(k)}
add_list(k, pub_dates)
pub_years = {
m.group(1) for m in (re_year.search(date) for date in pub_dates) if m
self.get_pub_year(e) for e in editions
}
pub_years = pub_years - {None,}
if pub_years:
add_list('publish_year', pub_years)
add('first_publish_year', min(int(y) for y in pub_years))
Expand Down
6 changes: 4 additions & 2 deletions openlibrary/tests/solr/test_update_work.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,18 +164,20 @@ def test_publish_year(self):
test_dates = [
"2000",
"Another 2000",
"2001-01-02", # Doesn't seems to be handling this case
"2001-01-02", # ISO 8601 formatted dates now supported
"01-02-2003",
"2004 May 23",
"Jan 2002",
"Bad date 12",
"Bad date 123412314",
]
work = make_work()
update_work.data_provider = FakeDataProvider(
[work] + [make_edition(work, publish_date=date) for date in test_dates]
)

d = build_data(work)
assert sorted(d['publish_year']) == ["2000", "2002", "2003"]
assert sorted(d['publish_year']) == ["2000", "2001", "2002", "2003", "2004"]
assert d["first_publish_year"] == 2000

def test_isbns(self):
Expand Down

0 comments on commit 47b9461

Please sign in to comment.