Skip to content

Commit

Permalink
Update openlibrary/solr/update_work.py
Browse files Browse the repository at this point in the history
  • Loading branch information
mekarpeles committed Mar 11, 2022
1 parent 843b06c commit f7dc06c
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 22 deletions.
2 changes: 1 addition & 1 deletion openlibrary/solr/data_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

logger = logging.getLogger("openlibrary.solr.data_provider")

IA_METADATA_FIELDS = ('identifier', 'boxid', 'collection')
IA_METADATA_FIELDS = ('identifier', 'boxid', 'collection', 'access-restricted-item')
OCAID_PATTERN = re.compile(r'^[^\s&#?/]+$')


Expand Down
26 changes: 12 additions & 14 deletions openlibrary/solr/update_work.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ def get_list(d, key):
return {
'boxid': set(get_list(metadata, 'boxid')),
'collection': set(get_list(metadata, 'collection')),
'access-restricted-item': metadata.get('access-restricted-item'),
}


Expand Down Expand Up @@ -725,7 +726,8 @@ def get_last_modified(self, work, editions):
datetimestr_to_int(doc.get('last_modified')) for doc in [work] + editions
)

def add_ebook_info(self, doc, editions):
@staticmethod
def add_ebook_info(doc, editions):
"""
Add ebook information from the editions to the work Solr document.
Expand All @@ -740,24 +742,18 @@ def add(name, value):
def add_list(name, values):
doc[name] = list(values)

# Q: How do I change what values I have available from Archive.org metadata?
# Because I'm pretty sure we're missing e.g. lending___status etc...
# It looks like it may? Just not computed values
# 1. is in inlibrary? i.e. borrowable
# 2. is it not access-restricted-item?

borrowable_editions = set()
printdisabled_editions = set()
open_editions = set()
unclassified_editions = set()

language_editions = {}

pub_goog = set() # google
pub_nongoog = set()
nonpub_goog = set()
nonpub_nongoog = set()

printdisabled = set()
all_collection = set()
public_scan = False
lending_edition = None
Expand All @@ -772,18 +768,20 @@ def add_list(name, values):
ocaid = e['ocaid'].strip()
collections = e.get('ia_collection', [])
all_collection.update(collections)

if 'inlibrary' in collections:
borrowable_editions.add(ocaid)
elif 'printdisabled' in collections:
printdisabled_editions.add(ocaid)
printdisabled.add(re_edition_key.match(e['key']).group(1))
elif not e.get('access_restricted_item', False):
elif e.get('access_restricted_item', False) == "true":
unclassified_editions.add(ocaid)
else:
public_scan = True
open_editions.add(ocaid)
else:
unclassified_editions.add(ocaid)

# Legacy
if 'printdisabled' in collections:
printdisabled.add(re_edition_key.match(e['key']).group(1))
# partners may still rely on these legacy fields, leave logic unchanged
if not lending_edition and 'lendinglibrary' in e.get('ia_collection', []):
lending_edition = re_edition_key.match(e['key']).group(1)
Expand Down
47 changes: 40 additions & 7 deletions openlibrary/tests/solr/test_update_work.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from openlibrary.solr import update_work
from openlibrary.solr.data_provider import DataProvider
from openlibrary.solr.update_work import (
SolrProcessor,
build_data,
pick_cover_edition,
pick_number_of_pages_median,
Expand Down Expand Up @@ -252,7 +253,7 @@ async def test_with_one_lending_edition(self):
w,
key="/books/OL1M",
ocaid='foo00bar',
_ia_meta={"collection": ['lendinglibrary', 'americana']},
_ia_meta={"collection": ['inlibrary', 'americana']},
),
]
)
Expand All @@ -262,7 +263,7 @@ async def test_with_one_lending_edition(self):
assert 'printdisabled_s' not in d
assert d['lending_edition_s'] == 'OL1M'
assert d['ia'] == ['foo00bar']
assert sss(d['ia_collection_s']) == sss("americana;lendinglibrary")
assert sss(d['ia_collection_s']) == sss("americana;inlibrary")
assert d['edition_count'] == 1
assert d['ebook_count_i'] == 1

Expand All @@ -276,13 +277,13 @@ async def test_with_two_lending_editions(self):
w,
key="/books/OL1M",
ocaid='foo01bar',
_ia_meta={"collection": ['lendinglibrary', 'americana']},
_ia_meta={"collection": ['inlibrary', 'americana']},
),
make_edition(
w,
key="/books/OL2M",
ocaid='foo02bar',
_ia_meta={"collection": ['lendinglibrary', 'internetarchivebooks']},
_ia_meta={"collection": ['inlibrary', 'internetarchivebooks']},
),
]
)
Expand All @@ -293,7 +294,7 @@ async def test_with_two_lending_editions(self):
assert d['lending_edition_s'] == 'OL1M'
assert sorted(d['ia']) == ['foo01bar', 'foo02bar']
assert sss(d['ia_collection_s']) == sss(
"lendinglibrary;americana;internetarchivebooks"
"inlibrary;americana;internetarchivebooks"
)
assert d['edition_count'] == 2
assert d['ebook_count_i'] == 2
Expand Down Expand Up @@ -363,7 +364,7 @@ async def test_with_multiple_editions(self):
w,
key="/books/OL3M",
ocaid='foo01bar',
_ia_meta={"collection": ['lendinglibrary', 'americana']},
_ia_meta={"collection": ['inlibrary', 'americana']},
),
make_edition(
w,
Expand All @@ -380,7 +381,7 @@ async def test_with_multiple_editions(self):
assert d['lending_edition_s'] == 'OL3M'
assert sorted(d['ia']) == ['foo00bar', 'foo01bar', 'foo02bar']
assert sss(d['ia_collection_s']) == sss(
"americana;inlibrary;lendinglibrary;printdisabled"
"americana;inlibrary;printdisabled"
)

assert d['edition_count'] == 4
Expand Down Expand Up @@ -687,3 +688,35 @@ def test_normal_case(self):
assert pick_number_of_pages_median(eds) == 122
eds = [{}, {}] + [{'number_of_pages': n} for n in [123, 122, 1]]
assert pick_number_of_pages_median(eds) == 122

class Test_Sort_Editions_Ocaids:
    """Ordering of the ``ia`` field written by ``SolrProcessor.add_ebook_info``."""

    def test_sort(self):
        """Editions passed in reverse order must be listed open-first.

        Expected order of ``doc['ia']``: the open edition, then the
        ``inlibrary`` (borrowable) one, then the ``printdisabled`` one, and
        finally the restricted edition that belongs to no collection.
        """
        restricted = {
            "key": "/books/OL789M",
            "ocaid": "ocaid_restricted",
            "access_restricted_item": "true",
            "ia_collection": [],
        }
        printdisabled = {
            "key": "/books/OL567M",
            "ocaid": "ocaid_printdisabled",
            "access_restricted_item": "true",
            "ia_collection": ["printdisabled"],
        }
        borrowable = {
            "key": "/books/OL234M",
            "ocaid": "ocaid_borrowable",
            "access_restricted_item": "true",
            "ia_collection": ["inlibrary"],
        }
        open_access = {
            "key": "/books/OL123M",
            "ocaid": "ocaid_open",
            "access_restricted_item": "false",
            "ia_collection": [],
        }

        solr_doc = {}
        # Input order is the exact opposite of the order asserted below, so
        # the test only passes if add_ebook_info actually reorders the ocaids.
        SolrProcessor.add_ebook_info(
            solr_doc, [restricted, printdisabled, borrowable, open_access]
        )

        expected_order = (
            "ocaid_open",
            "ocaid_borrowable",
            "ocaid_printdisabled",
            "ocaid_restricted",
        )
        assert len(solr_doc.get('ia', [])) == 4
        for position, ocaid in enumerate(expected_order):
            assert solr_doc['ia'][position] == ocaid

0 comments on commit f7dc06c

Please sign in to comment.