From ec1909d155353801af7567534a5acc21647d3153 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Wed, 25 May 2022 14:26:59 -0400 Subject: [PATCH 01/23] Experiment with direct book providers --- openlibrary/book_providers.py | 31 +++++++++++++++++-- .../book_providers/direct_read_button.html | 10 ++++++ 2 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 openlibrary/templates/book_providers/direct_read_button.html diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index ecca74f1c5d..2891622b84a 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -38,7 +38,7 @@ class AbstractBookProvider(Generic[TProviderMetadata]): The key in the identifiers field on editions; see https://openlibrary.org/config/edition """ - identifier_key: str + identifier_key: Optional[str] def get_olids(self, identifier): return web.ctx.site.things( @@ -229,9 +229,36 @@ def is_own_ocaid(self, ocaid: str) -> bool: return False +class DirectProvider(AbstractBookProvider): + short_name = 'direct' + identifier_key = None + + @property + def db_selector(self): + return "providers.url" + + @property + def solr_key(self): + # TODO: Not implemented yet + return None + + def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> list[str]: + # It's an edition + if ed_or_solr.get('providers'): + return [b['url'] for b in ed_or_solr['providers']] + else: + # TODO: Not implemented for search/solr yet + return [] + + def render_download_options(self, edition: Edition, extra_args: list = None): + # TODO: Not implemented yet + return '' + + PROVIDER_ORDER: list[AbstractBookProvider] = [ # These providers act essentially as their own publishers, so link to the first when # we're on an edition page + DirectProvider(), LibriVoxProvider(), ProjectGutenbergProvider(), StandardEbooksProvider(), @@ -389,7 +416,7 @@ def get_best_edition( def get_solr_keys(): - return [p.solr_key for p in PROVIDER_ORDER] + return [p.solr_key for p in PROVIDER_ORDER if p] setattr(get_book_provider, 'ia', get_book_provider_by_name('ia')) # noqa: B010 diff --git a/openlibrary/templates/book_providers/direct_read_button.html b/openlibrary/templates/book_providers/direct_read_button.html new file mode 100644 index 00000000000..da35a4873f1 --- /dev/null +++ b/openlibrary/templates/book_providers/direct_read_button.html @@ -0,0 +1,10 @@ +$def with(url) + +
+ $_('Read') +
From d65f95a3796b5430d96edbd54bc2793297a16c26 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Wed, 25 May 2022 16:01:50 -0400 Subject: [PATCH 02/23] Add download links for pressbooks direct providers --- openlibrary/book_providers.py | 4 ---- .../book_providers/direct_download_options.html | 13 +++++++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 openlibrary/templates/book_providers/direct_download_options.html diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 2891622b84a..d139e9d0e4e 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -250,10 +250,6 @@ def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> list[str]: # TODO: Not implemented for search/solr yet return [] - def render_download_options(self, edition: Edition, extra_args: list = None): - # TODO: Not implemented yet - return '' - PROVIDER_ORDER: list[AbstractBookProvider] = [ # These providers act essentially as their own publishers, so link to the first when diff --git a/openlibrary/templates/book_providers/direct_download_options.html b/openlibrary/templates/book_providers/direct_download_options.html new file mode 100644 index 00000000000..553b8c0362d --- /dev/null +++ b/openlibrary/templates/book_providers/direct_download_options.html @@ -0,0 +1,13 @@ +$def with(url) + +
+
+

$_('Download Options')

+ +
From 4731816d7961821dbc10e0041dba4592c9e8a1d7 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Fri, 3 Jun 2022 00:24:10 -0400 Subject: [PATCH 03/23] Only show read button if open-access book --- openlibrary/book_providers.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index d139e9d0e4e..94625bce750 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -243,9 +243,14 @@ def solr_key(self): return None def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> list[str]: + NS = 'http://opds-spec.org' # It's an edition if ed_or_solr.get('providers'): - return [b['url'] for b in ed_or_solr['providers']] + return [ + b.get('url') or b.get('href') + for b in ed_or_solr['providers'] + if b.get('url') or b.get('rel') == f"{NS}/acquisition/open-access" + ] else: # TODO: Not implemented for search/solr yet return [] From e8756ae5067caebd21ed14a03c738244ad3c15ba Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Fri, 3 Jun 2022 01:27:27 -0400 Subject: [PATCH 04/23] Make direct book providers support OPDS samples --- openlibrary/book_providers.py | 28 ++++++++++++++++++- .../book_providers/direct_read_button.html | 27 ++++++++++++------ static/css/components/buttonCta.less | 5 ++++ 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 94625bce750..c11c9483258 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -249,12 +249,38 @@ def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> list[str]: return [ b.get('url') or b.get('href') for b in ed_or_solr['providers'] - if b.get('url') or b.get('rel') == f"{NS}/acquisition/open-access" + if b.get('url') + or ( + b.get('rel') + in ( + f"{NS}/acquisition/open-access", + f"{NS}/acquisition/sample", + ) + ) ] else: # TODO: Not implemented for search/solr yet return [] + def render_read_button(self, ed_or_solr: Union[Edition, dict]): + NS = 'http://opds-spec.org' + + def acq_sort(b): + if b.get('url') or b.get('rel') == f'{NS}/acquisition/open-access': + return 0 + elif b.get('rel') == f'{NS}/acquisition/sample': + return 1 + else: + return 2 + + acq_sorted = sorted( + (p for p in ed_or_solr.get('providers', []) if acq_sort(p) < 2), + key=acq_sort, + ) + if not acq_sorted: + return '' + return render_template(self.get_template_path('read_button'), acq_sorted[0]) + PROVIDER_ORDER: list[AbstractBookProvider] = [ # These providers act essentially as their own publishers, so link to the first when diff --git a/openlibrary/templates/book_providers/direct_read_button.html b/openlibrary/templates/book_providers/direct_read_button.html index da35a4873f1..96fc5833721 100644 --- a/openlibrary/templates/book_providers/direct_read_button.html +++ b/openlibrary/templates/book_providers/direct_read_button.html @@ -1,10 +1,21 @@ -$def with(url) +$def with(provider) - + +$elif provider.get('rel') == 'http://opds-spec.org/acquisition/sample': + + href="$provider.get('href')" + >$_('Preview') + diff --git a/static/css/components/buttonCta.less b/static/css/components/buttonCta.less index 534ed6a2691..75dbc51bf0f 100644 --- a/static/css/components/buttonCta.less +++ b/static/css/components/buttonCta.less @@ -72,6 +72,11 @@ a.cta-btn { color: @white; &:hover { background-color: darken(@primary-blue, 20%); } } + + &--shell&--external { + background-blend-mode: difference; + } + &--shell, &--shell:link, &--shell:visited { .shell-btn--active(); } From 02f9ce0574d33d793baa8743cae33b21f2b60a76 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Mon, 17 Oct 2022 13:23:00 -0700 Subject: [PATCH 05/23] Make solr-updater take into account book providers for ebook_access --- openlibrary/book_providers.py | 134 ++++++++++++++---- .../book_providers/direct_read_button.html | 9 +- 2 files changed, 114 insertions(+), 29 deletions(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index c11c9483258..49631a76fad 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -1,4 +1,6 @@ -from typing import TypedDict, Literal, cast, TypeVar, Generic +from dataclasses import dataclass +import logging +from typing import Optional, TypedDict, Union, Literal, cast, TypeVar, Generic from collections.abc import Callable, Iterator import web @@ -10,6 +12,11 @@ from openlibrary.utils import OrderedEnum, multisort_best +logger = logging.getLogger("openlibrary.book_providers") + +ProviderAccessLiteral = Literal['sample', 'buy', 'open-access', 'borrow', 'subscribe'] + + class EbookAccess(OrderedEnum): # Keep in sync with solr/conf/enumsConfig.xml ! NO_EBOOK = 0 @@ -21,6 +28,85 @@ class EbookAccess(OrderedEnum): def to_solr_str(self): return self.name.lower() + @staticmethod + def from_provider_literal(literal: ProviderAccessLiteral) -> 'EbookAccess': + if literal == 'sample': + # We need to update solr to handle these! Requires full reindex + return EbookAccess.PRINTDISABLED + elif literal == 'buy': + return EbookAccess.NO_EBOOK + elif literal == 'open-access': + return EbookAccess.PUBLIC + elif literal == 'borrow': + return EbookAccess.BORROWABLE + elif literal == 'subscribe': + return EbookAccess.NO_EBOOK + else: + raise ValueError(f'Unknown access literal: {literal}') + + +@dataclass +class EbookProvider: + access: ProviderAccessLiteral + format: Literal['web', 'pdf', 'epub', 'audio'] + price: str | None + url: str + provider_name: str | None = None + + @property + def ebook_access(self) -> EbookAccess: + return EbookAccess.from_provider_literal(self.access) + + @staticmethod + def from_json(json: dict) -> 'EbookProvider': + if 'href' in json: + # OPDS-style provider + return EbookProvider.from_opds_json(json) + elif 'url' in json: + # Pressbooks/OL-style + return EbookProvider( + access=json.get('access', 'open-access'), + format=json.get('format', 'web'), + price=json.get('price', None), + url=json['url'], + provider_name=json.get('provider_name', None), + ) + else: + raise ValueError(f'Unknown ebook provider format: {json}') + + @staticmethod + def from_opds_json(json: dict) -> 'EbookProvider': + if json.get('properties', {}).get('indirectAcquisition', None): + mimetype = json['properties']['indirectAcquisition'][0]['type'] + else: + mimetype = json['type'] + + fmt: Literal['web', 'pdf', 'epub', 'audio'] = 'web' + if mimetype.startswith('audio/'): + fmt = 'audio' + elif mimetype == 'application/pdf': + fmt = 'pdf' + elif mimetype == 'application/epub+zip': + fmt = 'epub' + elif mimetype == 'text/html': + fmt = 'web' + else: + logger.warn(f'Unknown mimetype: {mimetype}') + fmt = 'web' + + if json.get('properties', {}).get('price', None): + price = f"{json['properties']['price']['value']} {json['properties']['price']['currency']}" + else: + price = None + + return EbookProvider( + access=json['rel'].split('/')[-1], + format=fmt, + price=price, + url=json['href'], + provider_name=json.get('name'), + ) + class IALiteMetadata(TypedDict): boxid: set[str] @@ -243,44 +329,44 @@ def solr_key(self): return None def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> list[str]: - NS = 'http://opds-spec.org' # It's an edition if ed_or_solr.get('providers'): return [ - b.get('url') or b.get('href') - for b in ed_or_solr['providers'] - if b.get('url') - or ( - b.get('rel') - in ( - f"{NS}/acquisition/open-access", - f"{NS}/acquisition/sample", - ) - ) + provider.url + for provider in map(EbookProvider.from_json, ed_or_solr['providers']) + if provider.ebook_access >= EbookAccess.PRINTDISABLED ] else: # TODO: Not implemented for search/solr yet return [] def render_read_button(self, ed_or_solr: Union[Edition, dict]): - NS = 'http://opds-spec.org' - - def acq_sort(b): - if b.get('url') or b.get('rel') == f'{NS}/acquisition/open-access': - return 0 - elif b.get('rel') == f'{NS}/acquisition/sample': - return 1 - else: - return 2 - acq_sorted = sorted( - (p for p in ed_or_solr.get('providers', []) if acq_sort(p) < 2), - key=acq_sort, + ( + p + for p in map(EbookProvider.from_json, ed_or_solr.get('providers', [])) + if p.ebook_access >= EbookAccess.PRINTDISABLED + ), + key=lambda p: p.ebook_access, + reverse=True, ) if not acq_sorted: return '' return render_template(self.get_template_path('read_button'), acq_sorted[0]) + def get_access( + self, + edition: dict, + metadata: TProviderMetadata | None = None, + ) -> EbookAccess: + """ + Return the access level of the edition. + """ + # For now assume 0 is best + return EbookAccess.from_provider_literal( + EbookProvider.from_json(edition['providers'][0]).access + ) + PROVIDER_ORDER: list[AbstractBookProvider] = [ # These providers act essentially as their own publishers, so link to the first when diff --git a/openlibrary/templates/book_providers/direct_read_button.html b/openlibrary/templates/book_providers/direct_read_button.html index 96fc5833721..468c7a52673 100644 --- a/openlibrary/templates/book_providers/direct_read_button.html +++ b/openlibrary/templates/book_providers/direct_read_button.html @@ -1,21 +1,20 @@ $def with(provider) -$if provider.get('url') or provider.get('rel') == 'http://opds-spec.org/acquisition/open-access': - $ url = provider.get('url') or provider.get('href') +$if provider.access == 'open-access': -$elif provider.get('rel') == 'http://opds-spec.org/acquisition/sample': +$elif provider.access == 'sample': From 49801341556f8de927104e463d7e5f6b1c803fb2 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 18 Oct 2022 00:49:57 -0700 Subject: [PATCH 06/23] Add optional providers/description to search resp --- openlibrary/plugins/worksearch/code.py | 10 +++++- .../plugins/worksearch/schemes/__init__.py | 5 +++ .../plugins/worksearch/schemes/authors.py | 1 + .../plugins/worksearch/schemes/subjects.py | 1 + .../plugins/worksearch/schemes/works.py | 31 +++++++++++++++++++ 5 files changed, 47 insertions(+), 1 deletion(-) diff --git a/openlibrary/plugins/worksearch/code.py b/openlibrary/plugins/worksearch/code.py index 99db1a1ca44..ff402d63009 100644 --- a/openlibrary/plugins/worksearch/code.py +++ b/openlibrary/plugins/worksearch/code.py @@ -216,7 +216,9 @@ def run_solr_query( q = f'{q} {params_q}' if q else params_q if q: - solr_fields = set(fields or scheme.default_fetched_fields) + solr_fields = ( + set(fields or scheme.default_fetched_fields) - scheme.non_solr_fields + ) if 'editions' in solr_fields: solr_fields.remove('editions') solr_fields.add('editions:[subquery]') @@ -236,6 +238,12 @@ def run_solr_query( solr_result = response.json() if response else None end_time = time.time() duration = end_time - start_time + + if solr_result is not None: + non_solr_fields = set(fields) & scheme.non_solr_fields + if non_solr_fields: + scheme.add_non_solr_fields(non_solr_fields, solr_result) + return SearchResponse.from_solr_result(solr_result, sort, url, time=duration) diff --git a/openlibrary/plugins/worksearch/schemes/__init__.py b/openlibrary/plugins/worksearch/schemes/__init__.py index e75c773307b..edcac46baa6 100644 --- a/openlibrary/plugins/worksearch/schemes/__init__.py +++ b/openlibrary/plugins/worksearch/schemes/__init__.py @@ -17,6 +17,8 @@ class SearchScheme: universe: list[str] # All actual solr fields that can be in a user query all_fields: set[str] + # Fields that can be read, but which aren't stored in solr + non_solr_fields: set[str] # These fields are fetched for facets and can also be url params facet_fields: set[str] # Mapping of user-only fields to solr fields @@ -120,3 +122,6 @@ def q_to_solr_params( cur_solr_params: list[tuple[str, str]], ) -> list[tuple[str, str]]: return [('q', q)] + + def add_non_solr_fields(self, solr_fields: set[str], solr_result: dict) -> None: + raise NotImplementedError() diff --git a/openlibrary/plugins/worksearch/schemes/authors.py b/openlibrary/plugins/worksearch/schemes/authors.py index 62bd9255664..86ad2f0ef8f 100644 --- a/openlibrary/plugins/worksearch/schemes/authors.py +++ b/openlibrary/plugins/worksearch/schemes/authors.py @@ -19,6 +19,7 @@ class AuthorSearchScheme(SearchScheme): 'top_subjects', 'work_count', } + non_solr_fields: set[str] = set() facet_fields: set[str] = set() field_name_map: dict[str, str] = {} sorts = { diff --git a/openlibrary/plugins/worksearch/schemes/subjects.py b/openlibrary/plugins/worksearch/schemes/subjects.py index 432dbd48a37..3a265425345 100644 --- a/openlibrary/plugins/worksearch/schemes/subjects.py +++ b/openlibrary/plugins/worksearch/schemes/subjects.py @@ -15,6 +15,7 @@ class SubjectSearchScheme(SearchScheme): 'subject_type', 'work_count', } + non_solr_fields: set[str] = set() facet_fields: set[str] = set() field_name_map: dict[str, str] = {} sorts = { diff --git a/openlibrary/plugins/worksearch/schemes/works.py b/openlibrary/plugins/worksearch/schemes/works.py index 0f689ce51a5..a5f750a7d6a 100644 --- a/openlibrary/plugins/worksearch/schemes/works.py +++ b/openlibrary/plugins/worksearch/schemes/works.py @@ -8,6 +8,7 @@ import luqum.tree import web +import infogami from openlibrary.plugins.upstream.utils import convert_iso_to_marc from openlibrary.plugins.worksearch.schemes import SearchScheme from openlibrary.solr.query_utils import ( @@ -92,6 +93,10 @@ class WorkSearchScheme(SearchScheme): "ddc_sort", "osp_count", } + non_solr_fields = { + 'description', + 'providers', + } facet_fields = { "has_fulltext", "author_facet", @@ -509,6 +514,32 @@ def convert_work_query_to_edition_query(work_query: str) -> str: new_params.append(('editions.fl', ','.join(edition_fields))) return new_params + def add_non_solr_fields(self, non_solr_fields: set[str], solr_result: dict) -> None: + # Augment with data from db + edition_keys = [ + ed_doc['key'] + for doc in solr_result['response']['docs'] + for ed_doc in doc.get('editions', {}).get('docs', []) + ] + editions = web.ctx.site.get_many(edition_keys) + ed_key_to_record = {ed.key: ed for ed in editions if ed.key in edition_keys} + + from openlibrary.book_providers import EbookProvider + + for doc in solr_result['response']['docs']: + for ed_doc in doc.get('editions', {}).get('docs', []): + ed = ed_key_to_record.get(ed_doc['key']) + for field in non_solr_fields: + val = getattr(ed, field) + if isinstance(val, infogami.infobase.client.Nothing): + continue + if field == 'description': + ed_doc[field] = val if isinstance(val, str) else val.value + elif field == 'providers': + ed_doc[field] = [ + EbookProvider.from_json(dict(p)).__dict__ for p in val + ] + def lcc_transform(sf: luqum.tree.SearchField): # e.g. lcc:[NC1 TO NC1000] to lcc:[NC-0001.00000000 TO NC-1000.00000000] From feb72eeabbcbd2056762353193293c4a026847b6 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 18 Oct 2022 01:22:20 -0700 Subject: [PATCH 07/23] Search results page shows read when providers --- openlibrary/plugins/worksearch/code.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/openlibrary/plugins/worksearch/code.py b/openlibrary/plugins/worksearch/code.py index ff402d63009..68c3deb0e1a 100644 --- a/openlibrary/plugins/worksearch/code.py +++ b/openlibrary/plugins/worksearch/code.py @@ -309,14 +309,18 @@ def do_search( :param spellcheck_count: Not really used; should probably drop """ + fields = WorkSearchScheme.default_fetched_fields | {'editions', 'providers'} if web.cookies(sfw="").sfw == 'yes': - fields = list( - WorkSearchScheme.default_fetched_fields | {'editions'} | {'subject'} - ) - else: - fields = list(WorkSearchScheme.default_fetched_fields | {'editions'}) + fields |= {'subject'} + return run_solr_query( - WorkSearchScheme(), param, rows, page, sort, spellcheck_count, fields=fields + WorkSearchScheme(), + param, + rows, + page, + sort, + spellcheck_count, + fields=list(fields), ) From cc1dbc52888510d8cd232c409cd582ae1618903f Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 18 Oct 2022 23:07:54 -0700 Subject: [PATCH 08/23] Make EbookProvider handle html access enum --- openlibrary/book_providers.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 49631a76fad..30d7f59c603 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -63,9 +63,20 @@ def from_json(json: dict) -> 'EbookProvider': # OPDS-style provider return EbookProvider.from_opds_json(json) elif 'url' in json: + # We have an inconsistency in our API + html_access: dict[str, ProviderAccessLiteral] = { + 'read': 'open-access', + 'listen': 'open-access', + 'buy': 'buy', + 'borrow': 'borrow', + 'preview': 'sample', + } + access = json.get('access', 'open-access') + if access in html_access: + access = html_access[access] # Pressbooks/OL-style return EbookProvider( - access=json.get('access', 'open-access'), + access=access, format=json.get('format', 'web'), price=json.get('price', None), url=json['url'], From eb4234c0db2fcdea89c12eb5c1179d75dff19182 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Wed, 19 Oct 2022 08:09:05 -0700 Subject: [PATCH 09/23] Generate providers sections for TBP --- openlibrary/book_providers.py | 90 +++++++++++++++++++ .../plugins/worksearch/schemes/works.py | 17 ++-- 2 files changed, 100 insertions(+), 7 deletions(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 30d7f59c603..48b1fbf63ef 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -209,6 +209,18 @@ def get_access( # Most providers are for public-only ebooks right now return EbookAccess.PUBLIC + def get_ebook_providers( + self, + edition: Edition, + ) -> list[EbookProvider]: + """ + Return a list of EbookProvider objects for the edition. + """ + if edition.providers: + return [EbookProvider.from_json(dict(p)) for p in edition.providers] + else: + return [] + class InternetArchiveProvider(AbstractBookProvider[IALiteMetadata]): short_name = 'ia' @@ -292,6 +304,23 @@ def render_download_options(self, edition: Edition, extra_args: list | None = No def is_own_ocaid(self, ocaid: str) -> bool: return 'librivox' in ocaid + def get_ebook_providers( + self, + edition: Edition, + ) -> list[EbookProvider]: + """ + Return a list of EbookProvider objects for the edition. + """ + return [ + EbookProvider( + access='open-access', + format='audio', + price=None, + url=f'https://librivox.org/{self.get_best_identifier(edition)}', + provider_name=self.short_name, + ) + ] + class ProjectGutenbergProvider(AbstractBookProvider): short_name = 'gutenberg' @@ -300,6 +329,23 @@ class ProjectGutenbergProvider(AbstractBookProvider): def is_own_ocaid(self, ocaid: str) -> bool: return ocaid.endswith('gut') + def get_ebook_providers( + self, + edition: Edition, + ) -> list[EbookProvider]: + """ + Return a list of EbookProvider objects for the edition. + """ + return [ + EbookProvider( + access='open-access', + format='web', + price=None, + url=f'https://www.gutenberg.org/ebooks/{self.get_best_identifier(edition)}', + provider_name=self.short_name, + ) + ] + class StandardEbooksProvider(AbstractBookProvider): short_name = 'standard_ebooks' @@ -309,6 +355,33 @@ def is_own_ocaid(self, ocaid: str) -> bool: # Standard ebooks isn't archived on IA return False + def get_ebook_providers( + self, + edition: Edition, + ) -> list[EbookProvider]: + """ + Return a list of EbookProvider objects for the edition. + """ + standard_ebooks_id = self.get_best_identifier(edition) + base_url = 'https://standardebooks.org/ebooks/' + standard_ebooks_id + flat_id = standard_ebooks_id.replace('/', '_') + return [ + EbookProvider( + access='open-access', + format='web', + price=None, + url=f'{base_url}/text/single-page', + provider_name=self.short_name, + ), + EbookProvider( + access='open-access', + format='epub', + price=None, + url=f'{base_url}/downloads/{flat_id}.epub', + provider_name=self.short_name, + ), + ] + class OpenStaxProvider(AbstractBookProvider): short_name = 'openstax' @@ -317,6 +390,23 @@ class OpenStaxProvider(AbstractBookProvider): def is_own_ocaid(self, ocaid: str) -> bool: return False + def get_ebook_providers( + self, + edition: Edition, + ) -> list[EbookProvider]: + """ + Return a list of EbookProvider objects for the edition. + """ + return [ + EbookProvider( + access='open-access', + format='web', + price=None, + url=f'https://openstax.org/details/books/{self.get_best_identifier(edition)}', + provider_name=self.short_name, + ) + ] + class CitaPressProvider(AbstractBookProvider): short_name = 'cita_press' diff --git a/openlibrary/plugins/worksearch/schemes/works.py b/openlibrary/plugins/worksearch/schemes/works.py index a5f750a7d6a..4ee8fb063f2 100644 --- a/openlibrary/plugins/worksearch/schemes/works.py +++ b/openlibrary/plugins/worksearch/schemes/works.py @@ -524,21 +524,24 @@ def add_non_solr_fields(self, non_solr_fields: set[str], solr_result: dict) -> N editions = web.ctx.site.get_many(edition_keys) ed_key_to_record = {ed.key: ed for ed in editions if ed.key in edition_keys} - from openlibrary.book_providers import EbookProvider + from openlibrary.book_providers import get_book_provider for doc in solr_result['response']['docs']: for ed_doc in doc.get('editions', {}).get('docs', []): ed = ed_key_to_record.get(ed_doc['key']) for field in non_solr_fields: val = getattr(ed, field) - if isinstance(val, infogami.infobase.client.Nothing): - continue - if field == 'description': - ed_doc[field] = val if isinstance(val, str) else val.value - elif field == 'providers': + if field == 'providers': + provider = get_book_provider(ed) + if not provider: + continue ed_doc[field] = [ - EbookProvider.from_json(dict(p)).__dict__ for p in val + p.__dict__ for p in provider.get_ebook_providers(ed) ] + elif isinstance(val, infogami.infobase.client.Nothing): + continue + elif field == 'description': + ed_doc[field] = val if isinstance(val, str) else val.value def lcc_transform(sf: luqum.tree.SearchField): From 77b1896e5250f99b767a4678079a9a3fdd830933 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Wed, 19 Oct 2022 09:32:39 -0700 Subject: [PATCH 10/23] TMP: Hide weird /request linsk from testing! --- openlibrary/templates/type/edition/view.html | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openlibrary/templates/type/edition/view.html b/openlibrary/templates/type/edition/view.html index 72193c4ae7d..6ec142adfcb 100644 --- a/openlibrary/templates/type/edition/view.html +++ b/openlibrary/templates/type/edition/view.html @@ -97,6 +97,12 @@ $var title: $title + + $code: def has_any(*keys): return any(edition[k] for k in keys) From b8083feaab9bd2a3db4206751d8418e649589b36 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Sat, 19 Nov 2022 03:33:24 -0500 Subject: [PATCH 11/23] Fix mypy --- openlibrary/plugins/worksearch/schemes/works.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openlibrary/plugins/worksearch/schemes/works.py b/openlibrary/plugins/worksearch/schemes/works.py index 4ee8fb063f2..5f16aaaf224 100644 --- a/openlibrary/plugins/worksearch/schemes/works.py +++ b/openlibrary/plugins/worksearch/schemes/works.py @@ -515,13 +515,15 @@ def convert_work_query_to_edition_query(work_query: str) -> str: return new_params def add_non_solr_fields(self, non_solr_fields: set[str], solr_result: dict) -> None: + from openlibrary.plugins.upstream.models import Edition + # Augment with data from db edition_keys = [ ed_doc['key'] for doc in solr_result['response']['docs'] for ed_doc in doc.get('editions', {}).get('docs', []) ] - editions = web.ctx.site.get_many(edition_keys) + editions = cast(list[Edition], web.ctx.site.get_many(edition_keys)) ed_key_to_record = {ed.key: ed for ed in editions if ed.key in edition_keys} from openlibrary.book_providers import get_book_provider @@ -529,6 +531,7 @@ def add_non_solr_fields(self, non_solr_fields: set[str], solr_result: dict) -> N for doc in solr_result['response']['docs']: for ed_doc in doc.get('editions', {}).get('docs', []): ed = ed_key_to_record.get(ed_doc['key']) + assert ed for field in non_solr_fields: val = getattr(ed, field) if field == 'providers': From 4e4b87092583c7ed29c6239c766eabde99c92072 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jun 2024 15:50:20 +0000 Subject: [PATCH 12/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openlibrary/i18n/messages.pot | 38 +++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/openlibrary/i18n/messages.pot b/openlibrary/i18n/messages.pot index 09c934151a7..7b5797dd081 100644 --- a/openlibrary/i18n/messages.pot +++ b/openlibrary/i18n/messages.pot @@ -272,7 +272,8 @@ msgstr "" msgid "YAML Representation:" msgstr "" -#: BookPreview.html books/edit/edition.html editpage.html +#: BookPreview.html book_providers/direct_read_button.html +#: books/edit/edition.html editpage.html msgid "Preview" msgstr "" @@ -836,6 +837,7 @@ msgid "Currently Reading" msgstr "" #: LoanReadForm.html ReadButton.html book_providers/cita_press_read_button.html +#: book_providers/direct_read_button.html #: book_providers/gutenberg_read_button.html #: book_providers/openstax_read_button.html #: book_providers/standard_ebooks_read_button.html books/custom_carousel.html @@ -2194,12 +2196,14 @@ msgid "BookReader" msgstr "" #: admin/loans_table.html book_providers/cita_press_download_options.html +#: book_providers/direct_download_options.html #: book_providers/ia_download_options.html #: book_providers/openstax_download_options.html books/edit/edition.html msgid "PDF" msgstr "" -#: admin/loans_table.html book_providers/gutenberg_download_options.html +#: admin/loans_table.html book_providers/direct_download_options.html +#: book_providers/gutenberg_download_options.html #: book_providers/ia_download_options.html #: book_providers/standard_ebooks_download_options.html books/edit/edition.html msgid "ePub" @@ -2826,6 +2830,7 @@ msgid "Died" msgstr "" #: book_providers/cita_press_download_options.html +#: book_providers/direct_download_options.html #: book_providers/gutenberg_download_options.html #: book_providers/ia_download_options.html #: book_providers/librivox_download_options.html @@ -2863,6 +2868,31 @@ msgstr "" msgid "Learn more" msgstr "" +#: book_providers/direct_download_options.html +msgid "Download as an ePub" +msgstr "" + +#: book_providers/direct_download_options.html +msgid "Download as a PDF" +msgstr "" + +#: book_providers/direct_download_options.html +msgid "Download as a Kindle MOBI" +msgstr "" + +#: book_providers/direct_download_options.html +#: book_providers/ia_download_options.html +msgid "MOBI" +msgstr "" + +#: book_providers/direct_download_options.html +msgid "More options" +msgstr "" + +#: book_providers/direct_read_button.html +msgid "Read free online" +msgstr "" + #: book_providers/gutenberg_download_options.html msgid "Download an HTML from Project Gutenberg" msgstr "" @@ -2926,10 +2956,6 @@ msgstr "" msgid "Download a MOBI file from Internet Archive" msgstr "" -#: book_providers/ia_download_options.html -msgid "MOBI" -msgstr "" - #: book_providers/ia_download_options.html msgid "Download open DAISY from Internet Archive (print-disabled format)" msgstr "" From 2268e5354eed1073838ae246b9c7d305cf632bdf Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Wed, 12 Jun 2024 13:32:53 -0700 Subject: [PATCH 13/23] Linting: type hints + exception for too many branches in worksearch --- openlibrary/book_providers.py | 12 ++++++------ openlibrary/plugins/worksearch/code.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 48b1fbf63ef..68bd2d99405 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -78,9 +78,9 @@ def from_json(json: dict) -> 'EbookProvider': return EbookProvider( access=access, format=json.get('format', 'web'), - price=json.get('price', None), + price=json.get('price'), url=json['url'], - provider_name=json.get('provider_name', None), + provider_name=json.get('provider_name'), ) else: raise ValueError(f'Unknown ebook provider format: {json}') @@ -102,7 +102,7 @@ def from_opds_json(json: dict) -> 'EbookProvider': elif mimetype == 'text/html': fmt = 'web' else: - logger.warn(f'Unknown mimetype: {mimetype}') + logger.warning(f'Unknown mimetype: {mimetype}') fmt = 'web' if json.get('properties', {}).get('price', None): @@ -135,7 +135,7 @@ class AbstractBookProvider(Generic[TProviderMetadata]): The key in the identifiers field on editions; see https://openlibrary.org/config/edition """ - identifier_key: Optional[str] + identifier_key: str | None def get_olids(self, identifier): return web.ctx.site.things( @@ -429,7 +429,7 @@ def solr_key(self): # TODO: Not implemented yet return None - def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> list[str]: + def get_identifiers(self, ed_or_solr: Edition | dict) -> list[str]: # It's an edition if ed_or_solr.get('providers'): return [ @@ -441,7 +441,7 @@ def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> list[str]: # TODO: Not implemented for search/solr yet return [] - def render_read_button(self, ed_or_solr: Union[Edition, dict]): + def render_read_button(self, ed_or_solr: Edition | dict): acq_sorted = sorted( ( p diff --git a/openlibrary/plugins/worksearch/code.py b/openlibrary/plugins/worksearch/code.py index 68c3deb0e1a..f122de43c65 100644 --- a/openlibrary/plugins/worksearch/code.py +++ b/openlibrary/plugins/worksearch/code.py @@ -130,7 +130,7 @@ def execute_solr_query( public(has_solr_editions_enabled) -def run_solr_query( +def run_solr_query( # noqa: PLR0912 scheme: SearchScheme, param: dict | None = None, rows=100, From 54b5f913d4cfc732cce41182e8172c72a7dd6e4b Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Tue, 18 Jun 2024 18:02:13 -0700 Subject: [PATCH 14/23] Basic /borrow -> webbook functionality --- openlibrary/book_providers.py | 2 +- openlibrary/i18n/messages.pot | 4 ++++ openlibrary/plugins/upstream/borrow.py | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 68bd2d99405..bf9efbf8419 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -1,6 +1,6 @@ from dataclasses import dataclass import logging -from typing import Optional, TypedDict, Union, Literal, cast, TypeVar, Generic +from typing import TypedDict, Literal, cast, TypeVar, Generic from collections.abc import Callable, Iterator import web diff --git a/openlibrary/i18n/messages.pot b/openlibrary/i18n/messages.pot index 7b5797dd081..52535188dc4 100644 --- a/openlibrary/i18n/messages.pot +++ b/openlibrary/i18n/messages.pot @@ -3146,6 +3146,10 @@ msgstr "" msgid "Oxford University Press; Penguin; W.W. Norton" msgstr "" +#: books/add.html +msgid "For a web book, use the URL." +msgstr "" + #: books/add.html books/edit/edition.html msgid "When was it published?" msgstr "" diff --git a/openlibrary/plugins/upstream/borrow.py b/openlibrary/plugins/upstream/borrow.py index 0f7d03cbcd6..f4b0d43fe6d 100644 --- a/openlibrary/plugins/upstream/borrow.py +++ b/openlibrary/plugins/upstream/borrow.py @@ -130,6 +130,18 @@ def POST(self, key): if not edition: raise web.notfound() + from openlibrary.book_providers import get_book_provider + + # Direct to the first web book if at least one is available. + if ( + action == "borrow" + and (provider := get_book_provider(edition)) + and (providers := provider.get_ebook_providers(edition)) + and providers[0].access == "open-access" + ): + stats.increment('ol.loans.webbook') + raise web.seeother(providers[0].url) + archive_url = get_bookreader_stream_url(edition.ocaid) + '?ref=ol' if i._autoReadAloud is not None: archive_url += '&_autoReadAloud=show' From 5a2a98077026e18b90f0bcf8dc9343c10e679218 Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Tue, 9 Jul 2024 11:31:04 -0700 Subject: [PATCH 15/23] Revert "Add LCP download links for IA" This reverts commit 152eec75f0b9c795f650f1cea7a36179dfb6a80b. --- openlibrary/templates/book_providers/ia_download_options.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/templates/book_providers/ia_download_options.html b/openlibrary/templates/book_providers/ia_download_options.html index a32e226ba72..99516e94513 100644 --- a/openlibrary/templates/book_providers/ia_download_options.html +++ b/openlibrary/templates/book_providers/ia_download_options.html @@ -4,7 +4,7 @@

$_("Download Options")

    - $if formats['pdf']: + $if formats.get('pdf'):
  • $_("PDF")
  • $if formats['txt']:
  • $_("Plain text")
  • From 7418b02ce753a0659d131797fbea4b987eb5163a Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Tue, 9 Jul 2024 11:39:46 -0700 Subject: [PATCH 16/23] Linting: placate ruff --- openlibrary/book_providers.py | 4 +++- openlibrary/plugins/upstream/borrow.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index bf9efbf8419..3f5e30ad5f0 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -441,7 +441,9 @@ def get_identifiers(self, ed_or_solr: Edition | dict) -> list[str]: # TODO: Not implemented for search/solr yet return [] - def render_read_button(self, ed_or_solr: Edition | dict): + def render_read_button( + self, ed_or_solr: Edition | dict, analytics_attr: Callable[[str], str] + ): acq_sorted = sorted( ( p diff --git a/openlibrary/plugins/upstream/borrow.py b/openlibrary/plugins/upstream/borrow.py index f4b0d43fe6d..e5a2acf2401 100644 --- a/openlibrary/plugins/upstream/borrow.py +++ b/openlibrary/plugins/upstream/borrow.py @@ -112,7 +112,7 @@ class borrow(delegate.page): def GET(self, key): return self.POST(key) - def POST(self, key): + def POST(self, key): # noqa: PLR0915 """Called when the user wants to borrow the edition""" i = web.input( From 1d8fd6c9c34a5f6161378f6ad201e02aba832e36 Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Tue, 9 Jul 2024 12:15:17 -0700 Subject: [PATCH 17/23] Fix: use conditions in place of assert for edition existence --- .../plugins/worksearch/schemes/works.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/openlibrary/plugins/worksearch/schemes/works.py b/openlibrary/plugins/worksearch/schemes/works.py index 5f16aaaf224..359bda9bee9 100644 --- a/openlibrary/plugins/worksearch/schemes/works.py +++ b/openlibrary/plugins/worksearch/schemes/works.py @@ -530,21 +530,20 @@ def add_non_solr_fields(self, non_solr_fields: set[str], solr_result: dict) -> N for doc in solr_result['response']['docs']: for ed_doc in doc.get('editions', {}).get('docs', []): - ed = ed_key_to_record.get(ed_doc['key']) - assert ed - for field in non_solr_fields: - val = getattr(ed, field) - if field == 'providers': - provider = get_book_provider(ed) - if not provider: + if ed := ed_key_to_record.get(ed_doc['key']): + for field in non_solr_fields: + val = getattr(ed, field) + if field == 'providers': + provider = get_book_provider(ed) + if not provider: + continue + ed_doc[field] = [ + p.__dict__ for p in provider.get_ebook_providers(ed) + ] + elif isinstance(val, infogami.infobase.client.Nothing): continue - ed_doc[field] = [ - p.__dict__ for p in provider.get_ebook_providers(ed) - ] - elif isinstance(val, infogami.infobase.client.Nothing): - continue - elif field == 'description': - ed_doc[field] = val if isinstance(val, str) else val.value + elif field == 'description': + ed_doc[field] = val if isinstance(val, str) else val.value def lcc_transform(sf: luqum.tree.SearchField): From c1891d572bddc0632b2764a29abb37109e26c05c Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Tue, 9 Jul 2024 12:20:52 -0700 Subject: [PATCH 18/23] Feature: ensure action==read also succeeds Both `/borrow?action==borrow` and `/borrow?action==read` should succeed vis-a-vis web book access. --- openlibrary/plugins/upstream/borrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/plugins/upstream/borrow.py b/openlibrary/plugins/upstream/borrow.py index e5a2acf2401..ae75d4b4326 100644 --- a/openlibrary/plugins/upstream/borrow.py +++ b/openlibrary/plugins/upstream/borrow.py @@ -134,7 +134,7 @@ def POST(self, key): # noqa: PLR0915 # Direct to the first web book if at least one is available. if ( - action == "borrow" + action in ["borrow", "read"] and (provider := get_book_provider(edition)) and (providers := provider.get_ebook_providers(edition)) and providers[0].access == "open-access" From abaf398290376f56a6614398c62e266f7021b624 Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Tue, 9 Jul 2024 13:45:09 -0700 Subject: [PATCH 19/23] Refactor: rename provider -> acquisition --- openlibrary/book_providers.py | 82 ++++++++++--------- .../plugins/worksearch/schemes/works.py | 2 +- 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 3f5e30ad5f0..935933c0775 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -14,7 +14,9 @@ logger = logging.getLogger("openlibrary.book_providers") -ProviderAccessLiteral = Literal['sample', 'buy', 'open-access', 'borrow', 'subscribe'] +AcquisitionAccessLiteral = Literal[ + 'sample', 'buy', 'open-access', 'borrow', 'subscribe' +] class EbookAccess(OrderedEnum): @@ -29,7 +31,7 @@ def to_solr_str(self): return self.name.lower() @staticmethod - def from_provider_literal(literal: ProviderAccessLiteral) -> 'EbookAccess': + def from_acquisition_literal(literal: AcquisitionAccessLiteral) -> 'EbookAccess': if literal == 'sample': # We need to update solr to handle these! Requires full reindex return EbookAccess.PRINTDISABLED @@ -46,25 +48,25 @@ def from_provider_literal(literal: ProviderAccessLiteral) -> 'EbookAccess': @dataclass -class EbookProvider: - access: ProviderAccessLiteral +class Acquisition: + access: AcquisitionAccessLiteral format: Literal['web', 'pdf', 'epub', 'audio'] price: str | None url: str - provider_name: str | None = None + acquisition_name: str | None = None @property def ebook_access(self) -> EbookAccess: - return EbookAccess.from_provider_literal(self.access) + return EbookAccess.from_acquisition_literal(self.access) @staticmethod - def from_json(json: dict) -> 'EbookProvider': + def from_json(json: dict) -> 'Acquisition': if 'href' in json: # OPDS-style provider - return EbookProvider.from_opds_json(json) + return Acquisition.from_opds_json(json) elif 'url' in json: # We have an inconsistency in our API - html_access: dict[str, ProviderAccessLiteral] = { + html_access: dict[str, AcquisitionAccessLiteral] = { 'read': 'open-access', 'listen': 'open-access', 'buy': 'buy', @@ -75,18 +77,18 @@ def from_json(json: dict) -> 'EbookProvider': if access in html_access: access = html_access[access] # Pressbooks/OL-style - return EbookProvider( + return Acquisition( access=access, format=json.get('format', 'web'), price=json.get('price'), url=json['url'], - provider_name=json.get('provider_name'), + acquisition_name=json.get('acquisition_name'), ) else: - raise ValueError(f'Unknown ebook provider format: {json}') + raise ValueError(f'Unknown ebook acquisition format: {json}') @staticmethod - def from_opds_json(json: dict) -> 'EbookProvider': + def from_opds_json(json: dict) -> 'Acquisition': if json.get('properties', {}).get('indirectAcquisition', None): mimetype = json['properties']['indirectAcquisition'][0]['type'] else: @@ -110,12 +112,12 @@ def from_opds_json(json: dict) -> 'EbookProvider': else: price = None - return EbookProvider( + return Acquisition( access=json['rel'].split('/')[-1], format=fmt, price=price, url=json['href'], - provider_name=json.get('name'), + acquisition_name=json.get('name'), ) @@ -209,15 +211,15 @@ def get_access( # Most providers are for public-only ebooks right now return EbookAccess.PUBLIC - def get_ebook_providers( + def get_ebook_acquisitions( self, edition: Edition, - ) -> list[EbookProvider]: + ) -> list[Acquisition]: """ Return a list of EbookProvider objects for the edition. """ if edition.providers: - return [EbookProvider.from_json(dict(p)) for p in edition.providers] + return [Acquisition.from_json(dict(p)) for p in edition.providers] else: return [] @@ -304,20 +306,20 @@ def render_download_options(self, edition: Edition, extra_args: list | None = No def is_own_ocaid(self, ocaid: str) -> bool: return 'librivox' in ocaid - def get_ebook_providers( + def get_ebook_acquisitions( self, edition: Edition, - ) -> list[EbookProvider]: + ) -> list[Acquisition]: """ Return a list of EbookProvider objects for the edition. """ return [ - EbookProvider( + Acquisition( access='open-access', format='audio', price=None, url=f'https://librivox.org/{self.get_best_identifier(edition)}', - provider_name=self.short_name, + acquisition_name=self.short_name, ) ] @@ -329,20 +331,20 @@ class ProjectGutenbergProvider(AbstractBookProvider): def is_own_ocaid(self, ocaid: str) -> bool: return ocaid.endswith('gut') - def get_ebook_providers( + def get_ebook_acquisitions( self, edition: Edition, - ) -> list[EbookProvider]: + ) -> list[Acquisition]: """ Return a list of EbookProvider objects for the edition. """ return [ - EbookProvider( + Acquisition( access='open-access', format='web', price=None, url=f'https://www.gutenberg.org/ebooks/{self.get_best_identifier(edition)}', - provider_name=self.short_name, + acquisition_name=self.short_name, ) ] @@ -355,10 +357,10 @@ def is_own_ocaid(self, ocaid: str) -> bool: # Standard ebooks isn't archived on IA return False - def get_ebook_providers( + def get_ebook_acquisitions( self, edition: Edition, - ) -> list[EbookProvider]: + ) -> list[Acquisition]: """ Return a list of EbookProvider objects for the edition. """ @@ -366,19 +368,19 @@ def get_ebook_providers( base_url = 'https://standardebooks.org/ebooks/' + standard_ebooks_id flat_id = standard_ebooks_id.replace('/', '_') return [ - EbookProvider( + Acquisition( access='open-access', format='web', price=None, url=f'{base_url}/text/single-page', - provider_name=self.short_name, + acquisition_name=self.short_name, ), - EbookProvider( + Acquisition( access='open-access', format='epub', price=None, url=f'{base_url}/downloads/{flat_id}.epub', - provider_name=self.short_name, + acquisition_name=self.short_name, ), ] @@ -390,20 +392,20 @@ class OpenStaxProvider(AbstractBookProvider): def is_own_ocaid(self, ocaid: str) -> bool: return False - def get_ebook_providers( + def get_ebook_acquisitions( self, edition: Edition, - ) -> list[EbookProvider]: + ) -> list[Acquisition]: """ Return a list of EbookProvider objects for the edition. """ return [ - EbookProvider( + Acquisition( access='open-access', format='web', price=None, url=f'https://openstax.org/details/books/{self.get_best_identifier(edition)}', - provider_name=self.short_name, + acquisition_name=self.short_name, ) ] @@ -434,7 +436,7 @@ def get_identifiers(self, ed_or_solr: Edition | dict) -> list[str]: if ed_or_solr.get('providers'): return [ provider.url - for provider in map(EbookProvider.from_json, ed_or_solr['providers']) + for provider in map(Acquisition.from_json, ed_or_solr['providers']) if provider.ebook_access >= EbookAccess.PRINTDISABLED ] else: @@ -447,7 +449,7 @@ def render_read_button( acq_sorted = sorted( ( p - for p in map(EbookProvider.from_json, ed_or_solr.get('providers', [])) + for p in map(Acquisition.from_json, ed_or_solr.get('providers', [])) if p.ebook_access >= EbookAccess.PRINTDISABLED ), key=lambda p: p.ebook_access, @@ -466,8 +468,8 @@ def get_access( Return the access level of the edition. """ # For now assume 0 is best - return EbookAccess.from_provider_literal( - EbookProvider.from_json(edition['providers'][0]).access + return EbookAccess.from_acquisition_literal( + Acquisition.from_json(edition['providers'][0]).access ) diff --git a/openlibrary/plugins/worksearch/schemes/works.py b/openlibrary/plugins/worksearch/schemes/works.py index 359bda9bee9..40e4d211250 100644 --- a/openlibrary/plugins/worksearch/schemes/works.py +++ b/openlibrary/plugins/worksearch/schemes/works.py @@ -538,7 +538,7 @@ def add_non_solr_fields(self, non_solr_fields: set[str], solr_result: dict) -> N if not provider: continue ed_doc[field] = [ - p.__dict__ for p in provider.get_ebook_providers(ed) + p.__dict__ for p in provider.get_ebook_acquisitions(ed) ] elif isinstance(val, infogami.infobase.client.Nothing): continue From 2f9d60e239217d13d9666019f895f1e49b59122a Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Tue, 9 Jul 2024 15:28:51 -0700 Subject: [PATCH 20/23] Fix: omit None from solr fields Query: is this something where the fix should occur upstream? --- openlibrary/utils/solr.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/openlibrary/utils/solr.py b/openlibrary/utils/solr.py index 3f8f06d514b..aa602cac04e 100644 --- a/openlibrary/utils/solr.py +++ b/openlibrary/utils/solr.py @@ -47,7 +47,14 @@ def get( logger.info(f"solr /get: {key}, {fields}") resp = self.session.get( f"{self.base_url}/get", - params={'id': key, **({'fl': ','.join(fields)} if fields else {})}, + params={ + 'id': key, + **( + {'fl': ','.join([field for field in fields if field])} + if fields + else {} + ), + }, ).json() # Solr returns {doc: null} if the record isn't there From 6298877d91ba226b566ce4d45b8acbc881621747 Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Tue, 9 Jul 2024 20:12:15 -0700 Subject: [PATCH 21/23] Feature: add default generic toast for direct providers --- openlibrary/i18n/messages.pot | 10 ++++++++++ .../templates/book_providers/direct_read_button.html | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/openlibrary/i18n/messages.pot b/openlibrary/i18n/messages.pot index 52535188dc4..799efd8466a 100644 --- a/openlibrary/i18n/messages.pot +++ b/openlibrary/i18n/messages.pot @@ -451,6 +451,7 @@ msgstr "" #: BookPreview.html CreateListModal.html DonateModal.html NotesModal.html #: ObservationsModal.html ShareModal.html #: book_providers/cita_press_read_button.html +#: book_providers/direct_read_button.html #: book_providers/gutenberg_read_button.html #: book_providers/librivox_read_button.html #: book_providers/openstax_read_button.html @@ -2893,6 +2894,15 @@ msgstr "" msgid "Read free online" msgstr "" +#: book_providers/direct_read_button.html +#, python-format +msgid "" +"This book is available from %s, a site external to " +"Open Library. By visiting this external site, you will be subject to that" +" site's privacy policy and terms of use. Open Library cannot make any " +"guarantees about this external site's content." +msgstr "" + #: book_providers/gutenberg_download_options.html msgid "Download an HTML from Project Gutenberg" msgstr "" diff --git a/openlibrary/templates/book_providers/direct_read_button.html b/openlibrary/templates/book_providers/direct_read_button.html index 468c7a52673..b74dfdb43a8 100644 --- a/openlibrary/templates/book_providers/direct_read_button.html +++ b/openlibrary/templates/book_providers/direct_read_button.html @@ -7,6 +7,8 @@ title="$_('Read free online')" class="cta-btn cta-btn--available cta-btn--read cta-btn--external cta-btn--direct" target="_blank" + aria-haspopup="true" + aria-controls="direct-provider-toast" >$_('Read')
@@ -18,3 +20,11 @@ href="$(provider.url)" >$_('Preview') + +$if render_once('direct-provider-toast'): + From 8961a53b6aecbe3270637d54cb58c1f6b8ac5668 Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Wed, 10 Jul 2024 14:30:35 -0700 Subject: [PATCH 22/23] Implement code review feedback --- openlibrary/book_providers.py | 40 ++++++++++++++----- openlibrary/i18n/messages.pot | 11 ++--- openlibrary/plugins/upstream/borrow.py | 7 ++-- .../plugins/worksearch/schemes/works.py | 29 ++++++++------ .../book_providers/direct_read_button.html | 15 ++++--- openlibrary/templates/type/edition/view.html | 6 --- 6 files changed, 61 insertions(+), 47 deletions(-) diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 935933c0775..36f32bce947 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -1,7 +1,8 @@ from dataclasses import dataclass import logging -from typing import TypedDict, Literal, cast, TypeVar, Generic from collections.abc import Callable, Iterator +from typing import TypedDict, Literal, cast, TypeVar, Generic +from urllib import parse import web from web import uniq @@ -31,7 +32,7 @@ def to_solr_str(self): return self.name.lower() @staticmethod - def from_acquisition_literal(literal: AcquisitionAccessLiteral) -> 'EbookAccess': + def from_acquisition_access(literal: AcquisitionAccessLiteral) -> 'EbookAccess': if literal == 'sample': # We need to update solr to handle these! Requires full reindex return EbookAccess.PRINTDISABLED @@ -49,6 +50,11 @@ def from_acquisition_literal(literal: AcquisitionAccessLiteral) -> 'EbookAccess' @dataclass class Acquisition: + """ + Acquisition represents a book resource found on another website, such as + Standard Ebooks. + """ + access: AcquisitionAccessLiteral format: Literal['web', 'pdf', 'epub', 'audio'] price: str | None @@ -57,7 +63,7 @@ class Acquisition: @property def ebook_access(self) -> EbookAccess: - return EbookAccess.from_acquisition_literal(self.access) + return EbookAccess.from_acquisition_access(self.access) @staticmethod def from_json(json: dict) -> 'Acquisition': @@ -211,12 +217,12 @@ def get_access( # Most providers are for public-only ebooks right now return EbookAccess.PUBLIC - def get_ebook_acquisitions( + def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: """ - Return a list of EbookProvider objects for the edition. + Return a list of Acquisition objects for the edition. """ if edition.providers: return [Acquisition.from_json(dict(p)) for p in edition.providers] @@ -306,7 +312,7 @@ def render_download_options(self, edition: Edition, extra_args: list | None = No def is_own_ocaid(self, ocaid: str) -> bool: return 'librivox' in ocaid - def get_ebook_acquisitions( + def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: @@ -331,7 +337,7 @@ class ProjectGutenbergProvider(AbstractBookProvider): def is_own_ocaid(self, ocaid: str) -> bool: return ocaid.endswith('gut') - def get_ebook_acquisitions( + def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: @@ -357,7 +363,7 @@ def is_own_ocaid(self, ocaid: str) -> bool: # Standard ebooks isn't archived on IA return False - def get_ebook_acquisitions( + def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: @@ -392,7 +398,7 @@ class OpenStaxProvider(AbstractBookProvider): def is_own_ocaid(self, ocaid: str) -> bool: return False - def get_ebook_acquisitions( + def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: @@ -457,7 +463,19 @@ def render_read_button( ) if not acq_sorted: return '' - return render_template(self.get_template_path('read_button'), acq_sorted[0]) + + acquisition = acq_sorted[0] + # pre-process acquisition.url so ParseResult.netloc is always the domain. Only netloc is used. + url = ( + "https://" + acquisition.url + if not acquisition.url.startswith("http") + else acquisition.url + ) + parsed_url = parse.urlparse(url) + domain = parsed_url.netloc + return render_template( + self.get_template_path('read_button'), acquisition, domain + ) def get_access( self, @@ -468,7 +486,7 @@ def get_access( Return the access level of the edition. """ # For now assume 0 is best - return EbookAccess.from_acquisition_literal( + return EbookAccess.from_acquisition_access( Acquisition.from_json(edition['providers'][0]).access ) diff --git a/openlibrary/i18n/messages.pot b/openlibrary/i18n/messages.pot index 799efd8466a..8cb0a64b97a 100644 --- a/openlibrary/i18n/messages.pot +++ b/openlibrary/i18n/messages.pot @@ -2861,6 +2861,7 @@ msgid "" msgstr "" #: book_providers/cita_press_read_button.html +#: book_providers/direct_read_button.html #: book_providers/gutenberg_read_button.html #: book_providers/librivox_read_button.html #: book_providers/openstax_read_button.html @@ -2897,10 +2898,8 @@ msgstr "" #: book_providers/direct_read_button.html #, python-format msgid "" -"This book is available from %s, a site external to " -"Open Library. By visiting this external site, you will be subject to that" -" site's privacy policy and terms of use. Open Library cannot make any " -"guarantees about this external site's content." +"This book is freely available from %s, an external " +"third-party book provider." msgstr "" #: book_providers/gutenberg_download_options.html @@ -3156,10 +3155,6 @@ msgstr "" msgid "Oxford University Press; Penguin; W.W. Norton" msgstr "" -#: books/add.html -msgid "For a web book, use the URL." -msgstr "" - #: books/add.html books/edit/edition.html msgid "When was it published?" msgstr "" diff --git a/openlibrary/plugins/upstream/borrow.py b/openlibrary/plugins/upstream/borrow.py index ae75d4b4326..bf17089bb7e 100644 --- a/openlibrary/plugins/upstream/borrow.py +++ b/openlibrary/plugins/upstream/borrow.py @@ -136,11 +136,12 @@ def POST(self, key): # noqa: PLR0915 if ( action in ["borrow", "read"] and (provider := get_book_provider(edition)) - and (providers := provider.get_ebook_providers(edition)) - and providers[0].access == "open-access" + and provider.short_name != "ia" + and (acquisitions := provider.get_acquisitions(edition)) + and acquisitions[0].access == "open-access" ): stats.increment('ol.loans.webbook') - raise web.seeother(providers[0].url) + raise web.seeother(acquisitions[0].url) archive_url = get_bookreader_stream_url(edition.ocaid) + '?ref=ol' if i._autoReadAloud is not None: diff --git a/openlibrary/plugins/worksearch/schemes/works.py b/openlibrary/plugins/worksearch/schemes/works.py index 40e4d211250..38de36a0b51 100644 --- a/openlibrary/plugins/worksearch/schemes/works.py +++ b/openlibrary/plugins/worksearch/schemes/works.py @@ -530,20 +530,23 @@ def add_non_solr_fields(self, non_solr_fields: set[str], solr_result: dict) -> N for doc in solr_result['response']['docs']: for ed_doc in doc.get('editions', {}).get('docs', []): - if ed := ed_key_to_record.get(ed_doc['key']): - for field in non_solr_fields: - val = getattr(ed, field) - if field == 'providers': - provider = get_book_provider(ed) - if not provider: - continue - ed_doc[field] = [ - p.__dict__ for p in provider.get_ebook_acquisitions(ed) - ] - elif isinstance(val, infogami.infobase.client.Nothing): + # `ed` could be `None` if the record has been deleted and Solr not yet updated. + if not (ed := ed_key_to_record.get(ed_doc['key'])): + continue + + for field in non_solr_fields: + val = getattr(ed, field) + if field == 'providers': + provider = get_book_provider(ed) + if not provider: continue - elif field == 'description': - ed_doc[field] = val if isinstance(val, str) else val.value + ed_doc[field] = [ + p.__dict__ for p in provider.get_acquisitions(ed) + ] + elif isinstance(val, infogami.infobase.client.Nothing): + continue + elif field == 'description': + ed_doc[field] = val if isinstance(val, str) else val.value def lcc_transform(sf: luqum.tree.SearchField): diff --git a/openlibrary/templates/book_providers/direct_read_button.html b/openlibrary/templates/book_providers/direct_read_button.html index b74dfdb43a8..d6f56b79253 100644 --- a/openlibrary/templates/book_providers/direct_read_button.html +++ b/openlibrary/templates/book_providers/direct_read_button.html @@ -1,9 +1,11 @@ -$def with(provider) +$def with(acquisition, domain) +$# :param Acquisition acquisition: +$# :param domain str: -$if provider.access == 'open-access': +$if acquisition.access == 'open-access': -$elif provider.access == 'sample': +$elif acquisition.access == 'sample': $if render_once('direct-provider-toast'): diff --git a/openlibrary/templates/type/edition/view.html b/openlibrary/templates/type/edition/view.html index 6ec142adfcb..72193c4ae7d 100644 --- a/openlibrary/templates/type/edition/view.html +++ b/openlibrary/templates/type/edition/view.html @@ -97,12 +97,6 @@ $var title: $title - - $code: def has_any(*keys): return any(edition[k] for k in keys) From 84bdb7d1d056330f661004f860f208685511d7a8 Mon Sep 17 00:00:00 2001 From: Scott Barnes Date: Wed, 17 Jul 2024 10:33:18 -0700 Subject: [PATCH 23/23] Implement changes from code review --- openlibrary/book_providers.py | 39 +++++++------------ openlibrary/i18n/messages.pot | 38 +++++------------- .../direct_download_options.html | 13 ------- openlibrary/templates/books/add.html | 7 ++-- openlibrary/templates/books/edit/edition.html | 10 ++--- openlibrary/utils/solr.py | 1 + 6 files changed, 34 insertions(+), 74 deletions(-) delete mode 100644 openlibrary/templates/book_providers/direct_download_options.html diff --git a/openlibrary/book_providers.py b/openlibrary/book_providers.py index 36f32bce947..097cbec822f 100644 --- a/openlibrary/book_providers.py +++ b/openlibrary/book_providers.py @@ -53,13 +53,15 @@ class Acquisition: """ Acquisition represents a book resource found on another website, such as Standard Ebooks. + + Wording inspired by OPDS; see https://specs.opds.io/opds-1.2#23-acquisition-feeds """ access: AcquisitionAccessLiteral format: Literal['web', 'pdf', 'epub', 'audio'] price: str | None url: str - acquisition_name: str | None = None + provider_name: str | None = None @property def ebook_access(self) -> EbookAccess: @@ -88,7 +90,7 @@ def from_json(json: dict) -> 'Acquisition': format=json.get('format', 'web'), price=json.get('price'), url=json['url'], - acquisition_name=json.get('acquisition_name'), + provider_name=json.get('provider_name'), ) else: raise ValueError(f'Unknown ebook acquisition format: {json}') @@ -123,7 +125,7 @@ def from_opds_json(json: dict) -> 'Acquisition': format=fmt, price=price, url=json['href'], - acquisition_name=json.get('name'), + provider_name=json.get('name'), ) @@ -221,9 +223,6 @@ def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: - """ - Return a list of Acquisition objects for the edition. - """ if edition.providers: return [Acquisition.from_json(dict(p)) for p in edition.providers] else: @@ -316,16 +315,13 @@ def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: - """ - Return a list of EbookProvider objects for the edition. - """ return [ Acquisition( access='open-access', format='audio', price=None, url=f'https://librivox.org/{self.get_best_identifier(edition)}', - acquisition_name=self.short_name, + provider_name=self.short_name, ) ] @@ -341,16 +337,13 @@ def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: - """ - Return a list of EbookProvider objects for the edition. - """ return [ Acquisition( access='open-access', format='web', price=None, url=f'https://www.gutenberg.org/ebooks/{self.get_best_identifier(edition)}', - acquisition_name=self.short_name, + provider_name=self.short_name, ) ] @@ -367,9 +360,6 @@ def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: - """ - Return a list of EbookProvider objects for the edition. - """ standard_ebooks_id = self.get_best_identifier(edition) base_url = 'https://standardebooks.org/ebooks/' + standard_ebooks_id flat_id = standard_ebooks_id.replace('/', '_') @@ -379,14 +369,14 @@ def get_acquisitions( format='web', price=None, url=f'{base_url}/text/single-page', - acquisition_name=self.short_name, + provider_name=self.short_name, ), Acquisition( access='open-access', format='epub', price=None, url=f'{base_url}/downloads/{flat_id}.epub', - acquisition_name=self.short_name, + provider_name=self.short_name, ), ] @@ -402,16 +392,13 @@ def get_acquisitions( self, edition: Edition, ) -> list[Acquisition]: - """ - Return a list of EbookProvider objects for the edition. - """ return [ Acquisition( access='open-access', format='web', price=None, url=f'https://openstax.org/details/books/{self.get_best_identifier(edition)}', - acquisition_name=self.short_name, + provider_name=self.short_name, ) ] @@ -477,6 +464,10 @@ def render_read_button( self.get_template_path('read_button'), acquisition, domain ) + def render_download_options(self, edition: Edition, extra_args: list | None = None): + # Return an empty string until #9581 is addressed. + return "" + def get_access( self, edition: dict, @@ -652,7 +643,7 @@ def get_best_edition( def get_solr_keys(): - return [p.solr_key for p in PROVIDER_ORDER if p] + return [p.solr_key for p in PROVIDER_ORDER if p.solr_key] setattr(get_book_provider, 'ia', get_book_provider_by_name('ia')) # noqa: B010 diff --git a/openlibrary/i18n/messages.pot b/openlibrary/i18n/messages.pot index 8cb0a64b97a..44738ca8f48 100644 --- a/openlibrary/i18n/messages.pot +++ b/openlibrary/i18n/messages.pot @@ -2197,14 +2197,12 @@ msgid "BookReader" msgstr "" #: admin/loans_table.html book_providers/cita_press_download_options.html -#: book_providers/direct_download_options.html #: book_providers/ia_download_options.html #: book_providers/openstax_download_options.html books/edit/edition.html msgid "PDF" msgstr "" -#: admin/loans_table.html book_providers/direct_download_options.html -#: book_providers/gutenberg_download_options.html +#: admin/loans_table.html book_providers/gutenberg_download_options.html #: book_providers/ia_download_options.html #: book_providers/standard_ebooks_download_options.html books/edit/edition.html msgid "ePub" @@ -2831,7 +2829,6 @@ msgid "Died" msgstr "" #: book_providers/cita_press_download_options.html -#: book_providers/direct_download_options.html #: book_providers/gutenberg_download_options.html #: book_providers/ia_download_options.html #: book_providers/librivox_download_options.html @@ -2870,27 +2867,6 @@ msgstr "" msgid "Learn more" msgstr "" -#: book_providers/direct_download_options.html -msgid "Download as an ePub" -msgstr "" - -#: book_providers/direct_download_options.html -msgid "Download as a PDF" -msgstr "" - -#: book_providers/direct_download_options.html -msgid "Download as a Kindle MOBI" -msgstr "" - -#: book_providers/direct_download_options.html -#: book_providers/ia_download_options.html -msgid "MOBI" -msgstr "" - -#: book_providers/direct_download_options.html -msgid "More options" -msgstr "" - #: book_providers/direct_read_button.html msgid "Read free online" msgstr "" @@ -2965,6 +2941,10 @@ msgstr "" msgid "Download a MOBI file from Internet Archive" msgstr "" +#: book_providers/ia_download_options.html +msgid "MOBI" +msgstr "" + #: book_providers/ia_download_options.html msgid "Download open DAISY from Internet Archive (print-disabled format)" msgstr "" @@ -3923,10 +3903,6 @@ msgstr "" msgid "Provider Name" msgstr "" -#: ReadButton.html books/edit/edition.html -msgid "Listen" -msgstr "" - #: books/edit/edition.html msgid "Buy" msgstr "" @@ -7164,6 +7140,10 @@ msgstr "" msgid "Read ebook from Internet Archive" msgstr "" +#: ReadButton.html +msgid "Listen" +msgstr "" + #: ReadMore.html msgid "Read more" msgstr "" diff --git a/openlibrary/templates/book_providers/direct_download_options.html b/openlibrary/templates/book_providers/direct_download_options.html deleted file mode 100644 index 553b8c0362d..00000000000 --- a/openlibrary/templates/book_providers/direct_download_options.html +++ /dev/null @@ -1,13 +0,0 @@ -$def with(url) - -
-
-

$_('Download Options')

- -
diff --git a/openlibrary/templates/books/add.html b/openlibrary/templates/books/add.html index 65905b4ecbc..f0fee03207c 100644 --- a/openlibrary/templates/books/add.html +++ b/openlibrary/templates/books/add.html @@ -57,7 +57,7 @@

$_("Add a book to Open Library")

- $ admin_user = ctx.user and (ctx.user.is_admin() or ctx.user.is_super_librarian()) + $ is_privileged = ctx.user and (ctx.user.is_admin() or ctx.user.is_super_librarian())
@@ -66,7 +66,7 @@

$_("Add a book to Open Library")

- @@ -687,7 +687,7 @@
-$if ctx.user and ctx.user.is_admin(): +$if is_admin:
$_("Admin Only")
diff --git a/openlibrary/utils/solr.py b/openlibrary/utils/solr.py index aa602cac04e..ca68c3dd0c4 100644 --- a/openlibrary/utils/solr.py +++ b/openlibrary/utils/solr.py @@ -47,6 +47,7 @@ def get( logger.info(f"solr /get: {key}, {fields}") resp = self.session.get( f"{self.base_url}/get", + # It's unclear how field=None is getting in here; a better fix would be at the source. params={ 'id': key, **(