Skip to content

Commit

Permalink
Feature: add thoth-archiving-network collection to OL import list (#9413)
Browse files Browse the repository at this point in the history

* Feature: add thoth-archiving-network collection to OL import list

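This changes the query logic for the OL imports from IA so that items in the
thoth-archiving-network collection are included even when they do not meet
the usual scan-related (soft) requirements; the hard requirements still apply.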

---------

Co-authored-by: Drini Cami <cdrini@gmail.com>
Co-authored-by: Mek <michael.karpeles@gmail.com>
  • Loading branch information
3 people authored Jun 11, 2024
1 parent 8d3f048 commit 7272e09
Showing 1 changed file with 32 additions and 24 deletions.
56 changes: 32 additions & 24 deletions openlibrary/core/ia.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,31 +281,39 @@ def get_candidates_url(
marcs: bool = True,
) -> str:
DAY = datetime.timedelta(days=1)
hard_requirements = ' AND '.join(
[
"mediatype:texts",
f'indexdate:{day}*',
'!collection:litigationworks',
'!is_dark:true',
# Fetch back to items added before the day of interest, since items
# sometimes take a few days to process into the collection.
f'addeddate:[{day - 60 * DAY} TO {day + 1 * DAY}]',
]
)
repub_states = ' OR '.join(
f'repub_state:{state}' for state in VALID_READY_REPUB_STATES
)
soft_requirements = ' AND '.join(
[
f'({repub_states})',
'scanningcenter:*',
'scanner:*',
'scandate:*',
'format:pdf',
# TODO: format:marc seems to be getting more records than expected
*(['format:marc'] if marcs else []),
'!collection:opensource',
'!collection:additional_collections',
'!noindex:true',
]
)
exempt_collections = ' OR '.join( # noqa: FLY002
["collection:thoth-archiving-network"]
)
params = {
'q': ' AND '.join(
[
'mediatype:texts',
'(%s)'
% ' OR '.join(
f'repub_state:{state}' for state in VALID_READY_REPUB_STATES
),
'scanningcenter:*',
'scanner:*',
'scandate:*',
'!collection:opensource',
'!collection:additional_collections',
'!collection:litigationworks',
'!noindex:true',
'!is_dark:true',
'format:pdf',
f'indexdate:{day}*',
# Fetch back to items added before the day of interest, since items
# sometimes take a few days to process into the collection.
f'addeddate:[{day - 60 * DAY} TO {day + 1 * DAY}]',
# TODO: This seems to be getting more records than expected
*(['format:marc'] if marcs else []),
]
),
'q': f'({hard_requirements}) AND (({soft_requirements}) OR ({exempt_collections}))',
'fl': 'identifier,format',
'service': 'metadata__unlimited',
'rows': '100000', # This is the max, I believe
Expand Down

0 comments on commit 7272e09

Please sign in to comment.