From f9e1945f1003275ad3cc5fe620666df9d9b3baae Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Thu, 7 Jul 2022 15:48:10 +0200 Subject: [PATCH] Process in three phases? --- scripts/add_bwb_covers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/add_bwb_covers.py b/scripts/add_bwb_covers.py index 069c67aab01..e5d523a9a2c 100755 --- a/scripts/add_bwb_covers.py +++ b/scripts/add_bwb_covers.py @@ -25,7 +25,7 @@ 1. Phase 1 needs read access to BWB zipfiles in ol-home0:/1/var/tmp/imports/2022/covers 2. slow_edition_needs_a_cover() takes a long time to find each Open Library Edition. 3. fast_edition_needs_a_cover() required direct access to web.ctx.site.things(). -4. Could read many methods of access accellerate the edition_needs_a_cover() process? +4. Could read-many methods of access accelerate the edition_needs_a_cover() process? """ from __future__ import annotations @@ -74,18 +74,18 @@ def generate_covers_dict(covers_dir: Path = COVERS_DIR) -> dict: the zip files in covers_dir. This process currently takes about 17 sec to generate a dict containing 1.3M ISBN-13s. - {'Apr2022_1_lc_13.zip': [{'9780000528742': ''}, - {'9780006499268': ''}, - {'9780006499305': ''}, + {'Apr2022_1_lc_13.zip': [{'9780000528742': []}, + {'9780006499268': []}, + {'9780006499305': []}, """ from operator import attrgetter - def get_isbn_13s_from_zipfile(zipfile_path: Path) -> Iterator[dict[str, str]]: + def get_isbn_13s_from_zipfile(zipfile_path: Path) -> Iterator[dict[str, list]]: with ZipFile(zipfile_path) as in_file: for cover_file in sorted(in_file.infolist(), key=attrgetter("filename")): parts = cover_file.filename.split(".") # 9780425030134.jpg assert parts[-1] == "jpg", cover_file.filename - yield {parts[0]: ""} + yield {parts[0]: []} return { zipfile_path.name: list(get_isbn_13s_from_zipfile(zipfile_path))