Skip to content

Commit

Permalink
Process in three phases?
Browse files Browse the repository at this point in the history
  • Loading branch information
cclauss committed Jul 7, 2022
1 parent 47a0cb2 commit f9e1945
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions scripts/add_bwb_covers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
1. Phase 1 needs read access to BWB zipfiles in ol-home0:/1/var/tmp/imports/2022/covers
2. slow_edition_needs_a_cover() takes a long time to find each Open Library Edition.
3. fast_edition_needs_a_cover() required direct access to web.ctx.site.things().
4. Could read many methods of access accellerate the edition_needs_a_cover() process?
4. Could read-many methods of access accelerate the edition_needs_a_cover() process?
"""
from __future__ import annotations

Expand Down Expand Up @@ -74,18 +74,18 @@ def generate_covers_dict(covers_dir: Path = COVERS_DIR) -> dict:
the zip files in covers_dir. This process currently takes about 17 sec to generate
a dict containing 1.3M ISBN-13s.
{'Apr2022_1_lc_13.zip': [{'9780000528742': ''},
{'9780006499268': ''},
{'9780006499305': ''},
{'Apr2022_1_lc_13.zip': [{'9780000528742': []},
{'9780006499268': []},
{'9780006499305': []},
"""
from operator import attrgetter

def get_isbn_13s_from_zipfile(zipfile_path: Path) -> Iterator[dict[str, str]]:
def get_isbn_13s_from_zipfile(zipfile_path: Path) -> Iterator[dict[str, list]]:
with ZipFile(zipfile_path) as in_file:
for cover_file in sorted(in_file.infolist(), key=attrgetter("filename")):
parts = cover_file.filename.split(".") # 9780425030134.jpg
assert parts[-1] == "jpg", cover_file.filename
yield {parts[0]: ""}
yield {parts[0]: []}

return {
zipfile_path.name: list(get_isbn_13s_from_zipfile(zipfile_path))
Expand Down

0 comments on commit f9e1945

Please sign in to comment.