Skip to content

Commit

Permalink
Separate cli and core logic
Browse files Browse the repository at this point in the history
  • Loading branch information
DarkLight1337 committed May 5, 2024
1 parent 095f0c2 commit 3f3beb2
Show file tree
Hide file tree
Showing 16 changed files with 508 additions and 427 deletions.
16 changes: 1 addition & 15 deletions pyimorg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1 @@
from __future__ import annotations

import click

from .cli import diff, groupby

__all__ = ['cli']

@click.group()
def cli():
    # NOTE: click shows this docstring as the help text of the command group,
    # so its wording is user-facing. Subcommands are attached to the group
    # by the `cli.command(...)` calls that follow this definition.
    """Command-line tool for organizing images."""

cli.command(diff)
cli.command(groupby)
from .pyimorg import *
2 changes: 1 addition & 1 deletion pyimorg/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from . import cli
from .cli import cli

cli()
16 changes: 14 additions & 2 deletions pyimorg/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
from .diff import *
from .groupby import *
import click

from .diff import diff
from .groupby import groupby

__all__ = ['cli']

@click.group()
def cli():
    # NOTE: click shows this docstring as the help text of the command group,
    # so its wording is user-facing. Subcommands are attached to the group
    # by the `cli.command(...)` calls that follow this definition.
    """Command-line tool for organizing images."""

cli.command(diff)
cli.command(groupby)
113 changes: 3 additions & 110 deletions pyimorg/cli/diff.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
from __future__ import annotations

import logging
from pathlib import Path
from typing import Literal

import click

from ..filesystem import cksum_parallel, cp_p_parallel, is_image, mkdir_p_parallel
from ..logger import set_logger_level
from ..pyimorg.diff import Hasher, diff as _diff

__all__ = ['Hasher', 'diff']

Hasher = Literal['sha256', 'sha512']
__all__ = ['diff']

@click.argument('src1', type=click.Path(exists=True, file_okay=False, dir_okay=True))
@click.argument('src2', type=click.Path(exists=True, file_okay=False, dir_okay=True))
Expand All @@ -27,8 +22,6 @@ def diff(
*,
hasher: Hasher,
threads: int,
logger_level: int = logging.INFO,
disable_progbar: bool = False,
):
"""
Given two sets of images contained in SRC1 and SRC2,
Expand All @@ -47,104 +40,4 @@ def diff(
- `src2_only`: Contains the images that only exist in SRC2 but not in SRC1.
The path of each output image is based on that in SRC2.
"""
if isinstance(src1, str):
src1 = Path(src1)
if isinstance(src2, str):
src2 = Path(src2)
if isinstance(dst, str):
dst = Path(dst)

if dst.exists():
msg = f'The destination directory ({dst}) already exists.'
raise ValueError(msg)

set_logger_level(logger_level)

src1_img_paths = [path for path in src1.rglob('*') if is_image(path)]
src1_img_hashes = cksum_parallel(
src1_img_paths,
digest=hasher,
n_jobs=threads,
desc='Hashing images from src1',
disable_progbar=disable_progbar,
)
src1_img_hash_to_path = {
img_hash: img_path
for img_path, img_hash in zip(src1_img_paths, src1_img_hashes)
}

src2_img_paths = [path for path in src2.rglob('*') if is_image(path)]
src2_img_hashes = cksum_parallel(
src2_img_paths,
digest=hasher,
n_jobs=threads,
desc='Hashing images from src2',
disable_progbar=disable_progbar,
)
src2_img_hash_to_path = {
img_hash: img_path
for img_path, img_hash in zip(src2_img_paths, src2_img_hashes)
}

hashes_in_src1 = set(src1_img_hashes)
hashes_in_src2 = set(src2_img_hashes)
hashes_in_both = hashes_in_src1 & hashes_in_src2
hashes_in_src1_only = hashes_in_src1 - hashes_in_src2
hashes_in_src2_only = hashes_in_src2 - hashes_in_src1

in_both_src_path_to_dst_path = {
src1_img_hash_to_path[img_hash]: dst / 'both' / src1_img_hash_to_path[img_hash].relative_to(src1)
for img_hash in hashes_in_both
}

mkdir_p_parallel(
{path.parent for path in in_both_src_path_to_dst_path.values()},
n_jobs=threads,
desc='Creating output directories for images that exist in both',
disable_progbar=disable_progbar,
)

cp_p_parallel(
in_both_src_path_to_dst_path.items(),
n_jobs=threads,
desc='Writing output images that exist in both',
disable_progbar=disable_progbar,
)

in_src1_only_src_path_to_dst_path = {
src1_img_hash_to_path[img_hash]: dst / 'src1_only' / src1_img_hash_to_path[img_hash].relative_to(src1)
for img_hash in hashes_in_src1_only
}

mkdir_p_parallel(
{path.parent for path in in_src1_only_src_path_to_dst_path.values()},
n_jobs=threads,
desc='Creating output directories for images that exist in src1 only',
disable_progbar=disable_progbar,
)

cp_p_parallel(
in_src1_only_src_path_to_dst_path.items(),
n_jobs=threads,
desc='Writing output images that exist in src1 only',
disable_progbar=disable_progbar,
)

in_src2_only_src_path_to_dst_path = {
src2_img_hash_to_path[img_hash]: dst / 'src2_only' / src2_img_hash_to_path[img_hash].relative_to(src2)
for img_hash in hashes_in_src2_only
}

mkdir_p_parallel(
{path.parent for path in in_src2_only_src_path_to_dst_path.values()},
n_jobs=threads,
desc='Creating output directories for images that exist in src2 only',
disable_progbar=disable_progbar,
)

cp_p_parallel(
in_src2_only_src_path_to_dst_path.items(),
n_jobs=threads,
desc='Writing output images that exist in src2 only',
disable_progbar=disable_progbar,
)
return _diff(src1, src2, dst, hasher=hasher, threads=threads)
74 changes: 2 additions & 72 deletions pyimorg/cli/groupby.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,14 @@
from __future__ import annotations

from collections import defaultdict
from datetime import datetime
import logging
from pathlib import Path
from typing import Literal

import click
from tqdm import tqdm

from ..exif import read_captured_timestamp
from ..filesystem import cp_p_parallel, is_image, mkdir_p_parallel
from ..func import map_mt_with_tqdm
from ..logger import set_logger_level
from ..pyimorg import Group, groupby as _groupby

__all__ = ['Group', 'groupby']

Group = Literal['year', 'month', 'day']

def _get_dst_dir_name(timestamp: datetime, *, groupby: Group) -> str:
    """
    Return the output directory name for an image captured at ``timestamp``.

    The name is the timestamp formatted at the requested granularity:
    ``YYYY`` for ``'year'``, ``YYYYMM`` for ``'month'`` and ``YYYYMMDD``
    for ``'day'``.

    Raises:
        ValueError: If ``groupby`` is not one of the supported granularities.
    """
    formats = {
        'year': r'%Y',
        'month': r'%Y%m',
        'day': r'%Y%m%d',
    }

    try:
        fmt = formats[groupby]
    except KeyError:
        # Fail loudly instead of falling through and implicitly returning
        # None, which would contradict the `-> str` return annotation.
        msg = f'Unsupported group: {groupby!r}'
        raise ValueError(msg) from None

    return timestamp.strftime(fmt)

@click.argument('src', type=click.Path(exists=True, file_okay=False, dir_okay=True))
@click.argument('dst', type=click.Path(exists=False))
@click.option('--group', '-g', type=click.Choice(['year', 'month', 'day']), default='month',
Expand All @@ -38,8 +21,6 @@ def groupby(
*,
group: Literal['year', 'month', 'day'],
threads: int,
logger_level: int = logging.INFO,
disable_progbar: bool = False,
):
"""
Given a set of images contained in SRC,
Expand All @@ -55,55 +36,4 @@ def groupby(
- `day`: `YYYYMMDD` format, where `YYYY` is the year, `MM` is the month,
and `DD` is the day of the month.
"""
if isinstance(src, str):
src = Path(src)
if isinstance(dst, str):
dst = Path(dst)

if dst.exists():
msg = f'The destination directory ({dst}) already exists.'
raise ValueError(msg)

set_logger_level(logger_level)

src_img_paths = [path for path in src.rglob('*') if is_image(path)]
src_img_captured_timestamps = map_mt_with_tqdm(
src_img_paths,
read_captured_timestamp,
n_jobs=threads,
desc=None if disable_progbar else 'Reading metadata of images',
)

dst_dir_name_to_src_img_paths: defaultdict[str, list[Path]] = defaultdict(list)
for src_img_path, captured_timestamp in tqdm(
zip(src_img_paths, src_img_captured_timestamps),
desc=f'Grouping images by {group}',
total=len(src_img_paths),
disable=disable_progbar,
):
if captured_timestamp is None:
dst_dir_name = 'UNKNOWN'
else:
dst_dir_name = _get_dst_dir_name(captured_timestamp, groupby=group)

dst_dir_name_to_src_img_paths[dst_dir_name].append(src_img_path)

src_img_path_to_dst_img_path = {
src_img_path: dst / dst_dir_name / src_img_path.name
for dst_dir_name, src_img_paths in dst_dir_name_to_src_img_paths.items()
for src_img_path in src_img_paths
}

mkdir_p_parallel(
{path.parent for path in src_img_path_to_dst_img_path.values()},
n_jobs=threads,
desc='Creating output directories',
disable_progbar=disable_progbar,
)

cp_p_parallel(
src_img_path_to_dst_img_path.items(),
n_jobs=threads,
desc='Writing output images',
disable_progbar=disable_progbar,
)
return _groupby(src, dst, group=group, threads=threads)
2 changes: 2 additions & 0 deletions pyimorg/pyimorg/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .diff import *
from .groupby import *
Loading

0 comments on commit 3f3beb2

Please sign in to comment.