From d14ff302dbd206da3f4163bc38148efef64f9f19 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 12 Apr 2024 15:15:11 +0300 Subject: [PATCH] Refactor to address quality complaints --- annif/cli.py | 19 ++----------------- annif/cli_util.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/annif/cli.py b/annif/cli.py index 95553f858..a0178bfe3 100644 --- a/annif/cli.py +++ b/annif/cli.py @@ -609,7 +609,7 @@ def run_upload(project_ids_pattern, repo_id, token, commit_message): Hub repository. An authentication token and commit message can be given with options. """ - from huggingface_hub import HfApi, preupload_lfs_files + from huggingface_hub import HfApi from huggingface_hub.utils import HfHubHTTPError, HFValidationError projects = cli_util.get_matching_projects(project_ids_pattern) @@ -621,24 +621,9 @@ def run_upload(project_ids_pattern, repo_id, token, commit_message): else f"Upload project(s) {project_ids_pattern} with Annif" ) - project_dirs = {p.datadir for p in projects} - vocab_dirs = {p.vocab.datadir for p in projects} - data_dirs = project_dirs.union(vocab_dirs) - fobjs, operations = [], [] try: - for data_dir in data_dirs: - logger.debug(f"Archiving directory {data_dir}") - fobj, operation = cli_util.prepare_datadir_commit(data_dir) - logger.debug(f"Preuploading to {operation.path_in_repo}") - preupload_lfs_files(repo_id, additions=[operation]) - fobjs.append(fobj) - operations.append(operation) - for project in projects: - fobj, operation = cli_util.prepare_config_commit(project) - fobjs.append(fobj) - operations.append(operation) - + fobjs, operations = cli_util.prepare_commits(projects, repo_id) api = HfApi() api.create_commit( repo_id=repo_id, diff --git a/annif/cli_util.py b/annif/cli_util.py index 978214483..7a67d1f8a 100644 --- a/annif/cli_util.py +++ b/annif/cli_util.py @@ -252,7 +252,31 @@ def get_matching_projects(pattern: str) -> 
list[AnnifProject]: ] -def prepare_datadir_commit(data_dir: str) -> tuple[io.BufferedRandom, Any]: +def prepare_commits(projects: list[AnnifProject], repo_id: str) -> tuple[list, list]: + """Prepare and pre-upload data and config commit operations for projects to a + Hugging Face Hub repository.""" + from huggingface_hub import preupload_lfs_files + + fobjs, operations = [], [] + data_dirs = {p.datadir for p in projects} + vocab_dirs = {p.vocab.datadir for p in projects} + all_dirs = data_dirs.union(vocab_dirs) + + for data_dir in all_dirs: + fobj, operation = _prepare_datadir_commit(data_dir) + preupload_lfs_files(repo_id, additions=[operation]) + fobjs.append(fobj) + operations.append(operation) + + for project in projects: + fobj, operation = _prepare_config_commit(project) + fobjs.append(fobj) + operations.append(operation) + + return fobjs, operations + + +def _prepare_datadir_commit(data_dir: str) -> tuple[io.BufferedRandom, Any]: from huggingface_hub import CommitOperationAdd zip_repo_path = data_dir.split(os.path.sep, 1)[1] + ".zip" @@ -261,7 +285,7 @@ def prepare_datadir_commit(data_dir: str) -> tuple[io.BufferedRandom, Any]: return fobj, operation -def prepare_config_commit(project: AnnifProject) -> tuple[io.BytesIO, Any]: +def _prepare_config_commit(project: AnnifProject) -> tuple[io.BytesIO, Any]: from huggingface_hub import CommitOperationAdd config_repo_path = project.project_id + ".cfg"