diff --git a/CHANGELOG.md b/CHANGELOG.md
index 755601556..331941431 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where
 
 ## 0.6.0 (UNRELEASED)
 
- * `apply` no longer creates empty commits unless you specify `--allow-empty` [#243](https://github.com/koordinates/sno/issues/243)
+ * `apply` and `import` no longer create empty commits unless you specify `--allow-empty` [#243](https://github.com/koordinates/sno/issues/243), [#245](https://github.com/koordinates/sno/issues/245)
 
 ## 0.5.0
 
diff --git a/sno/fast_import.py b/sno/fast_import.py
index 44fb38907..8abbe7501 100644
--- a/sno/fast_import.py
+++ b/sno/fast_import.py
@@ -1,13 +1,14 @@
 import logging
 import subprocess
 import time
+import uuid
 from enum import Enum, auto
 
 import click
 import pygit2
 
 from . import git_util
-from .exceptions import SubprocessError, InvalidOperation
+from .exceptions import SubprocessError, InvalidOperation, NotFound, NO_CHANGES
 from .import_source import ImportSource
 from .structure import DatasetStructure, RepositoryStructure
 from .repository_version import get_repo_version, extra_blobs_for_version
@@ -38,6 +39,7 @@ def fast_import_tables(
     header=None,
     message=None,
     replace_existing=ReplaceExisting.DONT_REPLACE,
+    allow_empty=False,
     limit=None,
     max_pack_size="2G",
     max_delta_depth=0,
@@ -87,16 +89,26 @@ def fast_import_tables(
         f"--depth={max_delta_depth}",
     ] + list(extra_cmd_args)
 
+    orig_branch = get_head_branch(repo)
     if header is None:
-        header = generate_header(repo, sources, message)
+        # import onto a temp branch. then reset the head branch afterwards.
+        # this allows us to check the result before updating the orig branch.
+        import_branch = f'refs/heads/{uuid.uuid4()}'
+        header = generate_header(repo, sources, message, branch=import_branch)
+    else:
+        import_branch = orig_branch
 
     if not quiet:
         click.echo("Starting git-fast-import...")
-    p = subprocess.Popen(cmd, cwd=repo.path, stdin=subprocess.PIPE,)
+    p = subprocess.Popen(
+        cmd,
+        cwd=repo.path,
+        stdin=subprocess.PIPE,
+    )
 
     try:
         if replace_existing != ReplaceExisting.ALL:
-            header += f"from {get_head_branch(repo)}^0\n"
+            header += f"from {orig_branch}^0\n"
         p.stdin.write(header.encode("utf8"))
 
         # Write any extra blobs supplied by the client or needed for this version.
@@ -183,6 +195,23 @@ def fast_import_tables(
     if not quiet:
         click.echo(f"Closed in {(t3-t2):.0f}s")
 
+    if import_branch != orig_branch:
+        try:
+            if head_tree and not allow_empty:
+                if repo.revparse_single(import_branch).peel(pygit2.Tree) == head_tree:
+                    raise NotFound("No changes to commit", exit_code=NO_CHANGES)
+            # reset the original branch head to the import branch, so it gets the new commits
+            if head_tree:
+                # repo was non-empty before this, so orig_branch exists already.
+                # we have to delete and re-create it at the new commit.
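+                # (pygit2's references.create() won't overwrite an existing
+                # ref unless force=True is passed, hence delete-then-create)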
+                repo.references.delete(orig_branch)
+            repo.references.create(
+                orig_branch, repo.references[import_branch].peel(pygit2.Commit).oid
+            )
+        finally:
+            # remove the import branch
+            repo.references.delete(import_branch)
+
 
 def get_head_tree(repo):
     """Returns the tree at the current repo HEAD."""
@@ -214,14 +243,16 @@ def write_blobs_to_stream(stream, blobs):
         yield i, blob_path
 
 
-def generate_header(repo, sources, message):
+def generate_header(repo, sources, message, branch=None):
     if message is None:
         message = generate_message(sources)
 
     author = git_util.author_signature(repo)
     committer = git_util.committer_signature(repo)
+    if branch is None:
+        branch = get_head_branch(repo)
     return (
-        f"commit {get_head_branch(repo)}\n"
+        f"commit {branch}\n"
         f"author {author.name} <{author.email}> {author.time} {minutes_to_tz_offset(author.offset)}\n"
         f"committer {committer.name} <{committer.email}> {committer.time} {minutes_to_tz_offset(committer.offset)}\n"
         f"data {len(message.encode('utf8'))}\n{message}\n"
diff --git a/sno/init.py b/sno/init.py
index b4fb14b6e..23fd8d6c2 100644
--- a/sno/init.py
+++ b/sno/init.py
@@ -83,7 +83,8 @@ def temporary_branch(repo):
 @click.pass_context
 @click.argument("source")
 @click.argument(
-    "tables", nargs=-1,
+    "tables",
+    nargs=-1,
 )
 @click.option(
     "--all-tables",
@@ -141,7 +142,10 @@ def temporary_branch(repo):
     help="List available import formats, and then exit",
 )
 @click.option(
-    "--output-format", "-o", type=click.Choice(["text", "json"]), default="text",
+    "--output-format",
+    "-o",
+    type=click.Choice(["text", "json"]),
+    default="text",
 )
 @click.option(
     "--primary-key",
@@ -152,6 +156,16 @@ def temporary_branch(repo):
     is_flag=True,
     help="Replace existing dataset(s) of the same name.",
 )
+@click.option(
+    "--allow-empty",
+    is_flag=True,
+    default=False,
+    help=(
+        "Usually recording a commit that has the exact same tree as its sole "
+        "parent commit is a mistake, and the command prevents you from making "
+        "such a commit. This option bypasses the safety."
+    ),
+)
 @click.option(
     "--max-delta-depth",
     hidden=True,
@@ -170,6 +184,7 @@ def import_table(
     tables,
     table_info,
     replace_existing,
+    allow_empty,
     max_delta_depth,
 ):
     """
@@ -261,6 +276,7 @@ def import_table(
         replace_existing=ReplaceExisting.GIVEN
         if replace_existing
         else ReplaceExisting.DONT_REPLACE,
+        allow_empty=allow_empty,
     )
 
     rs = RepositoryStructure(repo)
@@ -314,7 +330,14 @@ def import_table(
     help="--depth option to git-fast-import (advanced users only)",
 )
 def init(
-    ctx, message, directory, repo_version, import_from, bare, wc_path, max_delta_depth,
+    ctx,
+    message,
+    directory,
+    repo_version,
+    import_from,
+    bare,
+    wc_path,
+    max_delta_depth,
 ):
     """
     Initialise a new repository and optionally import data.
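
Aside: the no-changes guard added in fast_import.py above reduces to a tree-equality check between the temporary import branch and HEAD, raised before the original branch ref is touched. A minimal standalone sketch of that check, using only pygit2 calls and sno.exceptions names that already appear in this diff (the helper name and signature are illustrative, not sno's actual API):

    import pygit2

    from sno.exceptions import NotFound, NO_CHANGES

    def abort_if_import_is_empty(repo, import_branch, allow_empty=False):
        # pygit2 objects compare equal when their oids match, so comparing
        # the two Tree objects is a cheap id comparison, not a deep walk
        head_tree = repo.head.peel(pygit2.Tree)
        import_tree = repo.revparse_single(import_branch).peel(pygit2.Tree)
        if import_tree == head_tree and not allow_empty:
            # surfaces to the CLI as exit code 44 (NO_CHANGES), which the
            # new test_import_replace_existing_with_no_changes asserts
            raise NotFound("No changes to commit", exit_code=NO_CHANGES)

Because the import lands on a throwaway refs/heads/<uuid4> branch first, raising here leaves the original branch untouched; the finally block then only has to delete the temporary ref.
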
@@ -347,7 +370,10 @@ def init(
 
     if import_from:
         fast_import_tables(
-            repo, sources, message=message, max_delta_depth=max_delta_depth,
+            repo,
+            sources,
+            message=message,
+            max_delta_depth=max_delta_depth,
         )
         head_commit = repo.head.peel(pygit2.Commit)
         checkout.reset_wc_if_needed(repo, head_commit)
diff --git a/tests/test_init.py b/tests/test_init.py
index ca8579a54..48976d40b 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -89,7 +89,10 @@ def test_import_table_with_prompt(data_archive_readonly, tmp_path, cli_runner, c
     assert r.exit_code == 0
     with chdir(repo_path):
         r = cli_runner.invoke(
-            ["import", data / "census2016_sdhca_ot_short.gpkg",],
+            [
+                "import",
+                data / "census2016_sdhca_ot_short.gpkg",
+            ],
             input="census2016_sdhca_ot_ced_short\n",
         )
         # Table was specified interactively via prompt
@@ -178,7 +181,11 @@ def test_import_table_with_prompt_with_no_input(
 
 
 def test_import_replace_existing(
-    data_archive, tmp_path, cli_runner, chdir, geopackage,
+    data_archive,
+    tmp_path,
+    cli_runner,
+    chdir,
+    geopackage,
 ):
     with data_archive("gpkg-polygons") as data:
         repo_path = tmp_path / 'emptydir'
@@ -237,8 +244,45 @@ def test_import_replace_existing(
     }
 
 
+def test_import_replace_existing_with_no_changes(
+    data_archive,
+    tmp_path,
+    cli_runner,
+    chdir,
+    geopackage,
+):
+    with data_archive("gpkg-polygons") as data:
+        repo_path = tmp_path / 'emptydir'
+        r = cli_runner.invoke(["init", repo_path])
+        assert r.exit_code == 0
+        with chdir(repo_path):
+            r = cli_runner.invoke(
+                [
+                    "import",
+                    data / "nz-waca-adjustments.gpkg",
+                    "nz_waca_adjustments:mytable",
+                ]
+            )
+            assert r.exit_code == 0, r.stderr
+
+            # now import the same thing over the top (no changes)
+            r = cli_runner.invoke(
+                [
+                    "import",
+                    "--replace-existing",
+                    data / "nz-waca-adjustments.gpkg",
+                    "nz_waca_adjustments:mytable",
+                ]
+            )
+            assert r.exit_code == 44, r.stderr
+
+
 def test_import_replace_existing_with_compatible_schema_changes(
-    data_archive, tmp_path, cli_runner, chdir, geopackage,
+    data_archive,
+    tmp_path,
+    cli_runner,
+    chdir,
+    geopackage,
 ):
     with data_archive("gpkg-polygons") as data:
         repo_path = tmp_path / 'emptydir'
@@ -304,7 +348,11 @@ def test_import_replace_existing_with_compatible_schema_changes(
 
 
 def test_import_replace_existing_with_column_renames(
-    data_archive, tmp_path, cli_runner, chdir, geopackage,
+    data_archive,
+    tmp_path,
+    cli_runner,
+    chdir,
+    geopackage,
 ):
     with data_archive("gpkg-polygons") as data:
         repo_path = tmp_path / 'emptydir'
@@ -594,7 +642,11 @@ def test_init_import(
 
 
 def test_init_import_commit_headers(
-    data_archive, tmp_path, cli_runner, chdir, geopackage,
+    data_archive,
+    tmp_path,
+    cli_runner,
+    chdir,
+    geopackage,
 ):
     with data_archive("gpkg-points") as data:
         repo_path = tmp_path / "data.sno"
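
A natural companion to the exit-44 test above is the positive case: the same re-import with --allow-empty should succeed and record an empty commit. A sketch of such a test, reusing the fixtures and data already used in this diff (not part of the commit; the test name and final assertion are assumptions):

    def test_import_replace_existing_allow_empty(
        data_archive,
        tmp_path,
        cli_runner,
        chdir,
    ):
        with data_archive("gpkg-polygons") as data:
            repo_path = tmp_path / 'emptydir'
            r = cli_runner.invoke(["init", repo_path])
            assert r.exit_code == 0
            with chdir(repo_path):
                r = cli_runner.invoke(
                    [
                        "import",
                        data / "nz-waca-adjustments.gpkg",
                        "nz_waca_adjustments:mytable",
                    ]
                )
                assert r.exit_code == 0, r.stderr

                # same import again: --allow-empty bypasses the NO_CHANGES
                # guard, so this should commit rather than exit with 44
                r = cli_runner.invoke(
                    [
                        "import",
                        "--replace-existing",
                        "--allow-empty",
                        data / "nz-waca-adjustments.gpkg",
                        "nz_waca_adjustments:mytable",
                    ]
                )
                assert r.exit_code == 0, r.stderr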