Skip to content

Commit

Permalink
import: No empty commits without --allow-empty
Browse files Browse the repository at this point in the history
This adds the --allow-empty option, which has the same meaning as
in `commit` and `apply`.

This behaviour mostly only affects `--replace-existing`,
since `import` will never produce an empty commit if you're not
replacing anything.
  • Loading branch information
craigds committed Sep 8, 2020
1 parent dfa5805 commit c15fc93
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 15 deletions.
43 changes: 37 additions & 6 deletions sno/fast_import.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import logging
import subprocess
import time
import uuid
from enum import Enum, auto

import click
import pygit2

from . import git_util
from .exceptions import SubprocessError, InvalidOperation
from .exceptions import SubprocessError, InvalidOperation, NotFound, NO_CHANGES
from .import_source import ImportSource
from .structure import DatasetStructure, RepositoryStructure
from .repository_version import get_repo_version, extra_blobs_for_version
Expand Down Expand Up @@ -38,6 +39,7 @@ def fast_import_tables(
header=None,
message=None,
replace_existing=ReplaceExisting.DONT_REPLACE,
allow_empty=False,
limit=None,
max_pack_size="2G",
max_delta_depth=0,
Expand Down Expand Up @@ -87,16 +89,26 @@ def fast_import_tables(
f"--depth={max_delta_depth}",
] + list(extra_cmd_args)

orig_branch = get_head_branch(repo)
if header is None:
header = generate_header(repo, sources, message)
# import onto a temp branch. then reset the head branch afterwards.
# this allows us to check the result before updating the orig branch.
import_branch = f'refs/heads/{uuid.uuid4()}'
header = generate_header(repo, sources, message, branch=import_branch)
else:
import_branch = orig_branch

if not quiet:
click.echo("Starting git-fast-import...")

p = subprocess.Popen(cmd, cwd=repo.path, stdin=subprocess.PIPE,)
p = subprocess.Popen(
cmd,
cwd=repo.path,
stdin=subprocess.PIPE,
)
try:
if replace_existing != ReplaceExisting.ALL:
header += f"from {get_head_branch(repo)}^0\n"
header += f"from {orig_branch}^0\n"
p.stdin.write(header.encode("utf8"))

# Write any extra blobs supplied by the client or needed for this version.
Expand Down Expand Up @@ -183,6 +195,23 @@ def fast_import_tables(
if not quiet:
click.echo(f"Closed in {(t3-t2):.0f}s")

if import_branch != orig_branch:
try:
if head_tree and not allow_empty:
if repo.revparse_single(import_branch).peel(pygit2.Tree) == head_tree:
raise NotFound("No changes to commit", exit_code=NO_CHANGES)
# reset the original branch head to the import branch, so it gets the new commits
if head_tree:
# repo was non-empty before this, so orig_branch exists already.
# we have to delete and re-create it at the new commit.
repo.references.delete(orig_branch)
repo.references.create(
orig_branch, repo.references[import_branch].peel(pygit2.Commit).oid
)
finally:
# remove the import branch
repo.references.delete(import_branch)


def get_head_tree(repo):
"""Returns the tree at the current repo HEAD."""
Expand Down Expand Up @@ -214,14 +243,16 @@ def write_blobs_to_stream(stream, blobs):
yield i, blob_path


def generate_header(repo, sources, message):
def generate_header(repo, sources, message, branch=None):
if message is None:
message = generate_message(sources)

author = git_util.author_signature(repo)
committer = git_util.committer_signature(repo)
if branch is None:
branch = get_head_branch(repo)
return (
f"commit {get_head_branch(repo)}\n"
f"commit {branch}\n"
f"author {author.name} <{author.email}> {author.time} {minutes_to_tz_offset(author.offset)}\n"
f"committer {committer.name} <{committer.email}> {committer.time} {minutes_to_tz_offset(committer.offset)}\n"
f"data {len(message.encode('utf8'))}\n{message}\n"
Expand Down
34 changes: 30 additions & 4 deletions sno/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ def temporary_branch(repo):
@click.pass_context
@click.argument("source")
@click.argument(
"tables", nargs=-1,
"tables",
nargs=-1,
)
@click.option(
"--all-tables",
Expand Down Expand Up @@ -141,7 +142,10 @@ def temporary_branch(repo):
help="List available import formats, and then exit",
)
@click.option(
"--output-format", "-o", type=click.Choice(["text", "json"]), default="text",
"--output-format",
"-o",
type=click.Choice(["text", "json"]),
default="text",
)
@click.option(
"--primary-key",
Expand All @@ -152,6 +156,16 @@ def temporary_branch(repo):
is_flag=True,
help="Replace existing dataset(s) of the same name.",
)
@click.option(
"--allow-empty",
is_flag=True,
default=False,
help=(
"Usually recording a commit that has the exact same tree as its sole "
"parent commit is a mistake, and the command prevents you from making "
"such a commit. This option bypasses the safety"
),
)
@click.option(
"--max-delta-depth",
hidden=True,
Expand All @@ -170,6 +184,7 @@ def import_table(
tables,
table_info,
replace_existing,
allow_empty,
max_delta_depth,
):
"""
Expand Down Expand Up @@ -261,6 +276,7 @@ def import_table(
replace_existing=ReplaceExisting.GIVEN
if replace_existing
else ReplaceExisting.DONT_REPLACE,
allow_empty=allow_empty,
)

rs = RepositoryStructure(repo)
Expand Down Expand Up @@ -314,7 +330,14 @@ def import_table(
help="--depth option to git-fast-import (advanced users only)",
)
def init(
ctx, message, directory, repo_version, import_from, bare, wc_path, max_delta_depth,
ctx,
message,
directory,
repo_version,
import_from,
bare,
wc_path,
max_delta_depth,
):
"""
Initialise a new repository and optionally import data.
Expand Down Expand Up @@ -347,7 +370,10 @@ def init(

if import_from:
fast_import_tables(
repo, sources, message=message, max_delta_depth=max_delta_depth,
repo,
sources,
message=message,
max_delta_depth=max_delta_depth,
)
head_commit = repo.head.peel(pygit2.Commit)
checkout.reset_wc_if_needed(repo, head_commit)
Expand Down
62 changes: 57 additions & 5 deletions tests/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ def test_import_table_with_prompt(data_archive_readonly, tmp_path, cli_runner, c
assert r.exit_code == 0
with chdir(repo_path):
r = cli_runner.invoke(
["import", data / "census2016_sdhca_ot_short.gpkg",],
[
"import",
data / "census2016_sdhca_ot_short.gpkg",
],
input="census2016_sdhca_ot_ced_short\n",
)
# Table was specified interactively via prompt
Expand Down Expand Up @@ -178,7 +181,11 @@ def test_import_table_with_prompt_with_no_input(


def test_import_replace_existing(
data_archive, tmp_path, cli_runner, chdir, geopackage,
data_archive,
tmp_path,
cli_runner,
chdir,
geopackage,
):
with data_archive("gpkg-polygons") as data:
repo_path = tmp_path / 'emptydir'
Expand Down Expand Up @@ -237,8 +244,45 @@ def test_import_replace_existing(
}


def test_import_replace_existing_with_no_changes(
data_archive,
tmp_path,
cli_runner,
chdir,
geopackage,
):
with data_archive("gpkg-polygons") as data:
repo_path = tmp_path / 'emptydir'
r = cli_runner.invoke(["init", repo_path])
assert r.exit_code == 0
with chdir(repo_path):
r = cli_runner.invoke(
[
"import",
data / "nz-waca-adjustments.gpkg",
"nz_waca_adjustments:mytable",
]
)
assert r.exit_code == 0, r.stderr

# now import the same thing over the top (no changes)
r = cli_runner.invoke(
[
"import",
"--replace-existing",
data / "nz-waca-adjustments.gpkg",
"nz_waca_adjustments:mytable",
]
)
assert r.exit_code == 44, r.stderr


def test_import_replace_existing_with_compatible_schema_changes(
data_archive, tmp_path, cli_runner, chdir, geopackage,
data_archive,
tmp_path,
cli_runner,
chdir,
geopackage,
):
with data_archive("gpkg-polygons") as data:
repo_path = tmp_path / 'emptydir'
Expand Down Expand Up @@ -304,7 +348,11 @@ def test_import_replace_existing_with_compatible_schema_changes(


def test_import_replace_existing_with_column_renames(
data_archive, tmp_path, cli_runner, chdir, geopackage,
data_archive,
tmp_path,
cli_runner,
chdir,
geopackage,
):
with data_archive("gpkg-polygons") as data:
repo_path = tmp_path / 'emptydir'
Expand Down Expand Up @@ -594,7 +642,11 @@ def test_init_import(


def test_init_import_commit_headers(
data_archive, tmp_path, cli_runner, chdir, geopackage,
data_archive,
tmp_path,
cli_runner,
chdir,
geopackage,
):
with data_archive("gpkg-points") as data:
repo_path = tmp_path / "data.sno"
Expand Down

0 comments on commit c15fc93

Please sign in to comment.