From 74e2925d854b787577f24da8cf0f983f92ff462e Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Fri, 16 Apr 2021 15:06:50 +1200 Subject: [PATCH] Review changes --- sno/fast_import.py | 56 +++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/sno/fast_import.py b/sno/fast_import.py index dc6f39ff5..11a81a6c9 100644 --- a/sno/fast_import.py +++ b/sno/fast_import.py @@ -227,7 +227,7 @@ def fast_import_tables( cmd.append("--quiet") orig_commit = repo.head_commit - import_branches = [] + import_refs = [] if verbosity >= 1: click.echo("Starting git-fast-import...") @@ -246,14 +246,12 @@ def fast_import_tables( for i in range(num_processes): if header is None: # import onto a temp branch. then reset the head branch afterwards. - import_branch = f"refs/heads/{uuid.uuid4()}" - import_branches.append(import_branch) + import_ref = f"refs/sno-import/{uuid.uuid4()}" + import_refs.append(import_ref) # may be None, if head is detached orig_branch = repo.head_branch - generated_header = generate_header( - repo, sources, message, import_branch - ) + generated_header = generate_header(repo, sources, message, import_ref) else: generated_header = header # this won't work if num_processes > 1 because we'll try and write to @@ -403,34 +401,40 @@ def _ids(): if verbosity >= 1: click.echo(f"Closed in {(t3-t2):.0f}s") - if import_branches: + if import_refs: # we created temp branches for the import above. # each of the branches has _part_ of the import. # we have to merge the trees together to get a sensible commit. try: - trees = [repo.revparse_single(b).peel(pygit2.Tree) for b in import_branches] - builder = RichTreeBuilder(repo, trees[0]) - for t in trees[1:]: - datasets = Datasets(t, SUPPORTED_DATASET_CLASS) - for ds in datasets: - try: - feature_tree = ds.feature_tree - except KeyError: - pass - else: - for subtree in feature_tree: - builder.insert( - f"{ds.path}/{ds.FEATURE_PATH}{subtree.name}", subtree - ) - new_tree = builder.flush() + trees = [repo.revparse_single(b).peel(pygit2.Tree) for b in import_refs] + if len(import_refs) > 1: + click.echo(f"Joining {len(import_refs)} parallel-imported trees...") + builder = RichTreeBuilder(repo, trees[0]) + for t in trees[1:]: + datasets = Datasets(t, SUPPORTED_DATASET_CLASS) + for ds in datasets: + try: + feature_tree = ds.feature_tree + except KeyError: + pass + else: + for subtree in feature_tree: + builder.insert( + f"{ds.path}/{ds.FEATURE_PATH}{subtree.name}", + subtree, + ) + new_tree = builder.flush() + t4 = time.monotonic() + click.echo(f"Joined trees in {(t4-t3):.0f}s") + else: + new_tree = trees[0] + t4 = time.monotonic() if not allow_empty: if new_tree == orig_tree: raise NotFound("No changes to commit", exit_code=NO_CHANGES) # use the existing commit details we already imported, but use the new tree - existing_commit = repo.revparse_single(import_branches[0]).peel( - pygit2.Commit - ) + existing_commit = repo.revparse_single(import_refs[0]).peel(pygit2.Commit) repo.create_commit( orig_branch or "HEAD", existing_commit.author, @@ -441,7 +445,7 @@ def _ids(): ) finally: # remove the import branches - for b in import_branches: + for b in import_refs: repo.references.delete(b)