From 6e74958f5900855cc0c3b6484b2b2a7732293e6c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 Jun 2021 13:41:33 -0400 Subject: [PATCH 01/12] p2000: add 'git checkout -' test and decrease depth As we increase our list of commands to test in p2000-sparse-operations.sh, we will want to have a slightly smaller test repository. Reduce the size by a factor of four by reducing the depth of the step that creates a big index around a moderately-sized repository. Also add a step to run 'git checkout -' on repeat. This requires having a previous location in the reflog, so add that to the initialization steps. Signed-off-by: Derrick Stolee --- t/perf/p2000-sparse-operations.sh | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index 94513c97748943..f7f8c0121030a2 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -6,7 +6,7 @@ test_description="test performance of Git operations using the index" test_perf_default_repo -SPARSE_CONE=f2/f4/f1 +SPARSE_CONE=f2/f4 test_expect_success 'setup repo and indexes' ' git reset --hard HEAD && @@ -27,7 +27,7 @@ test_expect_success 'setup repo and indexes' ' OLD_COMMIT=$(git rev-parse HEAD) && OLD_TREE=$(git rev-parse HEAD^{tree}) && - for i in $(test_seq 1 4) + for i in $(test_seq 1 3) do cat >in <<-EOF && 100755 blob $BLOB a @@ -43,14 +43,23 @@ test_expect_success 'setup repo and indexes' ' done && git sparse-checkout init --cone && - git branch -f wide $OLD_COMMIT && + git sparse-checkout set $SPARSE_CONE && + git checkout -b wide $OLD_COMMIT && + + for l2 in f1 f2 f3 f4 + do + echo more bogus >>$SPARSE_CONE/$l2/a && + git commit -a -m "edit $SPARSE_CONE/$l2/a" || return 1 + done && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v3 && ( cd full-index-v3 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && - git update-index --index-version=3 + git update-index --index-version=3 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v4 && ( @@ -58,7 +67,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && - git update-index --index-version=4 + git update-index --index-version=4 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v3 && ( @@ -66,7 +76,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && - git update-index --index-version=3 + git update-index --index-version=3 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v4 && ( @@ -74,7 +85,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && - git update-index --index-version=4 + git update-index --index-version=4 && + git checkout HEAD~4 ) ' @@ -97,5 +109,6 @@ test_perf_on_all git status test_perf_on_all git add -A test_perf_on_all git add . test_perf_on_all git commit -a -m A +test_perf_on_all git checkout -f - test_done From 3e1d03c41be9701c2cd518afc6ebc2797bca4398 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 7 Jun 2021 07:26:00 -0400 Subject: [PATCH 02/12] p2000: compress repo names By using shorter names for the test repos, we will get a slightly more compressed performance summary without comprimising clarity. Signed-off-by: Derrick Stolee --- t/perf/p2000-sparse-operations.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index f7f8c0121030a2..597626276fbeae 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -52,36 +52,36 @@ test_expect_success 'setup repo and indexes' ' git commit -a -m "edit $SPARSE_CONE/$l2/a" || return 1 done && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v3 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v3 && ( - cd full-index-v3 && + cd full-v3 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && git update-index --index-version=3 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v4 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v4 && ( - cd full-index-v4 && + cd full-v4 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && git update-index --index-version=4 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v3 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-v3 && ( - cd sparse-index-v3 && + cd sparse-v3 && git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && git update-index --index-version=3 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v4 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-v4 && ( - cd sparse-index-v4 && + cd sparse-v4 && git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && @@ -92,8 +92,8 @@ test_expect_success 'setup repo and indexes' ' test_perf_on_all () { command="$@" - for repo in full-index-v3 full-index-v4 \ - sparse-index-v3 sparse-index-v4 + for repo in full-v3 full-v4 \ + sparse-v3 sparse-v4 do test_perf "$command ($repo)" " ( From cd94f820052c948e522f947a05797ebaf1907121 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 May 2021 16:13:20 -0400 Subject: [PATCH 03/12] commit: integrate with sparse-index Update 'git commit' to allow using the sparse-index in memory without expanding to a full one. The only place that had an ensure_full_index() call was in cache_tree_update(). The recursive algorithm for update_one() was already updated in 2de37c536 (cache-tree: integrate with sparse directory entries, 2021-03-03) to handle sparse directory entries in the index. Most of this change involves testing different command-line options that allow specifying which on-disk changes should be included in the commit. This includes no options (only take currently-staged changes), -a (take all tracked changes), and --include (take a list of specific changes). To simplify testing that these options do not expand the index, update the test that previously verified that 'git status' does not expand the index with a helper method, ensure_not_expanded(). This allows 'git commit' to operate much faster when the sparse-checkout cone is much smaller than the full list of files at HEAD. Here are the relevant lines from p2000-sparse-operations.sh: Test HEAD~1 HEAD ---------------------------------------------------------------------------------- 2000.14: git commit -a -m A (full-v3) 0.35(0.26+0.06) 0.36(0.28+0.07) +2.9% 2000.15: git commit -a -m A (full-v4) 0.32(0.26+0.05) 0.34(0.28+0.06) +6.3% 2000.16: git commit -a -m A (sparse-v3) 0.63(0.59+0.06) 0.04(0.05+0.05) -93.7% 2000.17: git commit -a -m A (sparse-v4) 0.64(0.59+0.08) 0.04(0.04+0.04) -93.8% It is important to compare the full-index case to the sparse-index case, so the improvement for index version v4 is actually an 88% improvement in this synthetic example. In a real repository with over two million files at HEAD and 60,000 files in the sparse-checkout definition, the time for 'git commit -a' went from 2.61 seconds to 134ms. I compared this to the result if the index only contained the paths in the sparse-checkout definition and found the theoretical optimum to be 120ms, so the out-of-cone paths only add a 12% overhead. Signed-off-by: Derrick Stolee --- builtin/commit.c | 3 ++ cache-tree.c | 2 - t/t1092-sparse-checkout-compatibility.sh | 47 ++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 12f51db158a0e0..0bc64892505f7c 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1682,6 +1682,9 @@ int cmd_commit(int argc, const char **argv, const char *prefix) if (argc == 2 && !strcmp(argv[1], "-h")) usage_with_options(builtin_commit_usage, builtin_commit_options); + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + status_init_config(&s, git_commit_config); s.commit_template = 1; status_format = STATUS_FORMAT_NONE; /* Ignore status.short */ diff --git a/cache-tree.c b/cache-tree.c index 45e58666afcb49..577b18d8811c02 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -461,8 +461,6 @@ int cache_tree_update(struct index_state *istate, int flags) if (i) return i; - ensure_full_index(istate); - if (!istate->cache_tree) istate->cache_tree = cache_tree(); diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index cabbd42e339041..d3e34d0acaccc9 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -262,6 +262,34 @@ test_expect_success 'add, commit, checkout' ' test_all_match git checkout - ' +test_expect_success 'commit including unstaged changes' ' + init_repos && + + write_script edit-file <<-\EOF && + echo $1 >$2 + EOF + + run_on_all ../edit-file 1 a && + run_on_all ../edit-file 1 deep/a && + + test_all_match git commit -m "-a" -a && + test_all_match git status --porcelain=v2 && + + run_on_all ../edit-file 2 a && + run_on_all ../edit-file 2 deep/a && + + test_all_match git commit -m "--include" --include deep/a && + test_all_match git status --porcelain=v2 && + test_all_match git commit -m "--include" --include a && + test_all_match git status --porcelain=v2 && + + run_on_all ../edit-file 3 a && + run_on_all ../edit-file 3 deep/a && + + test_all_match git commit -m "--amend" -a --amend && + test_all_match git status --porcelain=v2 +' + test_expect_success 'status/add: outside sparse cone' ' init_repos && @@ -514,14 +542,25 @@ test_expect_success 'sparse-index is expanded and converted back' ' test_region index ensure_full_index trace2.txt ' -test_expect_success 'sparse-index is not expanded' ' - init_repos && - +ensure_not_expanded () { rm -f trace2.txt && echo >>sparse-index/untracked.txt && GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \ - git -C sparse-index status && + git -C sparse-index "$@" && test_region ! index ensure_full_index trace2.txt +} + +test_expect_success 'sparse-index is not expanded' ' + init_repos && + + ensure_not_expanded status && + ensure_not_expanded commit --allow-empty -m empty && + echo >>sparse-index/a && + ensure_not_expanded commit -a -m a && + echo >>sparse-index/a && + ensure_not_expanded commit --include a -m a && + echo >>sparse-index/deep/deeper1/a && + ensure_not_expanded commit --include deep/deeper1/a -m deeper ' # NEEDSWORK: a sparse-checkout behaves differently from a full checkout From 65e79b8037ca0268363db9ff967c1342a76485c5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 22 Jun 2021 15:55:09 -0400 Subject: [PATCH 04/12] sparse-index: recompute cache-tree When some commands run with command_requires_full_index=1, then the index can get in a state where the in-memory cache tree is actually equal to the sparse index's cache tree instead of the full one. This results in incorrect entry_count values. By clearing the cache tree before converting to sparse, we avoid this issue. Signed-off-by: Derrick Stolee --- sparse-index.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sparse-index.c b/sparse-index.c index 53c8f711ccc82e..c6b4feec413a8f 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -170,6 +170,8 @@ int convert_to_sparse(struct index_state *istate) if (index_has_unmerged_entries(istate)) return 0; + /* Clear and recompute the cache-tree */ + cache_tree_free(&istate->cache_tree); if (cache_tree_update(istate, 0)) { warning(_("unable to update cache-tree, staying full")); return -1; From e9a9981477e9d6ba3aef96e39db5736b334d7189 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 25 May 2021 06:59:05 -0400 Subject: [PATCH 05/12] checkout: stop expanding sparse indexes Previous changes did the necessary improvements to unpack-trees.c and diff-lib.c in order to modify a sparse index based on its comparision with a tree. The only remaining work is to remove some ensure_full_index() calls and add tests that verify that the index is not expanded in our interesting cases. Include 'switch' and 'restore' in these tests, as they share a base implementation with 'checkout'. Here are the relevant performance results from p2000-sparse-operations.sh: Test HEAD~1 HEAD -------------------------------------------------------------------------------- 2000.18: git checkout -f - (full-v3) 0.49(0.43+0.03) 0.47(0.39+0.05) -4.1% 2000.19: git checkout -f - (full-v4) 0.45(0.37+0.06) 0.42(0.37+0.05) -6.7% 2000.20: git checkout -f - (sparse-v3) 0.76(0.71+0.07) 0.04(0.03+0.04) -94.7% 2000.21: git checkout -f - (sparse-v4) 0.75(0.72+0.04) 0.05(0.06+0.04) -93.3% It is important to compare the full index case to the sparse index case, as the previous results for the sparse index were inflated by the index expansion. For index v4, this is an 88% improvement. On an internal repository with over two million paths at HEAD and a sparse-checkout definition containing ~60,000 of those paths, 'git checkout' went from 3.5s to 297ms with this change. The theoretical optimum where only those ~60,000 paths exist was 275ms, so the extra sparse directory entries contribute a 22ms overhead. Signed-off-by: Derrick Stolee --- builtin/checkout.c | 8 +++----- t/t1092-sparse-checkout-compatibility.sh | 10 +++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index f4cd7747d35dd1..b5d477919a7438 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -378,9 +378,6 @@ static int checkout_worktree(const struct checkout_opts *opts, if (pc_workers > 1) init_parallel_checkout(); - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); - for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -530,8 +527,6 @@ static int checkout_paths(const struct checkout_opts *opts, * Make sure all pathspecs participated in locating the paths * to be checked out. */ - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); for (pos = 0; pos < active_nr; pos++) if (opts->overlay_mode) mark_ce_for_checkout_overlay(active_cache[pos], @@ -1593,6 +1588,9 @@ static int checkout_main(int argc, const char **argv, const char *prefix, git_config(git_checkout_config, opts); + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + opts->track = BRANCH_TRACK_UNSPECIFIED; if (!opts->accept_pathspec && !opts->accept_ref) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index d3e34d0acaccc9..fde3b41aba8b86 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -560,7 +560,15 @@ test_expect_success 'sparse-index is not expanded' ' echo >>sparse-index/a && ensure_not_expanded commit --include a -m a && echo >>sparse-index/deep/deeper1/a && - ensure_not_expanded commit --include deep/deeper1/a -m deeper + ensure_not_expanded commit --include deep/deeper1/a -m deeper && + ensure_not_expanded checkout rename-out-to-out && + ensure_not_expanded checkout - && + ensure_not_expanded switch rename-out-to-out && + ensure_not_expanded switch - && + git -C sparse-index reset --hard && + ensure_not_expanded checkout rename-out-to-out -- deep/deeper1 && + git -C sparse-index reset --hard && + ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 ' # NEEDSWORK: a sparse-checkout behaves differently from a full checkout From 4b801c854fb7891a6530121281cff3f67451b283 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 19 Jul 2021 11:01:13 -0400 Subject: [PATCH 06/12] t1092: document bad 'git checkout' behavior Add new branches to the test repo that demonstrate directory/file conflicts in different ways. Since the directory 'folder1/' has adjacent files 'folder1-', 'folder1.txt', and 'folder10' it causes searches for 'folder1/' to land in a different place in the index than a search for 'folder1'. This causes a change in behavior when working with the df-conflict-1 and df-conflict-2 branches, whose only difference is that the first uses 'folder1' as the conflict and the other uses 'folder2' which does not have these adjacent files. We can extend two tests that compare the behavior across different 'git checkout' commands, and we see already that the behavior will be different in some cases and not in others. The difference between the two test loops is that one uses 'git reset --hard' between iterations. Further, we isolate the behavior of creating a staged change within a directory and then checking out a branch where that directory is replaced with a file. A full checkout behaves differently across these two cases, while a sparse-checkout cone behaves consistently. In both cases, the behavior is wrong. In one case, the staged change is dropped entirely. The other case the staged change is kept, replacing the file at that location, but none of the other files in the directory are kept. Likely, the correct behavior in this case is to reject the checkout and report the conflict, leaving HEAD in its previous location. None of the cases behave this way currently. Use comments to demonstrate that the tested behavior is only a documentation of the current, incorrect behavior to ensure we do not _accidentally_ change it. Instead, we would prefer to change it on purpose with a future change. At this point, the sparse-index does not handle these 'git checkout' commands correctly. Or rather, it _does_ reject the 'git checkout' when we have the staged change, but for the wrong reason. It also rejects the 'git checkout' commands when there is no staged change and we want to replace a directory with a file. A fix for that unstaged case will follow in the next change, but that will make the sparse-index agree with the full checkout case in these documented incorrect behaviors. Helped-by: Elijah Newren Signed-off-by: Derrick Stolee --- t/t1092-sparse-checkout-compatibility.sh | 142 ++++++++++++++++++++++- 1 file changed, 140 insertions(+), 2 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index fde3b41aba8b86..79b4a8ce1999c6 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -95,6 +95,25 @@ test_expect_success 'setup' ' git add . && git commit -m "rename deep/deeper1/... to folder1/..." && + git checkout -b df-conflict-1 base && + rm -rf folder1 && + echo content >folder1 && + git add . && + git commit -m "dir to file" && + + git checkout -b df-conflict-2 base && + rm -rf folder2 && + echo content >folder2 && + git add . && + git commit -m "dir to file" && + + git checkout -b fd-conflict base && + rm a && + mkdir a && + echo content >a/a && + git add . && + git commit -m "file to dir" && + git checkout -b deepest base && echo "updated deepest" >deep/deeper1/deepest/a && git commit -a -m "update deepest" && @@ -358,10 +377,16 @@ test_expect_success 'diff --staged' ' test_all_match git diff --staged ' +# NEEDSWORK: sparse-checkout behaves differently from full-checkout when +# running this test with 'df-conflict-2' after 'df-conflict-1'. test_expect_success 'diff with renames and conflicts' ' init_repos && - for branch in rename-out-to-out rename-out-to-in rename-in-to-out + for branch in rename-out-to-out \ + rename-out-to-in \ + rename-in-to-out \ + df-conflict-1 \ + fd-conflict do test_all_match git checkout rename-base && test_all_match git checkout $branch -- . && @@ -371,10 +396,15 @@ test_expect_success 'diff with renames and conflicts' ' done ' +# NEEDSWORK: the sparse-index fails to move HEAD across a directory/file +# conflict such as when checking out df-conflict-1 and df-conflict2. test_expect_success 'diff with directory/file conflicts' ' init_repos && - for branch in rename-out-to-out rename-out-to-in rename-in-to-out + for branch in rename-out-to-out \ + rename-out-to-in \ + rename-in-to-out \ + fd-conflict do git -C full-checkout reset --hard && test_sparse_match git reset --hard && @@ -606,4 +636,112 @@ test_expect_success 'add everything with deep new file' ' test_all_match git status --porcelain=v2 ' +# NEEDSWORK: 'git checkout' behaves incorrectly in the case of +# directory/file conflicts, even without sparse-checkout. Use this +# test only as a documentation of the incorrect behavior, not a +# measure of how it _should_ behave. +test_expect_success 'checkout behaves oddly with df-conflict-1' ' + init_repos && + + test_sparse_match git sparse-checkout disable && + + write_script edit-content <<-\EOF && + echo content >>folder1/larger-content + git add folder1 + EOF + + run_on_all ../edit-content && + test_all_match git status --porcelain=v2 && + + git -C sparse-checkout sparse-checkout init --cone && + git -C sparse-index sparse-checkout init --cone --sparse-index && + + test_all_match git status --porcelain=v2 && + + # This checkout command should fail, because we have a staged + # change to folder1/larger-content, but the destination changes + # folder1 to a file. + git -C full-checkout checkout df-conflict-1 \ + 1>full-checkout-out \ + 2>full-checkout-err && + git -C sparse-checkout checkout df-conflict-1 \ + 1>sparse-checkout-out \ + 2>sparse-checkout-err && + + # NEEDSWORK: the sparse-index case refuses to change HEAD here, + # but for the wrong reason. + test_must_fail git -C sparse-index checkout df-conflict-1 \ + 1>sparse-index-out \ + 2>sparse-index-err && + + # Instead, the checkout deletes the folder1 file and adds the + # folder1/larger-content file, leaving all other paths that were + # in folder1/ as deleted (without any warning). + cat >expect <<-EOF && + D folder1 + A folder1/larger-content + EOF + test_cmp expect full-checkout-out && + test_cmp expect sparse-checkout-out && + + # stderr: Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err +' + +# NEEDSWORK: 'git checkout' behaves incorrectly in the case of +# directory/file conflicts, even without sparse-checkout. Use this +# test only as a documentation of the incorrect behavior, not a +# measure of how it _should_ behave. +test_expect_success 'checkout behaves oddly with df-conflict-2' ' + init_repos && + + test_sparse_match git sparse-checkout disable && + + write_script edit-content <<-\EOF && + echo content >>folder2/larger-content + git add folder2 + EOF + + run_on_all ../edit-content && + test_all_match git status --porcelain=v2 && + + git -C sparse-checkout sparse-checkout init --cone && + git -C sparse-index sparse-checkout init --cone --sparse-index && + + test_all_match git status --porcelain=v2 && + + # This checkout command should fail, because we have a staged + # change to folder1/larger-content, but the destination changes + # folder1 to a file. + git -C full-checkout checkout df-conflict-2 \ + 1>full-checkout-out \ + 2>full-checkout-err && + git -C sparse-checkout checkout df-conflict-2 \ + 1>sparse-checkout-out \ + 2>sparse-checkout-err && + + # NEEDSWORK: the sparse-index case refuses to change HEAD + # here, but for the wrong reason. + test_must_fail git -C sparse-index checkout df-conflict-2 \ + 1>sparse-index-out \ + 2>sparse-index-err && + + # The full checkout deviates from the df-conflict-1 case here! + # It drops the change to folder1/larger-content and leaves the + # folder1 path as-is on disk. + test_must_be_empty full-checkout-out && + + # In the sparse-checkout case, the checkout deletes the folder1 + # file and adds the folder1/larger-content file, leaving all other + # paths that were in folder1/ as deleted (without any warning). + cat >expect <<-EOF && + D folder2 + A folder2/larger-content + EOF + test_cmp expect sparse-checkout-out && + + # Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err +' + test_done From 71e301501c88399711a1bf8515d1747e92cfbb9b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 19 Jul 2021 11:01:33 -0400 Subject: [PATCH 07/12] unpack-trees: resolve sparse-directory/file conflicts When running unpack_trees() with a sparse index, we attempt to operate on the index without expanding the sparse directory entries. Thus, we operate by manipulating entire directories and passing them to the unpack function. In the case of the 'git checkout' command, this is the twoway_merge() function. There are several cases in twoway_merge() that handle different situations. One new one to add is the case of a directory/file conflict where the directory is sparse. Before the sparse index, such a conflict would appear as a list of file additions and deletions. Now, twoway_merge() initializes 'current', 'oldtree', and 'newtree' from src[0], src[1], and src[2], then sets 'oldtree' to NULL because it is equal to the df_conflict_entry. The way to determine that we have a directory/file conflict is to test that 'current' and 'newtree' disagree on being sparse directory entries. When we are in this case, we want to resolve the situation by calling merged_entry(). This allows replacing the 'current' entry with the 'newtree' entry. This is important for cases where we want to run 'git checkout' across the conflict and have the new HEAD represent the new file type at that path. The first NEEDSWORK comment dropped in t1092 demonstrates this necessary behavior. However, we still are in a confusing state when 'current' corresponds to a staged change within a sparse directory that is not present at HEAD. This should be atypical, because it requires adding a change outside of the sparse-checkout cone, but it is possible. Since we are unable to determine that this is a staged change within twoway_merge(), we cannot add a case to reject the merge at this point. I believe this is due to the use of df_conflict_entry in the place of 'oldtree' instead of using the valud at HEAD, which would provide some perspective to this decision. Any change that would allow this differentiation for staged entries would need to involve information further up in unpack_trees(). That work should be done, sometime, because we are further confusing the behavior of a directory/file conflict when staging a change in the directory. The two cases 'checkout behaves oddly with df-conflict-?' in t1092 demonstrate that even without a sparse-checkout, Git is not consistent in its behavior. Neither of the two options seems correct, either. This change makes the sparse-index behave differently than the typcial sparse-checkout case, but it does match the full checkout behavior in the df-conflict-2 case. Signed-off-by: Derrick Stolee --- t/t1092-sparse-checkout-compatibility.sh | 24 ++++++++++++------------ unpack-trees.c | 11 +++++++++++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 79b4a8ce1999c6..91e30d6ec2248a 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -396,14 +396,14 @@ test_expect_success 'diff with renames and conflicts' ' done ' -# NEEDSWORK: the sparse-index fails to move HEAD across a directory/file -# conflict such as when checking out df-conflict-1 and df-conflict2. test_expect_success 'diff with directory/file conflicts' ' init_repos && for branch in rename-out-to-out \ rename-out-to-in \ rename-in-to-out \ + df-conflict-1 \ + df-conflict-2 \ fd-conflict do git -C full-checkout reset --hard && @@ -667,10 +667,7 @@ test_expect_success 'checkout behaves oddly with df-conflict-1' ' git -C sparse-checkout checkout df-conflict-1 \ 1>sparse-checkout-out \ 2>sparse-checkout-err && - - # NEEDSWORK: the sparse-index case refuses to change HEAD here, - # but for the wrong reason. - test_must_fail git -C sparse-index checkout df-conflict-1 \ + git -C sparse-index checkout df-conflict-1 \ 1>sparse-index-out \ 2>sparse-index-err && @@ -684,7 +681,11 @@ test_expect_success 'checkout behaves oddly with df-conflict-1' ' test_cmp expect full-checkout-out && test_cmp expect sparse-checkout-out && + # The sparse-index reports no output + test_must_be_empty sparse-index-out && + # stderr: Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err && test_cmp full-checkout-err sparse-checkout-err ' @@ -719,17 +720,15 @@ test_expect_success 'checkout behaves oddly with df-conflict-2' ' git -C sparse-checkout checkout df-conflict-2 \ 1>sparse-checkout-out \ 2>sparse-checkout-err && - - # NEEDSWORK: the sparse-index case refuses to change HEAD - # here, but for the wrong reason. - test_must_fail git -C sparse-index checkout df-conflict-2 \ + git -C sparse-index checkout df-conflict-2 \ 1>sparse-index-out \ 2>sparse-index-err && # The full checkout deviates from the df-conflict-1 case here! # It drops the change to folder1/larger-content and leaves the - # folder1 path as-is on disk. + # folder1 path as-is on disk. The sparse-index behaves the same. test_must_be_empty full-checkout-out && + test_must_be_empty sparse-index-out && # In the sparse-checkout case, the checkout deletes the folder1 # file and adds the folder1/larger-content file, leaving all other @@ -741,7 +740,8 @@ test_expect_success 'checkout behaves oddly with df-conflict-2' ' test_cmp expect sparse-checkout-out && # Switched to branch df-conflict-1 - test_cmp full-checkout-err sparse-checkout-err + test_cmp full-checkout-err sparse-checkout-err && + test_cmp full-checkout-err sparse-index-err ' test_done diff --git a/unpack-trees.c b/unpack-trees.c index 0a5135ab39745d..309c1352f5d280 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -2619,6 +2619,17 @@ int twoway_merge(const struct cache_entry * const *src, same(current, oldtree) && !same(current, newtree)) { /* 20 or 21 */ return merged_entry(newtree, current, o); + } else if (current && !oldtree && newtree && + S_ISSPARSEDIR(current->ce_mode) != S_ISSPARSEDIR(newtree->ce_mode) && + ce_stage(current) == 0) { + /* + * This case is a directory/file conflict across the sparse-index + * boundary. When we are changing from one path to another via + * 'git checkout', then we want to replace one entry with another + * via merged_entry(). If there are staged changes, then we should + * reject the merge instead. + */ + return merged_entry(newtree, current, o); } else return reject_merge(current, o); } From 5e96df4df582744a89b10ce55887a2c2aacc7e70 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 21 Jul 2021 14:23:11 -0400 Subject: [PATCH 08/12] t1092: test merge conflicts outside cone Conflicts can occur outside of the sparse-checkout definition, and in that case users might try to resolve the conflicts in several ways. Document a few of these ways in a test. Make it clear that this behavior is not necessarily the optimal flow, since users can become confused when Git deletes these files from the worktree in later commands. Reviewed-by: Elijah Newren Signed-off-by: Derrick Stolee --- t/t1092-sparse-checkout-compatibility.sh | 43 ++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 91e30d6ec2248a..4c3bcb349992f2 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -114,6 +114,16 @@ test_expect_success 'setup' ' git add . && git commit -m "file to dir" && + for side in left right + do + git checkout -b merge-$side base && + echo $side >>deep/deeper2/a && + echo $side >>folder1/a && + echo $side >>folder2/a && + git add . && + git commit -m "$side" || return 1 + done && + git checkout -b deepest base && echo "updated deepest" >deep/deeper1/deepest/a && git commit -a -m "update deepest" && @@ -482,6 +492,39 @@ test_expect_success 'merge' ' test_all_match git rev-parse HEAD^{tree} ' +# NEEDSWORK: This test is documenting current behavior, but that +# behavior can be confusing to users so there is desire to change it. +# Right now, users might be using this flow to work through conflicts, +# so any solution should present advice to users who try this sequence +# of commands to follow whatever new method we create. +test_expect_success 'merge with conflict outside cone' ' + init_repos && + + test_all_match git checkout -b merge-tip merge-left && + test_all_match git status --porcelain=v2 && + test_all_match test_must_fail git merge -m merge merge-right && + test_all_match git status --porcelain=v2 && + + # Resolve the conflict in different ways: + # 1. Revert to the base + test_all_match git checkout base -- deep/deeper2/a && + test_all_match git status --porcelain=v2 && + + # 2. Add the file with conflict markers + test_all_match git add folder1/a && + test_all_match git status --porcelain=v2 && + + # 3. Rename the file to another sparse filename and + # accept conflict markers as resolved content. + run_on_all mv folder2/a folder2/z && + test_all_match git add folder2 && + test_all_match git status --porcelain=v2 && + + test_all_match git merge --continue && + test_all_match git status --porcelain=v2 && + test_all_match git rev-parse HEAD^{tree} +' + test_expect_success 'merge with outside renames' ' init_repos && From defab1b86d3427c9e369e81cc481083a7dacace7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 11 Jan 2021 21:26:41 -0500 Subject: [PATCH 09/12] add: allow operating on a sparse-only index Disable command_requires_full_index for 'git add'. This does not require any additional removals of ensure_full_index(). The main reason is that 'git add' discovers changes based on the pathspec and the worktree itself. These are then inserted into the index directly, and calls to index_name_pos() or index_file_exists() already call expand_to_path() at the appropriate time to support a sparse-index. Add a test to check that 'git add -A' and 'git add ' does not expand the index at all, as long as is not within a sparse directory. This does not help the global 'git add .' case. We can measure the improvement using p2000-sparse-operations.sh with these results: Test HEAD~1 HEAD ------------------------------------------------------------------------------ 2000.6: git add -A (full-index-v3) 0.35(0.30+0.05) 0.37(0.29+0.06) +5.7% 2000.7: git add -A (full-index-v4) 0.31(0.26+0.06) 0.33(0.27+0.06) +6.5% 2000.8: git add -A (sparse-index-v3) 0.57(0.53+0.07) 0.05(0.04+0.08) -91.2% 2000.9: git add -A (sparse-index-v4) 0.58(0.55+0.06) 0.05(0.05+0.06) -91.4% While the 91% improvement seems impressive, it's important to recognize that previously we had significant overhead for expanding the sparse-index. Comparing to the full index case, 'git add -A' goes from 0.37s to 0.05s, which is "only" an 86% improvement. This modification to 'git add' creates some behavior change depending on the use of a sparse index. We modify a test in t1092 to demonstrate these changes which will be remedied in future changes. Reviewed-by: Elijah Newren Signed-off-by: Derrick Stolee --- builtin/add.c | 3 +++ t/t1092-sparse-checkout-compatibility.sh | 25 +++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index b773b5a4993e3e..c76e6ddd359e41 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -528,6 +528,9 @@ int cmd_add(int argc, const char **argv, const char *prefix) add_new_files = !take_worktree_changes && !refresh_only && !add_renormalize; require_pathspec = !(take_worktree_changes || (0 < addremove_explicit)); + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR); /* diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 4c3bcb349992f2..77343cb6d95cac 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -340,21 +340,27 @@ test_expect_success 'status/add: outside sparse cone' ' test_sparse_match git status --porcelain=v2 && - # This "git add folder1/a" fails with a warning - # in the sparse repos, differing from the full - # repo. This is intentional. + # Adding the path outside of the sparse-checkout cone should fail. test_sparse_match test_must_fail git add folder1/a && - test_sparse_match test_must_fail git add --refresh folder1/a && - test_all_match git status --porcelain=v2 && + + test_must_fail git -C sparse-checkout add --refresh folder1/a 2>sparse-checkout-err && + test_must_fail git -C sparse-index add --refresh folder1/a 2>sparse-index-err && + # NEEDSWORK: A sparse index changes the error message. + ! test_cmp sparse-checkout-err sparse-index-err && + + # NEEDSWORK: Adding a newly-tracked file outside the cone succeeds + test_sparse_match git add folder1/new && test_all_match git add . && test_all_match git status --porcelain=v2 && test_all_match git commit -m folder1/new && + test_all_match git rev-parse HEAD^{tree} && run_on_all ../edit-contents folder1/newer && test_all_match git add folder1/ && test_all_match git status --porcelain=v2 && - test_all_match git commit -m folder1/newer + test_all_match git commit -m folder1/newer && + test_all_match git rev-parse HEAD^{tree} ' test_expect_success 'checkout and reset --hard' ' @@ -641,7 +647,12 @@ test_expect_success 'sparse-index is not expanded' ' git -C sparse-index reset --hard && ensure_not_expanded checkout rename-out-to-out -- deep/deeper1 && git -C sparse-index reset --hard && - ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 + ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 && + + echo >>sparse-index/README.md && + ensure_not_expanded add -A && + echo >>sparse-index/extra.txt && + ensure_not_expanded add extra.txt ' # NEEDSWORK: a sparse-checkout behaves differently from a full checkout From 9fc4313c88959cb6eab43b2e34750b4ed889771a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 12 Jan 2021 11:19:24 -0500 Subject: [PATCH 10/12] pathspec: stop calling ensure_full_index The add_pathspec_matches_against_index() focuses on matching a pathspec to file entries in the index. This already works correctly for its only use: checking if untracked files exist in the index. The compatibility checks in t1092 already test that 'git add ' works for a directory outside of the sparse cone. That provides coverage for removing this guard. This finalizes our ability to run 'git add .' without expanding a sparse index to a full one. This is evidenced by an update to t1092 and by these performance numbers for p2000-sparse-operations.sh: Test HEAD~1 HEAD -------------------------------------------------------------------------------- 2000.10: git add . (full-index-v3) 0.37(0.28+0.07) 0.36(0.27+0.06) -2.7% 2000.11: git add . (full-index-v4) 0.33(0.26+0.06) 0.32(0.28+0.05) -3.0% 2000.12: git add . (sparse-index-v3) 0.57(0.53+0.07) 0.06(0.06+0.07) -89.5% 2000.13: git add . (sparse-index-v4) 0.57(0.53+0.07) 0.05(0.03+0.09) -91.2% While the ~90% improvement is shown by the test results, it is worth noting that expanding the sparse index was adding overhead in previous commits. Comparing to the full index case, we see the performance go from 0.33s to 0.05s, an 85% improvement. Reviewed-by: Elijah Newren Signed-off-by: Derrick Stolee --- pathspec.c | 2 -- t/t1092-sparse-checkout-compatibility.sh | 7 +++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pathspec.c b/pathspec.c index 08f8d3eedc39aa..44306fdaca2ee8 100644 --- a/pathspec.c +++ b/pathspec.c @@ -37,8 +37,6 @@ void add_pathspec_matches_against_index(const struct pathspec *pathspec, num_unmatched++; if (!num_unmatched) return; - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(istate); for (i = 0; i < istate->cache_nr; i++) { const struct cache_entry *ce = istate->cache[i]; if (sw_action == PS_IGNORE_SKIP_WORKTREE && ce_skip_worktree(ce)) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 77343cb6d95cac..dee20db5bb193a 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -322,9 +322,6 @@ test_expect_success 'commit including unstaged changes' ' test_expect_success 'status/add: outside sparse cone' ' init_repos && - # adding a "missing" file outside the cone should fail - test_sparse_match test_must_fail git add folder1/a && - # folder1 is at HEAD, but outside the sparse cone run_on_sparse mkdir folder1 && cp initial-repo/folder1/a sparse-checkout/folder1/a && @@ -652,7 +649,9 @@ test_expect_success 'sparse-index is not expanded' ' echo >>sparse-index/README.md && ensure_not_expanded add -A && echo >>sparse-index/extra.txt && - ensure_not_expanded add extra.txt + ensure_not_expanded add extra.txt && + echo >>sparse-index/untracked.txt && + ensure_not_expanded add . ' # NEEDSWORK: a sparse-checkout behaves differently from a full checkout From 0ec03ab021da8b8a2278af3e14960c5cf4689646 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 17:02:36 -0400 Subject: [PATCH 11/12] add: ignore outside the sparse-checkout in refresh() Since b243012 (refresh_index(): add flag to ignore SKIP_WORKTREE entries, 2021-04-08), 'git add --refresh ' will output a warning message when the path is outside the sparse-checkout definition. The implementation of this warning happened in parallel with the sparse-index work to add ensure_full_index() calls throughout the codebase. Update this loop to have the proper logic that checks to see if the pathspec is outside the sparse-checkout definition. This avoids the need to expand the sparse directory entry and determine if the path is tracked, untracked, or ignored. We simply avoid updating the stat() information because there isn't even an entry that matches the path! Reviewed-by: Elijah Newren Signed-off-by: Derrick Stolee --- builtin/add.c | 10 +++++++++- t/t1092-sparse-checkout-compatibility.sh | 6 +----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index c76e6ddd359e41..d512ece655bce1 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -192,13 +192,21 @@ static int refresh(int verbose, const struct pathspec *pathspec) struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP; int flags = REFRESH_IGNORE_SKIP_WORKTREE | (verbose ? REFRESH_IN_PORCELAIN : REFRESH_QUIET); + struct pattern_list pl = { 0 }; + int sparse_checkout_enabled = !get_sparse_checkout_patterns(&pl); seen = xcalloc(pathspec->nr, 1); refresh_index(&the_index, flags, pathspec, seen, _("Unstaged changes after refreshing the index:")); for (i = 0; i < pathspec->nr; i++) { if (!seen[i]) { - if (matches_skip_worktree(pathspec, i, &skip_worktree_seen)) { + const char *path = pathspec->items[i].original; + int dtype = DT_REG; + + if (matches_skip_worktree(pathspec, i, &skip_worktree_seen) || + (sparse_checkout_enabled && + !path_matches_pattern_list(path, strlen(path), NULL, + &dtype, &pl, &the_index))) { string_list_append(&only_match_skip_worktree, pathspec->items[i].original); } else { diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index dee20db5bb193a..ddc86bb4152361 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -339,11 +339,7 @@ test_expect_success 'status/add: outside sparse cone' ' # Adding the path outside of the sparse-checkout cone should fail. test_sparse_match test_must_fail git add folder1/a && - - test_must_fail git -C sparse-checkout add --refresh folder1/a 2>sparse-checkout-err && - test_must_fail git -C sparse-index add --refresh folder1/a 2>sparse-index-err && - # NEEDSWORK: A sparse index changes the error message. - ! test_cmp sparse-checkout-err sparse-index-err && + test_sparse_match test_must_fail git add --refresh folder1/a && # NEEDSWORK: Adding a newly-tracked file outside the cone succeeds test_sparse_match git add folder1/new && From adf5b15ac3d44d92e0438451ef36631ed3ee2a63 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 26 Jul 2021 10:06:09 -0400 Subject: [PATCH 12/12] add: remove ensure_full_index() with --renormalize The --renormalize option updates the EOL conversions for the tracked files. However, the loop already ignores files marked with the SKIP_WORKTREE bit, so it will continue to do so with a sparse index because the sparse directory entries also have this bit set. Reviewed-by: Elijah Newren Signed-off-by: Derrick Stolee --- builtin/add.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index d512ece655bce1..c49e179abc36fa 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -144,8 +144,6 @@ static int renormalize_tracked_files(const struct pathspec *pathspec, int flags) { int i, retval = 0; - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i];