From 15f3e1e05683ce710f22d9b1ef0f6b849f1d4b36 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Tue, 4 May 2021 02:12:07 +0000
Subject: [PATCH 001/110] t6423: rename file within directory that other side
 renamed

Add a new testcase where one side of history renames:
   olddir/ -> newdir/
and the other side of history renames:
   olddir/a -> olddir/alpha

When using merge.directoryRenames=true, it seems logical to expect the
file to end up at newdir/alpha.  Unfortunately, both merge-recursive and
merge-ort currently see this as a rename/rename conflict:

   olddir/a -> newdir/a
vs.
   olddir/a -> newdir/alpha

Suggesting that there's some extra logic we probably want to add
somewhere to allow this case to run without triggering a conflict.  For
now simply document this known issue.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t6423-merge-rename-directories.sh | 58 +++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh
index 4c3d0b95dc5c6f..3037c5c9bf479f 100755
--- a/t/t6423-merge-rename-directories.sh
+++ b/t/t6423-merge-rename-directories.sh
@@ -4966,6 +4966,64 @@ test_expect_success '12g: Testcase with two kinds of "relevant" renames' '
 	)
 '
 
+# Testcase 12h, Testcase with two kinds of "relevant" renames
+#   Commit O: olddir/{a_1, b}
+#   Commit A: newdir/{a_2, b}
+#   Commit B: olddir/{alpha_1, b}
+#   Expected: newdir/{alpha_2, b}
+
+test_setup_12h () {
+	test_create_repo 12h &&
+	(
+		cd 12h &&
+
+		mkdir olddir &&
+		test_seq 3 8 >olddir/a &&
+		>olddir/b &&
+		git add olddir &&
+		git commit -m orig &&
+
+		git branch O &&
+		git branch A &&
+		git branch B &&
+
+		git switch A &&
+		test_seq 3 10 >olddir/a &&
+		git add olddir/a &&
+		git mv olddir newdir &&
+		git commit -m A &&
+
+		git switch B &&
+
+		git mv olddir/a olddir/alpha &&
+		git commit -m B
+	)
+}
+
+test_expect_failure '12h: renaming a file within a renamed directory' '
+	test_setup_12h &&
+	(
+		cd 12h &&
+
+		git checkout A^0 &&
+
+		test_might_fail git -c merge.directoryRenames=true merge -s recursive B^0 &&
+
+		git ls-files >tracked &&
+		test_line_count = 2 tracked &&
+
+		test_path_is_missing olddir/a &&
+		test_path_is_file newdir/alpha &&
+		test_path_is_file newdir/b &&
+
+		git rev-parse >actual \
+			HEAD:newdir/alpha  HEAD:newdir/b &&
+		git rev-parse >expect \
+			A:newdir/a         O:oldir/b &&
+		test_cmp expect actual
+	)
+'
+
 ###########################################################################
 # SECTION 13: Checking informational and conflict messages
 #

From edc23840b0baad017d02037f0a833eaa600ee21d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:00 +0200
Subject: [PATCH 002/110] test-lib: bring $remove_trash out of retirement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's no point in creating a repository or directory only to decide
right afterwards that we're skipping all the tests. We can save
ourselves the redundant "git init" or "mkdir" and "rm -rf" in this
case.

We carry around the "$remove_trash" variable because if the directory
is unexpectedly gone at test_done time we'll still want to hit the
"trash directory already removed" error, but not if we never created
the trash directory. See df4c0d1a792 (test-lib: abort when can't
remove trash directory, 2017-04-20) for the addition of that error.

So let's partially revert 06478dab4c (test-lib: retire $remove_trash
variable, 2017-04-23) and move the decision about whether to skip all
tests earlier.

Let's also fix a bug that was with us since abc5d372ec (Enable
parallel tests, 2008-08-08): we would leak $remove_trash from the
environment. We don't want this to error out, so let's reset it to the
empty string first:

     remove_trash=t GIT_SKIP_TESTS=t0001 ./t0001-init.sh

I tested this with --debug, see 4d0912a206 (test-lib.sh: do not barf
under --debug at the end of the test, 2017-04-24) for a bug we don't
want to re-introduce.

While I'm at it, let's move the HOME assignment to just before
test_create_repo, it could be lower, but it seems better to set it
before calling anything in test-lib-functions.sh

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib.sh | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index d3f6af6a65451c..b81d57bc0d8229 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1167,7 +1167,7 @@ test_done () {
 			esac
 		fi
 
-		if test -z "$debug"
+		if test -z "$debug" && test -n "$remove_trash"
 		then
 			test -d "$TRASH_DIRECTORY" ||
 			error "Tests passed but trash directory already removed before test cleanup; aborting"
@@ -1332,6 +1332,22 @@ then
 	exit 1
 fi
 
+# Are we running this test at all?
+remove_trash=
+this_test=${0##*/}
+this_test=${this_test%%-*}
+if match_pattern_list "$this_test" $GIT_SKIP_TESTS
+then
+	say_color info >&3 "skipping test $this_test altogether"
+	skip_all="skip all tests in $this_test"
+	test_done
+fi
+
+# Last-minute variable setup
+HOME="$TRASH_DIRECTORY"
+GNUPGHOME="$HOME/gnupg-home-not-used"
+export HOME GNUPGHOME
+
 # Test repository
 rm -fr "$TRASH_DIRECTORY" || {
 	GIT_EXIT_OK=t
@@ -1339,10 +1355,7 @@ rm -fr "$TRASH_DIRECTORY" || {
 	exit 1
 }
 
-HOME="$TRASH_DIRECTORY"
-GNUPGHOME="$HOME/gnupg-home-not-used"
-export HOME GNUPGHOME
-
+remove_trash=t
 if test -z "$TEST_NO_CREATE_REPO"
 then
 	test_create_repo "$TRASH_DIRECTORY"
@@ -1354,15 +1367,6 @@ fi
 # in subprocesses like git equals our $PWD (for pathname comparisons).
 cd -P "$TRASH_DIRECTORY" || exit 1
 
-this_test=${0##*/}
-this_test=${this_test%%-*}
-if match_pattern_list "$this_test" $GIT_SKIP_TESTS
-then
-	say_color info >&3 "skipping test $this_test altogether"
-	skip_all="skip all tests in $this_test"
-	test_done
-fi
-
 if test -n "$write_junit_xml"
 then
 	junit_xml_dir="$TEST_OUTPUT_DIRECTORY/out"

From b57913f205ce73f8bc2e8bab3719a50f2f3a7199 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:01 +0200
Subject: [PATCH 003/110] test-lib tests: remove dead
 GIT_TEST_FRAMEWORK_SELFTEST variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stop setting the GIT_TEST_FRAMEWORK_SELFTEST variable. This was originally needed
back in 4231d1ba99 (t0000: do not get self-test disrupted by
environment warnings, 2018-09-20).

It hasn't been needed since I deleted the relevant code in test-lib.sh
in c0eedbc009 (test-lib: remove check_var_migration, 2021-02-09), I
just didn't notice that it was set here.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t0000-basic.sh | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh
index 705d62cc27a574..2c6e34b9478e37 100755
--- a/t/t0000-basic.sh
+++ b/t/t0000-basic.sh
@@ -84,10 +84,6 @@ _run_sub_test_lib_test_common () {
 		passing metrics
 		'
 
-		# Tell the framework that we are self-testing to make sure
-		# it yields a stable result.
-		GIT_TEST_FRAMEWORK_SELFTEST=t &&
-
 		# Point to the t/test-lib.sh, which isn't in ../ as usual
 		. "\$TEST_DIRECTORY"/test-lib.sh
 		EOF

From cb8fb7f861a8178bd78a1e5b1f2cbe5a20de6eb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:02 +0200
Subject: [PATCH 004/110] test-lib-functions: reword "test_commit --append"
 docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reword the documentation for "test_commit --append" added in my
3373518cc8 (test-lib functions: add an --append option to test_commit,
2021-01-12).

A follow-up commit will make the "echo" part of this configurable, and
in any case saying "echo >>" rather than ">>" was redundant.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib-functions.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 6348e8d7339cda..d169fb2f5971b3 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -172,8 +172,7 @@ debug () {
 #   --notick
 #	Do not call test_tick before making a commit
 #   --append
-#	Use "echo >>" instead of "echo >" when writing "<contents>" to
-#	"<file>"
+#	Use ">>" instead of ">" when writing "<contents>" to "<file>"
 #   --signoff
 #	Invoke "git commit" with --signoff
 #   --author <author>

From 5144219b7d7aeac9290cbee5a7425763bd253667 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:03 +0200
Subject: [PATCH 005/110] test-lib-functions: document test_commit --no-tag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In 76b8b8d05c (test-lib functions: document arguments to test_commit,
2021-01-12) I added missing documentation to test_commit, but in less
than a month later in 3803a3a099 (t: add --no-tag option to
test_commit, 2021-02-09) we got another undocumented option. Let's fix
that.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib-functions.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index d169fb2f5971b3..d0f4f3885d62dd 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -177,6 +177,8 @@ debug () {
 #	Invoke "git commit" with --signoff
 #   --author <author>
 #	Invoke "git commit" with --author <author>
+#   --no-tag
+#	Do not tag the resulting commit
 #
 # This will commit a file with the given contents and the given commit
 # message, and tag the resulting commit with the given tag name.

From 6cf8d96fa28668a62f05be5b276739c03c514581 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:04 +0200
Subject: [PATCH 006/110] test-lib functions: add an --annotated option to
 "test_commit"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an --annotated option to test_commit to create annotated tags. The
tag will share the same message as the commit, and we'll call
test_tick before creating it (unless --notick) is provided.

There's quite a few tests that could be simplified with this
construct. I've picked one to convert in this change as a
demonstration.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1403-show-ref.sh     |  6 ++----
 t/test-lib-functions.sh | 27 ++++++++++++++++++++++-----
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/t/t1403-show-ref.sh b/t/t1403-show-ref.sh
index 6ce62f878c358d..17d3cc14050695 100755
--- a/t/t1403-show-ref.sh
+++ b/t/t1403-show-ref.sh
@@ -7,11 +7,9 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 . ./test-lib.sh
 
 test_expect_success setup '
-	test_commit A &&
-	git tag -f -a -m "annotated A" A &&
+	test_commit --annotate A &&
 	git checkout -b side &&
-	test_commit B &&
-	git tag -f -a -m "annotated B" B &&
+	test_commit --annotate B &&
 	git checkout main &&
 	test_commit C &&
 	git branch B A^0
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index d0f4f3885d62dd..6e2332a324a6a1 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -179,6 +179,10 @@ debug () {
 #	Invoke "git commit" with --author <author>
 #   --no-tag
 #	Do not tag the resulting commit
+#   --annotate
+#	Create an annotated tag with "--annotate -m <message>". Calls
+#	test_tick between making the commit and tag, unless --notick
+#	is given.
 #
 # This will commit a file with the given contents and the given commit
 # message, and tag the resulting commit with the given tag name.
@@ -191,7 +195,7 @@ test_commit () {
 	author= &&
 	signoff= &&
 	indir= &&
-	no_tag= &&
+	tag=light &&
 	while test $# != 0
 	do
 		case "$1" in
@@ -219,7 +223,10 @@ test_commit () {
 			shift
 			;;
 		--no-tag)
-			no_tag=yes
+			tag=none
+			;;
+		--annotate)
+			tag=annotate
 			;;
 		*)
 			break
@@ -243,10 +250,20 @@ test_commit () {
 	git ${indir:+ -C "$indir"} commit \
 	    ${author:+ --author "$author"} \
 	    $signoff -m "$1" &&
-	if test -z "$no_tag"
-	then
+	case "$tag" in
+	none)
+		;;
+	light)
 		git ${indir:+ -C "$indir"} tag "${4:-$1}"
-	fi
+		;;
+	annotate)
+		if test -z "$notick"
+		then
+			test_tick
+		fi &&
+		git ${indir:+ -C "$indir"} tag -a -m "$1" "${4:-$1}"
+		;;
+	esac
 }
 
 # Call test_merge with the arguments "<message> <commit>", where <commit>

From 8cfe386b78c15eff38388479aa2f9fae00a9cf53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:05 +0200
Subject: [PATCH 007/110] describe tests: convert setup to use test_commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert the setup of the describe tests to use test_commit when
possible. This makes use of the new --annotate option to test_commit.

Some of the setup here could simply be removed since the data being
created wasn't important to any of the subsequent tests, so I've done
so. E.g. assigning to the "one" variable was always useless, and just
checking that we can describe HEAD after the first commit wasn't
useful.

In the case of the "two" variable we could instead use the tag we just
created. See 5312ab11fbf (Add describe test., 2007-01-13) for the
initial version of this code. There's other cases here like redundant
"test_tick" invocations, or the simplification of not echoing "X" to a
file we're about to tag as "x", now we just use "x" in both cases.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t6120-describe.sh | 58 ++++++++++-----------------------------------
 1 file changed, 13 insertions(+), 45 deletions(-)

diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh
index e89b6747beec4f..88fddc91424006 100755
--- a/t/t6120-describe.sh
+++ b/t/t6120-describe.sh
@@ -31,64 +31,32 @@ check_describe () {
 }
 
 test_expect_success setup '
+	test_commit initial file one &&
+	test_commit second file two &&
+	test_commit third file three &&
+	test_commit --annotate A file A &&
+	test_commit c file c &&
 
-	test_tick &&
-	echo one >file && git add file && git commit -m initial &&
-	one=$(git rev-parse HEAD) &&
-
-	git describe --always HEAD &&
-
-	test_tick &&
-	echo two >file && git add file && git commit -m second &&
-	two=$(git rev-parse HEAD) &&
-
-	test_tick &&
-	echo three >file && git add file && git commit -m third &&
-
-	test_tick &&
-	echo A >file && git add file && git commit -m A &&
-	test_tick &&
-	git tag -a -m A A &&
-
-	test_tick &&
-	echo c >file && git add file && git commit -m c &&
-	test_tick &&
-	git tag c &&
-
-	git reset --hard $two &&
-	test_tick &&
-	echo B >side && git add side && git commit -m B &&
-	test_tick &&
-	git tag -a -m B B &&
+	git reset --hard second &&
+	test_commit --annotate B side B &&
 
 	test_tick &&
 	git merge -m Merged c &&
 	merged=$(git rev-parse HEAD) &&
 
-	git reset --hard $two &&
-	test_tick &&
-	echo D >another && git add another && git commit -m D &&
-	test_tick &&
-	git tag -a -m D D &&
-	test_tick &&
-	git tag -a -m R R &&
-
-	test_tick &&
-	echo DD >another && git commit -a -m another &&
+	git reset --hard second &&
+	test_commit --no-tag D another D &&
 
 	test_tick &&
-	git tag e &&
+	git tag -a -m R R &&
 
-	test_tick &&
-	echo DDD >another && git commit -a -m "yet another" &&
+	test_commit e another DD &&
+	test_commit --no-tag "yet another" another DDD &&
 
 	test_tick &&
 	git merge -m Merged $merged &&
 
-	test_tick &&
-	echo X >file && echo X >side && git add file side &&
-	git commit -m x
-
+	test_commit --no-tag x file
 '
 
 check_describe A-* HEAD

From 47c88d16ba6c5c0237238ac600ee8b74a522e41c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:06 +0200
Subject: [PATCH 008/110] test-lib functions: add --printf option to
 test_commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a --printf option to test_commit to allow writing to the file with
"printf" instead of "echo".

This is useful for writing "\n", "\0" etc., in particular in
combination with the --append option added in 3373518cc8 (test-lib
functions: add an --append option to test_commit, 2021-01-12).

I'm converting a few tests to use the new option rather than a manual
printf/add/commit combination to demonstrate its usefulness. While I'm
at it use "test_create_repo" where appropriate, and give the
first/second commit a meaningful/more conventional log message in
cases where no test cared about that message.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1307-config-blob.sh    |  4 +---
 t/t2030-unresolve-info.sh |  3 +--
 t/t4006-diff-mode.sh      |  6 ++----
 t/t4030-diff-textconv.sh  |  8 ++------
 t/t5520-pull.sh           | 10 ++--------
 t/test-lib-functions.sh   | 14 ++++++++++++--
 6 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/t/t1307-config-blob.sh b/t/t1307-config-blob.sh
index 002e6d3388e242..930dce06f0fdff 100755
--- a/t/t1307-config-blob.sh
+++ b/t/t1307-config-blob.sh
@@ -65,9 +65,7 @@ test_expect_success 'parse errors in blobs are properly attributed' '
 '
 
 test_expect_success 'can parse blob ending with CR' '
-	printf "[some]key = value\\r" >config &&
-	git add config &&
-	git commit -m CR &&
+	test_commit --printf CR config "[some]key = value\\r" &&
 	echo value >expect &&
 	git config --blob=HEAD:config some.key >actual &&
 	test_cmp expect actual
diff --git a/t/t2030-unresolve-info.sh b/t/t2030-unresolve-info.sh
index be6c84c52a2129..f691e6d90329f1 100755
--- a/t/t2030-unresolve-info.sh
+++ b/t/t2030-unresolve-info.sh
@@ -179,8 +179,7 @@ test_expect_success 'rerere and rerere forget (subdirectory)' '
 
 test_expect_success 'rerere forget (binary)' '
 	git checkout -f side &&
-	printf "a\0c" >binary &&
-	git commit -a -m binary &&
+	test_commit --printf binary binary "a\0c" &&
 	test_must_fail git merge second &&
 	git rerere forget binary
 '
diff --git a/t/t4006-diff-mode.sh b/t/t4006-diff-mode.sh
index 275ce5fa15be18..6cdee2a2164d0b 100755
--- a/t/t4006-diff-mode.sh
+++ b/t/t4006-diff-mode.sh
@@ -26,10 +26,8 @@ test_expect_success 'chmod' '
 '
 
 test_expect_success 'prepare binary file' '
-	git commit -m rezrov &&
-	printf "\00\01\02\03\04\05\06" >binbin &&
-	git add binbin &&
-	git commit -m binbin
+	git commit -m one &&
+	test_commit --printf two binbin "\00\01\02\03\04\05\06"
 '
 
 test_expect_success '--stat output after text chmod' '
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index c906320b60dcea..a39a626664d4a5 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -26,12 +26,8 @@ EOF
 chmod +x hexdump
 
 test_expect_success 'setup binary file with history' '
-	printf "\\0\\n" >file &&
-	git add file &&
-	git commit -m one &&
-	printf "\\01\\n" >>file &&
-	git add file &&
-	git commit -m two
+	test_commit --printf one file "\\0\\n" &&
+	test_commit --printf --append two file "\\01\\n"
 '
 
 test_expect_success 'file is considered binary by porcelain' '
diff --git a/t/t5520-pull.sh b/t/t5520-pull.sh
index a09411327f9a79..e2c0c510222dde 100755
--- a/t/t5520-pull.sh
+++ b/t/t5520-pull.sh
@@ -746,14 +746,8 @@ test_expect_success 'pull --rebase fails on corrupt HEAD' '
 '
 
 test_expect_success 'setup for detecting upstreamed changes' '
-	mkdir src &&
-	(
-		cd src &&
-		git init &&
-		printf "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n" > stuff &&
-		git add stuff &&
-		git commit -m "Initial revision"
-	) &&
+	test_create_repo src &&
+	test_commit -C src --printf one stuff "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n" &&
 	git clone src dst &&
 	(
 		cd src &&
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 6e2332a324a6a1..6f9199a65bef8d 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -173,6 +173,12 @@ debug () {
 #	Do not call test_tick before making a commit
 #   --append
 #	Use ">>" instead of ">" when writing "<contents>" to "<file>"
+#   --printf
+#       Use "printf" instead of "echo" when writing "<contents>" to
+#       "<file>", use this to write escape sequences such as "\0", a
+#       trailing "\n" won't be added automatically. This option
+#       supports nothing but the FORMAT of printf(1), i.e. no custom
+#       ARGUMENT(s).
 #   --signoff
 #	Invoke "git commit" with --signoff
 #   --author <author>
@@ -191,6 +197,7 @@ debug () {
 
 test_commit () {
 	notick= &&
+	echo=echo &&
 	append= &&
 	author= &&
 	signoff= &&
@@ -202,6 +209,9 @@ test_commit () {
 		--notick)
 			notick=yes
 			;;
+		--printf)
+			echo=printf
+			;;
 		--append)
 			append=yes
 			;;
@@ -238,9 +248,9 @@ test_commit () {
 	file=${2:-"$1.t"} &&
 	if test -n "$append"
 	then
-		echo "${3-$1}" >>"$indir$file"
+		$echo "${3-$1}" >>"$indir$file"
 	else
-		echo "${3-$1}" >"$indir$file"
+		$echo "${3-$1}" >"$indir$file"
 	fi &&
 	git ${indir:+ -C "$indir"} add "$file" &&
 	if test -z "$notick"

From ba7d318504a4f69e2b3ce664d6a3f2c413f5bf61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:07 +0200
Subject: [PATCH 009/110] submodule tests: use symbolic-ref --short to discover
 branch name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change a use of $GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME added in
704fed9ea22 (tests: start moving to a different default main branch
name, 2020-10-23) to simply discover the initial branch name of a
repository set up in this function with "symbolic-ref --short".

That's something done in another test in 704fed9ea22, so doing it like
this seems like an omission, or rather an overly eager
search/replacement instead of fixing the test logic.

There are only three uses of the GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
variable in the test suite, this gets rid of one of those.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-submodule-update.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/lib-submodule-update.sh b/t/lib-submodule-update.sh
index 4b714e93083f12..f7c7df0ca427b3 100644
--- a/t/lib-submodule-update.sh
+++ b/t/lib-submodule-update.sh
@@ -63,6 +63,7 @@ create_lib_submodule_repo () {
 	git init submodule_update_repo &&
 	(
 		cd submodule_update_repo &&
+		branch=$(git symbolic-ref --short HEAD) &&
 		echo "expect" >>.gitignore &&
 		echo "actual" >>.gitignore &&
 		echo "x" >file1 &&
@@ -144,7 +145,7 @@ create_lib_submodule_repo () {
 		git checkout -b valid_sub1 &&
 		git revert HEAD &&
 
-		git checkout "${GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME-master}"
+		git checkout "$branch"
 	)
 }
 

From 04d12d6590d316778f1b1f180a97fa94f9990928 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:08 +0200
Subject: [PATCH 010/110] test-lib: reformat argument list in
 test_create_repo()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reformat an argument list changed in 675704c74dd (init: provide useful
advice about init.defaultBranch, 2020-12-11) to have the "-c" on the
same line as the argument it sets. This whitespace-only change makes
it easier to review a subsequent commit.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib-functions.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 6f9199a65bef8d..bcb187b632b267 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1259,8 +1259,8 @@ test_create_repo () {
 	mkdir -p "$repo"
 	(
 		cd "$repo" || error "Cannot setup test environment"
-		"${GIT_TEST_INSTALLED:-$GIT_EXEC_PATH}/git$X" -c \
-			init.defaultBranch="${GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME-master}" \
+		"${GIT_TEST_INSTALLED:-$GIT_EXEC_PATH}/git$X" \
+			-c init.defaultBranch="${GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME-master}" \
 			init \
 			"--template=$GIT_BUILD_DIR/templates/blt/" >&3 2>&4 ||
 		error "cannot run git init -- have you built things yet?"

From 97c8aac9c5fd54b5b6092d7ae5d3289ce5d9afe8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:09 +0200
Subject: [PATCH 011/110] test-lib: do not show advice about init.defaultBranch
 under --verbose
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Arrange for the advice about naming the initial branch not to be shown
in the --verbose output of the test suite.

Since 675704c74dd (init: provide useful advice about
init.defaultBranch, 2020-12-11) some tests have been very chatty with
repeated occurrences of this multi-line advice. Having it be this
verbose isn't helpful for anyone in the context of git's own test
suite, and it makes debugging tests that use their own "git init"
invocations needlessly distracting.

By setting the GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME variable early in
test-lib.sh itself we'll squash the warning not only for
test_create_repo(), as 675704c74dd explicitly intended, but also for
other "git init" invocations.

And once we'd like to have this configuration set for all "git init"
invocations in the test suite we can get rid of the init.defaultBranch
configuration setting in test_create_repo(), as
repo_default_branch_name() in refs.c will take the GIT_TEST_* variable
over it being set.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib-functions.sh | 1 -
 t/test-lib.sh           | 5 +++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index bcb187b632b267..ed7299db3d923d 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1260,7 +1260,6 @@ test_create_repo () {
 	(
 		cd "$repo" || error "Cannot setup test environment"
 		"${GIT_TEST_INSTALLED:-$GIT_EXEC_PATH}/git$X" \
-			-c init.defaultBranch="${GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME-master}" \
 			init \
 			"--template=$GIT_BUILD_DIR/templates/blt/" >&3 2>&4 ||
 		error "cannot run git init -- have you built things yet?"
diff --git a/t/test-lib.sh b/t/test-lib.sh
index b81d57bc0d8229..bc696e29b9864a 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -64,6 +64,11 @@ then
 	export GIT_TEST_DISALLOW_ABBREVIATED_OPTIONS
 fi
 
+# Explicitly set the default branch name for testing, to avoid the
+# transitory "git init" warning under --verbose.
+: ${GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME:=master}
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
 ################################################################
 # It appears that people try to run tests without building...
 "${GIT_TEST_INSTALLED:-$GIT_BUILD_DIR}/git$X" >/dev/null

From f0d4d398e281009bc5e34d830b37c0c1df2fb8a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Mon, 10 May 2021 16:19:10 +0200
Subject: [PATCH 012/110] test-lib: split up and deprecate test_create_repo()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove various redundant or obsolete code from the test_create_repo()
function, and split up its use in test-lib.sh from what tests need
from it.

This leave us with a pass-through wrapper for "git init" in
test-lib-functions.sh, in test-lib.sh we have the same, except for
needing to redirect stdout/stderr, and emitting an error ourselves if
it fails. We don't need to error() ourselves when test_create_repo()
is invoked, as the invocation will be a part of a test's "&&"-chain.

Everything below this paragraph is a detailed summary of the history
of test_create_repo() explaining why it's safe to remove the various
things it was doing:

 1. "mkdir -p" isn't needed because "git init" itself will create
    leading directories if needed.

 2. Since we're now a simple wrapper for "git init" we don't need to
    check that we have only one argument. If someone wants to run
    "test_create_repo --bare x" that's OK.

 3. We won't ever hit that "Cannot setup test environment"
    error.

    Checking the test environment sanity when doing "git init" dates
    back to eea420693be (t0000: catch trivial pilot errors.,
    2005-12-10) and 2ccd2027b01 (trivial: check, if t/trash directory
    was successfully created, 2006-01-05).

    We can also see it in another form a bit later in my own
    0d314ce834d (test-lib: use subshell instead of cd $new && .. && cd
    $old, 2010-08-30).

    But since 2006f0adaee (t/test-lib: make sure Git has already been
    built, 2012-09-17) we already check if we have a built git
    earlier.

    The one thing this was testing after that 2012 change was that
    we'd just built "git", but not "git-init", but since
    3af4c7156c4 (tests: respect GIT_TEST_INSTALLED when initializing
    repositories, 2018-11-12) we invoke "git", not "git-init".

    So all of that's been checked already, and we don't need to
    re-check it here.

 4. We don't need to move .git/hooks out of the way.

    That dates back to c09a69a83e3 (Disable hooks during tests.,
    2005-10-16), since then hooks became disabled by default in
    f98f8cbac01 (Ship sample hooks with .sample suffix, 2008-06-24).

    So the hooks were already disabled by default, but as can be seen
    from "mkdir .git/hooks" changes various tests needed to re-setup
    that directory. Now they no longer do.

    This makes us implicitly depend on the default hooks being
    disabled, which is a good thing. If and when we'd have any
    on-by-default hooks (I see no reason we ever would) we'd want to
    see the subtle and not so subtle ways that would break the test
    suite.

 5. We don't need to "cd" to the "$repo" directory at all anymore.

    In the code being removed here we both "cd"'d to the repository
    before calling "init", and did so in a subshell.

    It's not important to do either, so both of those can be
    removed. We cd'd because this code grew from test-lib.sh code
    where we'd have done so already, see eedf8f97e58 (Abstract
    test_create_repo out for use in tests., 2006-02-17), and later
    "cd"'d inside a subshell since 0d314ce834d to avoid having to keep
    track of an "old pwd" variable to cd back after the setup.

    Being in the repository directory made moving the hooks around
    easier (we wouldn't have to fully qualify the path). Since we're
    not moving the hooks per #4 above we don't need to "cd" for that
    reason either.

 6. We can drop the --template argument and instead rely on the
    GIT_TEMPLATE_DIR set to the same path earlier in test-lib.sh. See
    8683a45d669 (Introduce GIT_TEMPLATE_DIR, 2006-12-19)

 7. We only needed that ">&3 2>&4" redirection when invoked from
    test-lib.sh.

    We could still invoke test_create_repo() there, but as the
    invocation is now trivial and we don't have a good reason to use
    test_create_repo() elsewhere let's call "git init" there
    ourselves.

 8. We didn't need to resolve "git" as
    "${GIT_TEST_INSTALLED:-$GIT_EXEC_PATH}/git$X" in test_create_repo(),
    even for the use of test-lib.sh

    PATH is already set up in test-lib.sh to start with
    GIT_TEST_INSTALLED and/or GIT_EXEC_PATH before
    test_create_repo() (now "git init") is called.. So we can simply
    run "git" and rely on the PATH lookup choosing the right
    executable.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5406-remote-rejects.sh           |  1 -
 t/t5407-post-rewrite-hook.sh        |  2 --
 t/t5409-colorize-remote-messages.sh |  1 -
 t/test-lib-functions.sh             | 15 ++-------------
 t/test-lib.sh                       |  3 ++-
 5 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/t/t5406-remote-rejects.sh b/t/t5406-remote-rejects.sh
index ff06f99649e4f0..5c509db6fc378e 100755
--- a/t/t5406-remote-rejects.sh
+++ b/t/t5406-remote-rejects.sh
@@ -5,7 +5,6 @@ test_description='remote push rejects are reported by client'
 . ./test-lib.sh
 
 test_expect_success 'setup' '
-	mkdir .git/hooks &&
 	write_script .git/hooks/update <<-\EOF &&
 	exit 1
 	EOF
diff --git a/t/t5407-post-rewrite-hook.sh b/t/t5407-post-rewrite-hook.sh
index 5bb23cc3a4e519..6da8d760e288dc 100755
--- a/t/t5407-post-rewrite-hook.sh
+++ b/t/t5407-post-rewrite-hook.sh
@@ -20,8 +20,6 @@ test_expect_success 'setup' '
 	git checkout main
 '
 
-mkdir .git/hooks
-
 cat >.git/hooks/post-rewrite <<EOF
 #!/bin/sh
 echo \$@ > "$TRASH_DIRECTORY"/post-rewrite.args
diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh
index 5d8f401d8ece8c..9f1a483f426ea9 100755
--- a/t/t5409-colorize-remote-messages.sh
+++ b/t/t5409-colorize-remote-messages.sh
@@ -5,7 +5,6 @@ test_description='remote messages are colorized on the client'
 . ./test-lib.sh
 
 test_expect_success 'setup' '
-	mkdir .git/hooks &&
 	write_script .git/hooks/update <<-\EOF &&
 	echo error: error
 	echo ERROR: also highlighted
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index ed7299db3d923d..93a3fca16d9508 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1250,21 +1250,10 @@ test_atexit () {
 		} && (exit \"\$eval_ret\"); eval_ret=\$?; $test_atexit_cleanup"
 }
 
-# Most tests can use the created repository, but some may need to create more.
+# Deprecated wrapper for "git init", use "git init" directly instead
 # Usage: test_create_repo <directory>
 test_create_repo () {
-	test "$#" = 1 ||
-	BUG "not 1 parameter to test-create-repo"
-	repo="$1"
-	mkdir -p "$repo"
-	(
-		cd "$repo" || error "Cannot setup test environment"
-		"${GIT_TEST_INSTALLED:-$GIT_EXEC_PATH}/git$X" \
-			init \
-			"--template=$GIT_BUILD_DIR/templates/blt/" >&3 2>&4 ||
-		error "cannot run git init -- have you built things yet?"
-		mv .git/hooks .git/hooks-disabled
-	) || exit
+	git init "$@"
 }
 
 # This function helps on symlink challenged file systems when it is not
diff --git a/t/test-lib.sh b/t/test-lib.sh
index bc696e29b9864a..e986c5839e9531 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1363,7 +1363,8 @@ rm -fr "$TRASH_DIRECTORY" || {
 remove_trash=t
 if test -z "$TEST_NO_CREATE_REPO"
 then
-	test_create_repo "$TRASH_DIRECTORY"
+	git init "$TRASH_DIRECTORY" >&3 2>&4 ||
+	error "cannot run git init"
 else
 	mkdir -p "$TRASH_DIRECTORY"
 fi

From 0caf20f22806dfd478ab3ac26b5f3682f383d3b4 Mon Sep 17 00:00:00 2001
From: ZheNing Hu <adlternative@gmail.com>
Date: Thu, 13 May 2021 15:15:37 +0000
Subject: [PATCH 013/110] ref-filter: add objectsize to used_atom

When the support for "objectsize:disk" was bolted onto the
existing support for "objectsize", it didn't follow the
usual pattern for handling "atomtype:modifier", which reads
the <modifier> part just once while parsing the format
string, and store the parsed result in the union in the
used_atom structure, so that the string form of it does not
have to be parsed over and over at runtime (e.g. in
grab_common_values()).

Add a new member `objectsize` to the union `used_atom.u`,
so that we can separate the check of <modifier> from the
check of <atomtype>, this will bring scalability to atom
`%(objectsize)`.

Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 ref-filter.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index e2eac50d9508ee..8cb622b95db2a2 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -146,6 +146,9 @@ static struct used_atom {
 			enum { O_FULL, O_LENGTH, O_SHORT } option;
 			unsigned int length;
 		} oid;
+		struct {
+			enum { O_SIZE, O_SIZE_DISK } option;
+		} objectsize;
 		struct email_option {
 			enum { EO_RAW, EO_TRIM, EO_LOCALPART } option;
 		} email_option;
@@ -269,11 +272,13 @@ static int objectsize_atom_parser(const struct ref_format *format, struct used_a
 				  const char *arg, struct strbuf *err)
 {
 	if (!arg) {
+		atom->u.objectsize.option = O_SIZE;
 		if (*atom->name == '*')
 			oi_deref.info.sizep = &oi_deref.size;
 		else
 			oi.info.sizep = &oi.size;
 	} else if (!strcmp(arg, "disk")) {
+		atom->u.objectsize.option = O_SIZE_DISK;
 		if (*atom->name == '*')
 			oi_deref.info.disk_sizep = &oi_deref.disk_size;
 		else
@@ -967,12 +972,14 @@ static void grab_common_values(struct atom_value *val, int deref, struct expand_
 			name++;
 		if (!strcmp(name, "objecttype"))
 			v->s = xstrdup(type_name(oi->type));
-		else if (!strcmp(name, "objectsize:disk")) {
-			v->value = oi->disk_size;
-			v->s = xstrfmt("%"PRIuMAX, (uintmax_t)oi->disk_size);
-		} else if (!strcmp(name, "objectsize")) {
-			v->value = oi->size;
-			v->s = xstrfmt("%"PRIuMAX , (uintmax_t)oi->size);
+		else if (starts_with(name, "objectsize")) {
+			if (used_atom[i].u.objectsize.option == O_SIZE_DISK) {
+				v->value = oi->disk_size;
+				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)oi->disk_size);
+			} else if (used_atom[i].u.objectsize.option == O_SIZE) {
+				v->value = oi->size;
+				v->s = xstrfmt("%"PRIuMAX , (uintmax_t)oi->size);
+			}
 		} else if (!strcmp(name, "deltabase"))
 			v->s = xstrdup(oid_to_hex(&oi->delta_base_oid));
 		else if (deref)

From 1197f1a46360d3ae96bd9c15908a3a6f8e562207 Mon Sep 17 00:00:00 2001
From: ZheNing Hu <adlternative@gmail.com>
Date: Thu, 13 May 2021 15:15:38 +0000
Subject: [PATCH 014/110] ref-filter: introduce enum atom_type

In the original ref-filter design, it will copy the parsed
atom's name and attributes to `used_atom[i].name` in the
atom's parsing step, and use it again for string matching
in the later specific ref attributes filling step. It use
a lot of string matching to determine which atom we need.

Introduce the enum "atom_type", each enum value is named
as `ATOM_*`, which is the index of each corresponding
valid_atom entry. In the first step of the atom parsing,
`used_atom.atom_type` will record corresponding enum value
from valid_atom entry index, and then in specific reference
attribute filling step, only need to compare the value of
the `used_atom[i].atom_type` to check the atom type.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Christian Couder <christian.couder@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 ref-filter.c | 197 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 128 insertions(+), 69 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 8cb622b95db2a2..4db0e40ff4c625 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -108,6 +108,56 @@ static struct ref_to_worktree_map {
 	struct worktree **worktrees;
 } ref_to_worktree_map;
 
+/*
+ * The enum atom_type is used as the index of valid_atom array.
+ * In the atom parsing stage, it will be passed to used_atom.atom_type
+ * as the identifier of the atom type. We can check the type of used_atom
+ * entry by `if (used_atom[i].atom_type == ATOM_*)`.
+ */
+enum atom_type {
+	ATOM_REFNAME,
+	ATOM_OBJECTTYPE,
+	ATOM_OBJECTSIZE,
+	ATOM_OBJECTNAME,
+	ATOM_DELTABASE,
+	ATOM_TREE,
+	ATOM_PARENT,
+	ATOM_NUMPARENT,
+	ATOM_OBJECT,
+	ATOM_TYPE,
+	ATOM_TAG,
+	ATOM_AUTHOR,
+	ATOM_AUTHORNAME,
+	ATOM_AUTHOREMAIL,
+	ATOM_AUTHORDATE,
+	ATOM_COMMITTER,
+	ATOM_COMMITTERNAME,
+	ATOM_COMMITTEREMAIL,
+	ATOM_COMMITTERDATE,
+	ATOM_TAGGER,
+	ATOM_TAGGERNAME,
+	ATOM_TAGGEREMAIL,
+	ATOM_TAGGERDATE,
+	ATOM_CREATOR,
+	ATOM_CREATORDATE,
+	ATOM_SUBJECT,
+	ATOM_BODY,
+	ATOM_TRAILERS,
+	ATOM_CONTENTS,
+	ATOM_UPSTREAM,
+	ATOM_PUSH,
+	ATOM_SYMREF,
+	ATOM_FLAG,
+	ATOM_HEAD,
+	ATOM_COLOR,
+	ATOM_WORKTREEPATH,
+	ATOM_ALIGN,
+	ATOM_END,
+	ATOM_IF,
+	ATOM_THEN,
+	ATOM_ELSE,
+};
+
 /*
  * An atom is a valid field atom listed below, possibly prefixed with
  * a "*" to denote deref_tag().
@@ -119,6 +169,7 @@ static struct ref_to_worktree_map {
  * array.
  */
 static struct used_atom {
+	enum atom_type atom_type;
 	const char *name;
 	cmp_type type;
 	info_source source;
@@ -506,47 +557,47 @@ static struct {
 	int (*parser)(const struct ref_format *format, struct used_atom *atom,
 		      const char *arg, struct strbuf *err);
 } valid_atom[] = {
-	{ "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
-	{ "objecttype", SOURCE_OTHER, FIELD_STR, objecttype_atom_parser },
-	{ "objectsize", SOURCE_OTHER, FIELD_ULONG, objectsize_atom_parser },
-	{ "objectname", SOURCE_OTHER, FIELD_STR, oid_atom_parser },
-	{ "deltabase", SOURCE_OTHER, FIELD_STR, deltabase_atom_parser },
-	{ "tree", SOURCE_OBJ, FIELD_STR, oid_atom_parser },
-	{ "parent", SOURCE_OBJ, FIELD_STR, oid_atom_parser },
-	{ "numparent", SOURCE_OBJ, FIELD_ULONG },
-	{ "object", SOURCE_OBJ },
-	{ "type", SOURCE_OBJ },
-	{ "tag", SOURCE_OBJ },
-	{ "author", SOURCE_OBJ },
-	{ "authorname", SOURCE_OBJ },
-	{ "authoremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
-	{ "authordate", SOURCE_OBJ, FIELD_TIME },
-	{ "committer", SOURCE_OBJ },
-	{ "committername", SOURCE_OBJ },
-	{ "committeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
-	{ "committerdate", SOURCE_OBJ, FIELD_TIME },
-	{ "tagger", SOURCE_OBJ },
-	{ "taggername", SOURCE_OBJ },
-	{ "taggeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
-	{ "taggerdate", SOURCE_OBJ, FIELD_TIME },
-	{ "creator", SOURCE_OBJ },
-	{ "creatordate", SOURCE_OBJ, FIELD_TIME },
-	{ "subject", SOURCE_OBJ, FIELD_STR, subject_atom_parser },
-	{ "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
-	{ "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
-	{ "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
-	{ "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
-	{ "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
-	{ "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
-	{ "flag", SOURCE_NONE },
-	{ "HEAD", SOURCE_NONE, FIELD_STR, head_atom_parser },
-	{ "color", SOURCE_NONE, FIELD_STR, color_atom_parser },
-	{ "worktreepath", SOURCE_NONE },
-	{ "align", SOURCE_NONE, FIELD_STR, align_atom_parser },
-	{ "end", SOURCE_NONE },
-	{ "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
-	{ "then", SOURCE_NONE },
-	{ "else", SOURCE_NONE },
+	[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
+	[ATOM_OBJECTTYPE] = { "objecttype", SOURCE_OTHER, FIELD_STR, objecttype_atom_parser },
+	[ATOM_OBJECTSIZE] = { "objectsize", SOURCE_OTHER, FIELD_ULONG, objectsize_atom_parser },
+	[ATOM_OBJECTNAME] = { "objectname", SOURCE_OTHER, FIELD_STR, oid_atom_parser },
+	[ATOM_DELTABASE] = { "deltabase", SOURCE_OTHER, FIELD_STR, deltabase_atom_parser },
+	[ATOM_TREE] = { "tree", SOURCE_OBJ, FIELD_STR, oid_atom_parser },
+	[ATOM_PARENT] = { "parent", SOURCE_OBJ, FIELD_STR, oid_atom_parser },
+	[ATOM_NUMPARENT] = { "numparent", SOURCE_OBJ, FIELD_ULONG },
+	[ATOM_OBJECT] = { "object", SOURCE_OBJ },
+	[ATOM_TYPE] = { "type", SOURCE_OBJ },
+	[ATOM_TAG] = { "tag", SOURCE_OBJ },
+	[ATOM_AUTHOR] = { "author", SOURCE_OBJ },
+	[ATOM_AUTHORNAME] = { "authorname", SOURCE_OBJ },
+	[ATOM_AUTHOREMAIL] = { "authoremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
+	[ATOM_AUTHORDATE] = { "authordate", SOURCE_OBJ, FIELD_TIME },
+	[ATOM_COMMITTER] = { "committer", SOURCE_OBJ },
+	[ATOM_COMMITTERNAME] = { "committername", SOURCE_OBJ },
+	[ATOM_COMMITTEREMAIL] = { "committeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
+	[ATOM_COMMITTERDATE] = { "committerdate", SOURCE_OBJ, FIELD_TIME },
+	[ATOM_TAGGER] = { "tagger", SOURCE_OBJ },
+	[ATOM_TAGGERNAME] = { "taggername", SOURCE_OBJ },
+	[ATOM_TAGGEREMAIL] = { "taggeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
+	[ATOM_TAGGERDATE] = { "taggerdate", SOURCE_OBJ, FIELD_TIME },
+	[ATOM_CREATOR] = { "creator", SOURCE_OBJ },
+	[ATOM_CREATORDATE] = { "creatordate", SOURCE_OBJ, FIELD_TIME },
+	[ATOM_SUBJECT] = { "subject", SOURCE_OBJ, FIELD_STR, subject_atom_parser },
+	[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
+	[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
+	[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
+	[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
+	[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
+	[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
+	[ATOM_FLAG] = { "flag", SOURCE_NONE },
+	[ATOM_HEAD] = { "HEAD", SOURCE_NONE, FIELD_STR, head_atom_parser },
+	[ATOM_COLOR] = { "color", SOURCE_NONE, FIELD_STR, color_atom_parser },
+	[ATOM_WORKTREEPATH] = { "worktreepath", SOURCE_NONE },
+	[ATOM_ALIGN] = { "align", SOURCE_NONE, FIELD_STR, align_atom_parser },
+	[ATOM_END] = { "end", SOURCE_NONE },
+	[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
+	[ATOM_THEN] = { "then", SOURCE_NONE },
+	[ATOM_ELSE] = { "else", SOURCE_NONE },
 	/*
 	 * Please update $__git_ref_fieldlist in git-completion.bash
 	 * when you add new atoms
@@ -628,6 +679,7 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 	at = used_atom_cnt;
 	used_atom_cnt++;
 	REALLOC_ARRAY(used_atom, used_atom_cnt);
+	used_atom[at].atom_type = i;
 	used_atom[at].name = xmemdupz(atom, ep - atom);
 	used_atom[at].type = valid_atom[i].cmp_type;
 	used_atom[at].source = valid_atom[i].source;
@@ -652,7 +704,7 @@ static int parse_ref_filter_atom(const struct ref_format *format,
 		return -1;
 	if (*atom == '*')
 		need_tagged = 1;
-	if (!strcmp(valid_atom[i].name, "symref"))
+	if (i == ATOM_SYMREF)
 		need_symref = 1;
 	return at;
 }
@@ -965,14 +1017,15 @@ static void grab_common_values(struct atom_value *val, int deref, struct expand_
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		const char *name = used_atom[i].name;
+		enum atom_type atom_type = used_atom[i].atom_type;
 		struct atom_value *v = &val[i];
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
-		if (!strcmp(name, "objecttype"))
+		if (atom_type == ATOM_OBJECTTYPE)
 			v->s = xstrdup(type_name(oi->type));
-		else if (starts_with(name, "objectsize")) {
+		else if (atom_type == ATOM_OBJECTSIZE) {
 			if (used_atom[i].u.objectsize.option == O_SIZE_DISK) {
 				v->value = oi->disk_size;
 				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)oi->disk_size);
@@ -980,9 +1033,9 @@ static void grab_common_values(struct atom_value *val, int deref, struct expand_
 				v->value = oi->size;
 				v->s = xstrfmt("%"PRIuMAX , (uintmax_t)oi->size);
 			}
-		} else if (!strcmp(name, "deltabase"))
+		} else if (atom_type == ATOM_DELTABASE)
 			v->s = xstrdup(oid_to_hex(&oi->delta_base_oid));
-		else if (deref)
+		else if (atom_type == ATOM_OBJECTNAME && deref)
 			grab_oid(name, "objectname", &oi->oid, v, &used_atom[i]);
 	}
 }
@@ -995,16 +1048,17 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		const char *name = used_atom[i].name;
+		enum atom_type atom_type = used_atom[i].atom_type;
 		struct atom_value *v = &val[i];
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
-		if (!strcmp(name, "tag"))
+		if (atom_type == ATOM_TAG)
 			v->s = xstrdup(tag->tag);
-		else if (!strcmp(name, "type") && tag->tagged)
+		else if (atom_type == ATOM_TYPE && tag->tagged)
 			v->s = xstrdup(type_name(tag->tagged->type));
-		else if (!strcmp(name, "object") && tag->tagged)
+		else if (atom_type == ATOM_OBJECT && tag->tagged)
 			v->s = xstrdup(oid_to_hex(&tag->tagged->oid));
 	}
 }
@@ -1017,18 +1071,20 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		const char *name = used_atom[i].name;
+		enum atom_type atom_type = used_atom[i].atom_type;
 		struct atom_value *v = &val[i];
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
-		if (grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i]))
+		if (atom_type == ATOM_TREE &&
+		    grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i]))
 			continue;
-		if (!strcmp(name, "numparent")) {
+		if (atom_type == ATOM_NUMPARENT) {
 			v->value = commit_list_count(commit->parents);
 			v->s = xstrfmt("%lu", (unsigned long)v->value);
 		}
-		else if (starts_with(name, "parent")) {
+		else if (atom_type == ATOM_PARENT) {
 			struct commit_list *parents;
 			struct strbuf s = STRBUF_INIT;
 			for (parents = commit->parents; parents; parents = parents->next) {
@@ -1208,15 +1264,16 @@ static void grab_person(const char *who, struct atom_value *val, int deref, void
 		return;
 	for (i = 0; i < used_atom_cnt; i++) {
 		const char *name = used_atom[i].name;
+		enum atom_type atom_type = used_atom[i].atom_type;
 		struct atom_value *v = &val[i];
 		if (!!deref != (*name == '*'))
 			continue;
 		if (deref)
 			name++;
 
-		if (starts_with(name, "creatordate"))
+		if (atom_type == ATOM_CREATORDATE)
 			grab_date(wholine, v, name);
-		else if (!strcmp(name, "creator"))
+		else if (atom_type == ATOM_CREATOR)
 			v->s = copy_line(wholine);
 	}
 }
@@ -1696,6 +1753,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 	/* Fill in specials first */
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
+		enum atom_type atom_type = atom->atom_type;
 		const char *name = used_atom[i].name;
 		struct atom_value *v = &ref->value[i];
 		int deref = 0;
@@ -1710,18 +1768,18 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			name++;
 		}
 
-		if (starts_with(name, "refname"))
+		if (atom_type == ATOM_REFNAME)
 			refname = get_refname(atom, ref);
-		else if (!strcmp(name, "worktreepath")) {
+		else if (atom_type == ATOM_WORKTREEPATH) {
 			if (ref->kind == FILTER_REFS_BRANCHES)
 				v->s = get_worktree_path(atom, ref);
 			else
 				v->s = xstrdup("");
 			continue;
 		}
-		else if (starts_with(name, "symref"))
+		else if (atom_type == ATOM_SYMREF)
 			refname = get_symref(atom, ref);
-		else if (starts_with(name, "upstream")) {
+		else if (atom_type == ATOM_UPSTREAM) {
 			const char *branch_name;
 			/* only local branches may have an upstream */
 			if (!skip_prefix(ref->refname, "refs/heads/",
@@ -1737,7 +1795,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			else
 				v->s = xstrdup("");
 			continue;
-		} else if (atom->u.remote_ref.push) {
+		} else if (atom_type == ATOM_PUSH && atom->u.remote_ref.push) {
 			const char *branch_name;
 			v->s = xstrdup("");
 			if (!skip_prefix(ref->refname, "refs/heads/",
@@ -1756,10 +1814,10 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			free((char *)v->s);
 			fill_remote_ref_details(atom, refname, branch, &v->s);
 			continue;
-		} else if (starts_with(name, "color:")) {
+		} else if (atom_type == ATOM_COLOR) {
 			v->s = xstrdup(atom->u.color);
 			continue;
-		} else if (!strcmp(name, "flag")) {
+		} else if (atom_type == ATOM_FLAG) {
 			char buf[256], *cp = buf;
 			if (ref->flag & REF_ISSYMREF)
 				cp = copy_advance(cp, ",symref");
@@ -1772,23 +1830,24 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 				v->s = xstrdup(buf + 1);
 			}
 			continue;
-		} else if (!deref && grab_oid(name, "objectname", &ref->objectname, v, atom)) {
-			continue;
-		} else if (!strcmp(name, "HEAD")) {
+		} else if (!deref && atom_type == ATOM_OBJECTNAME &&
+			   grab_oid(name, "objectname", &ref->objectname, v, atom)) {
+				continue;
+		} else if (atom_type == ATOM_HEAD) {
 			if (atom->u.head && !strcmp(ref->refname, atom->u.head))
 				v->s = xstrdup("*");
 			else
 				v->s = xstrdup(" ");
 			continue;
-		} else if (starts_with(name, "align")) {
+		} else if (atom_type == ATOM_ALIGN) {
 			v->handler = align_atom_handler;
 			v->s = xstrdup("");
 			continue;
-		} else if (!strcmp(name, "end")) {
+		} else if (atom_type == ATOM_END) {
 			v->handler = end_atom_handler;
 			v->s = xstrdup("");
 			continue;
-		} else if (starts_with(name, "if")) {
+		} else if (atom_type == ATOM_IF) {
 			const char *s;
 			if (skip_prefix(name, "if:", &s))
 				v->s = xstrdup(s);
@@ -1796,11 +1855,11 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 				v->s = xstrdup("");
 			v->handler = if_atom_handler;
 			continue;
-		} else if (!strcmp(name, "then")) {
+		} else if (atom_type == ATOM_THEN) {
 			v->handler = then_atom_handler;
 			v->s = xstrdup("");
 			continue;
-		} else if (!strcmp(name, "else")) {
+		} else if (atom_type == ATOM_ELSE) {
 			v->handler = else_atom_handler;
 			v->s = xstrdup("");
 			continue;

From a30e43f61a1e614309875ab7775f2274b4e40742 Mon Sep 17 00:00:00 2001
From: Alex Henrie <alexhenrie24@gmail.com>
Date: Sat, 15 May 2021 14:01:11 -0600
Subject: [PATCH 015/110] merge: don't translate literal commands

These strings have not been modified in any translation, nor should they
be.

Signed-off-by: Alex Henrie <alexhenrie24@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/merge.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 388619536aa1d2..2d3424cecc9157 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -56,8 +56,8 @@ struct strategy {
 
 static const char * const builtin_merge_usage[] = {
 	N_("git merge [<options>] [<commit>...]"),
-	N_("git merge --abort"),
-	N_("git merge --continue"),
+	"git merge --abort",
+	"git merge --continue",
 	NULL
 };
 

From cd5b33fbdc0299765278d0b607b64202603f0882 Mon Sep 17 00:00:00 2001
From: Gregory Anders <greg@gpanders.com>
Date: Fri, 14 May 2021 09:15:53 -0600
Subject: [PATCH 016/110] git-send-email: add option to specify sendmail
 command

The sendemail.smtpServer configuration option and --smtp-server command
line option both support using a sendmail-like program to send emails by
specifying an absolute file path. However, this is not ideal for the
following reasons:

1. It overloads the meaning of smtpServer (now a program is being used
   for the server?)
2. It doesn't allow for non-absolute paths, arguments, or arbitrary
   scripting

Requiring an absolute path is bad for portability, as the same program
may be in different locations on different systems. If a user wishes to
pass arguments to their program, they have to use the smtpServerOption
option, which is cumbersome (as it must be repeated for each option) and
doesn't adhere to normal git conventions.

Introduce a new configuration option sendemail.sendmailCmd as well as a
command line option --sendmail-cmd that can be used to specify a command
(with or without arguments) or shell expression to run to send email.
The name of this option is consistent with --to-cmd and --cc-cmd. This
invocation honors the user's $PATH so that absolute paths are not
necessary. Arbitrary shell expressions are also supported, allowing
users to do basic scripting.

Give this option a higher precedence over --smtp-server and
sendemail.smtpServer, as the new interface is more flexible. For
backward compatibility, continue to support absolute paths in
--smtp-server and sendemail.smtpServer.

Signed-off-by: Gregory Anders <greg@gpanders.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-send-email.txt | 25 ++++++++++++++++-------
 git-send-email.perl              | 34 +++++++++++++++++++++++++-------
 t/t9001-send-email.sh            | 31 +++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 14 deletions(-)

diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt
index 93708aefeaef33..3db4eab4ba7063 100644
--- a/Documentation/git-send-email.txt
+++ b/Documentation/git-send-email.txt
@@ -167,6 +167,14 @@ Sending
 	`sendemail.envelopeSender` configuration variable; if that is
 	unspecified, choosing the envelope sender is left to your MTA.
 
+--sendmail-cmd=<command>::
+	Specify a command to run to send the email. The command should
+	be sendmail-like; specifically, it must support the `-i` option.
+	The command will be executed in the shell if necessary.  Default
+	is the value of `sendemail.sendmailcmd`.  If unspecified, and if
+	--smtp-server is also unspecified, git-send-email will search
+	for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH.
+
 --smtp-encryption=<encryption>::
 	Specify the encryption to use, either 'ssl' or 'tls'.  Any other
 	value reverts to plain SMTP.  Default is the value of
@@ -211,13 +219,16 @@ a password is obtained using 'git-credential'.
 
 --smtp-server=<host>::
 	If set, specifies the outgoing SMTP server to use (e.g.
-	`smtp.example.com` or a raw IP address).  Alternatively it can
-	specify a full pathname of a sendmail-like program instead;
-	the program must support the `-i` option.  Default value can
-	be specified by the `sendemail.smtpServer` configuration
-	option; the built-in default is to search for `sendmail` in
-	`/usr/sbin`, `/usr/lib` and $PATH if such program is
-	available, falling back to `localhost` otherwise.
+	`smtp.example.com` or a raw IP address).  If unspecified, and if
+	`--sendmail-cmd` is also unspecified, the default is to search
+	for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH if such a
+	program is available, falling back to `localhost` otherwise.
++
+For backward compatibility, this option can also specify a full pathname
+of a sendmail-like program instead; the program must support the `-i`
+option.  This method does not support passing arguments or using plain
+command names.  For those use cases, consider using `--sendmail-cmd`
+instead.
 
 --smtp-server-port=<port>::
 	Specifies a port different from the default port (SMTP
diff --git a/git-send-email.perl b/git-send-email.perl
index 1f425c08091d40..d9cff040678a45 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -70,6 +70,7 @@ sub usage {
 
   Sending:
     --envelope-sender       <str>  * Email envelope sender.
+    --sendmail-cmd          <str>  * Command to run to send email.
     --smtp-server       <str:int>  * Outgoing SMTP server to use. The port
                                      is optional. Default 'localhost'.
     --smtp-server-option    <str>  * Outgoing SMTP server option to use.
@@ -243,6 +244,7 @@ sub do_edit {
 my (@suppress_cc);
 my ($auto_8bit_encoding);
 my ($compose_encoding);
+my ($sendmail_cmd);
 # Variables with corresponding config settings & hardcoded defaults
 my ($debug_net_smtp) = 0;		# Net::SMTP, see send_message()
 my $thread = 1;
@@ -290,6 +292,7 @@ sub do_edit {
     "assume8bitencoding" => \$auto_8bit_encoding,
     "composeencoding" => \$compose_encoding,
     "transferencoding" => \$target_xfer_encoding,
+    "sendmailcmd" => \$sendmail_cmd,
 );
 
 my %config_path_settings = (
@@ -423,6 +426,7 @@ sub read_config {
 		    "no-bcc" => \$no_bcc,
 		    "chain-reply-to!" => \$chain_reply_to,
 		    "no-chain-reply-to" => sub {$chain_reply_to = 0},
+		    "sendmail-cmd=s" => \$sendmail_cmd,
 		    "smtp-server=s" => \$smtp_server,
 		    "smtp-server-option=s" => \@smtp_server_options,
 		    "smtp-server-port=s" => \$smtp_server_port,
@@ -996,16 +1000,19 @@ sub expand_one_alias {
 	$reply_to = sanitize_address($reply_to);
 }
 
-if (!defined $smtp_server) {
+if (!defined $sendmail_cmd && !defined $smtp_server) {
 	my @sendmail_paths = qw( /usr/sbin/sendmail /usr/lib/sendmail );
 	push @sendmail_paths, map {"$_/sendmail"} split /:/, $ENV{PATH};
 	foreach (@sendmail_paths) {
 		if (-x $_) {
-			$smtp_server = $_;
+			$sendmail_cmd = $_;
 			last;
 		}
 	}
-	$smtp_server ||= 'localhost'; # could be 127.0.0.1, too... *shrug*
+
+	if (!defined $sendmail_cmd) {
+		$smtp_server = 'localhost'; # could be 127.0.0.1, too... *shrug*
+	}
 }
 
 if ($compose && $compose > 0) {
@@ -1485,11 +1492,17 @@ sub send_message {
 
 	if ($dry_run) {
 		# We don't want to send the email.
-	} elsif (file_name_is_absolute($smtp_server)) {
+	} elsif (defined $sendmail_cmd || file_name_is_absolute($smtp_server)) {
 		my $pid = open my $sm, '|-';
 		defined $pid or die $!;
 		if (!$pid) {
-			exec($smtp_server, @sendmail_parameters) or die $!;
+			if (defined $sendmail_cmd) {
+				exec ("sh", "-c", "$sendmail_cmd \"\$@\"", "-", @sendmail_parameters)
+					or die $!;
+			} else {
+				exec ($smtp_server, @sendmail_parameters)
+					or die $!;
+			}
 		}
 		print $sm "$header\n$message";
 		close $sm or die $!;
@@ -1585,14 +1598,21 @@ sub send_message {
 		printf($dry_run ? __("Dry-Sent %s\n") : __("Sent %s\n"), $subject);
 	} else {
 		print($dry_run ? __("Dry-OK. Log says:\n") : __("OK. Log says:\n"));
-		if (!file_name_is_absolute($smtp_server)) {
+		if (!defined $sendmail_cmd && !file_name_is_absolute($smtp_server)) {
 			print "Server: $smtp_server\n";
 			print "MAIL FROM:<$raw_from>\n";
 			foreach my $entry (@recipients) {
 			    print "RCPT TO:<$entry>\n";
 			}
 		} else {
-			print "Sendmail: $smtp_server ".join(' ',@sendmail_parameters)."\n";
+			my $sm;
+			if (defined $sendmail_cmd) {
+				$sm = $sendmail_cmd;
+			} else {
+				$sm = $smtp_server;
+			}
+
+			print "Sendmail: $sm ".join(' ',@sendmail_parameters)."\n";
 		}
 		print $header, "\n";
 		if ($smtp) {
diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh
index 4eee9c3dcb5383..35289d9135ecca 100755
--- a/t/t9001-send-email.sh
+++ b/t/t9001-send-email.sh
@@ -2097,6 +2097,37 @@ test_expect_success $PREREQ 'leading and trailing whitespaces are removed' '
 	test_cmp expected-list actual-list
 '
 
+test_expect_success $PREREQ 'test using command name with --sendmail-cmd' '
+	clean_fake_sendmail &&
+	PATH="$(pwd):$PATH" \
+	git send-email \
+		--from="Example <nobody@example.com>" \
+		--to=nobody@example.com \
+		--sendmail-cmd="fake.sendmail" \
+		HEAD^ &&
+	test_path_is_file commandline1
+'
+
+test_expect_success $PREREQ 'test using arguments with --sendmail-cmd' '
+	clean_fake_sendmail &&
+	git send-email \
+		--from="Example <nobody@example.com>" \
+		--to=nobody@example.com \
+		--sendmail-cmd='\''"$(pwd)/fake.sendmail" -f nobody@example.com'\'' \
+		HEAD^ &&
+	test_path_is_file commandline1
+'
+
+test_expect_success $PREREQ 'test shell expression with --sendmail-cmd' '
+	clean_fake_sendmail &&
+	git send-email \
+		--from="Example <nobody@example.com>" \
+		--to=nobody@example.com \
+		--sendmail-cmd='\''f() { "$(pwd)/fake.sendmail" "$@"; };f'\'' \
+		HEAD^ &&
+	test_path_is_file commandline1
+'
+
 test_expect_success $PREREQ 'invoke hook' '
 	mkdir -p .git/hooks &&
 

From 4901884a23c6eaabb0738bddc440f4155f81b973 Mon Sep 17 00:00:00 2001
From: Alex Henrie <alexhenrie24@gmail.com>
Date: Sun, 16 May 2021 15:57:04 -0600
Subject: [PATCH 017/110] stash: don't translate literal commands

Signed-off-by: Alex Henrie <alexhenrie24@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/stash.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/stash.c b/builtin/stash.c
index ba774cce674f33..df451b7483c243 100644
--- a/builtin/stash.c
+++ b/builtin/stash.c
@@ -24,7 +24,7 @@ static const char * const git_stash_usage[] = {
 	N_("git stash drop [-q|--quiet] [<stash>]"),
 	N_("git stash ( pop | apply ) [--index] [-q|--quiet] [<stash>]"),
 	N_("git stash branch <branchname> [<stash>]"),
-	N_("git stash clear"),
+	"git stash clear",
 	N_("git stash [push [-p|--patch] [-k|--[no-]keep-index] [-q|--quiet]\n"
 	   "          [-u|--include-untracked] [-a|--all] [-m|--message <message>]\n"
 	   "          [--pathspec-from-file=<file> [--pathspec-file-nul]]\n"
@@ -65,7 +65,7 @@ static const char * const git_stash_branch_usage[] = {
 };
 
 static const char * const git_stash_clear_usage[] = {
-	N_("git stash clear"),
+	"git stash clear",
 	NULL
 };
 

From f5f5a61d5a1d54200972e104ffb3640dfa80bfb6 Mon Sep 17 00:00:00 2001
From: Alex Henrie <alexhenrie24@gmail.com>
Date: Sun, 16 May 2021 15:59:57 -0600
Subject: [PATCH 018/110] submodule: use the imperative mood to describe the
 --files option

Signed-off-by: Alex Henrie <alexhenrie24@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/submodule--helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 9d505a6329c8e2..da321a75653af2 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1299,7 +1299,7 @@ static int module_summary(int argc, const char **argv, const char *prefix)
 		OPT_BOOL(0, "cached", &cached,
 			 N_("use the commit stored in the index instead of the submodule HEAD")),
 		OPT_BOOL(0, "files", &files,
-			 N_("to compare the commit in the index with that in the submodule HEAD")),
+			 N_("compare the commit in the index with that in the submodule HEAD")),
 		OPT_BOOL(0, "for-status", &for_status,
 			 N_("skip submodules with 'ignore_config' value set to 'all'")),
 		OPT_INTEGER('n', "summary-limit", &summary_limit,

From 72ee47ceebc7d3ddbd31942b28f9fe47f00b0540 Mon Sep 17 00:00:00 2001
From: edef <edef@edef.eu>
Date: Sun, 16 May 2021 15:07:19 +0000
Subject: [PATCH 019/110] mailinfo: don't discard names under 3 characters

I sometimes receive patches from people with short mononyms, and in my
cultural environment these are not uncommon. To my dismay, git-am
currently discards their names, and replaces them with their email
addresses.

Link: https://www.kalzumeus.com/2010/06/17/falsehoods-programmers-believe-about-names/
Signed-off-by: edef <edef@edef.eu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 mailinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mailinfo.c b/mailinfo.c
index 5681d9130db6f5..3161a3940f339f 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -19,7 +19,7 @@ static void cleanup_space(struct strbuf *sb)
 static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
 {
 	struct strbuf *src = name;
-	if (name->len < 3 || 60 < name->len || strpbrk(name->buf, "@<>"))
+	if (!name->len || 60 < name->len || strpbrk(name->buf, "@<>"))
 		src = email;
 	else if (name == out)
 		return;

From bfe35a61653c5789c9038b3fe9925941cf10f623 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Anders=20H=C3=B6ckersten?= <anders@hockersten.se>
Date: Mon, 17 May 2021 05:53:50 +0000
Subject: [PATCH 020/110] describe-doc: clarify default length of abbreviation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Clarify the default length used for the abbreviated form used for
commits in git describe.

The behavior was modified in Git 2.11.0, but the documentation was not
updated to clarify the new behavior.

Signed-off-by: Anders Höckersten <anders@hockersten.se>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-describe.txt | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-describe.txt b/Documentation/git-describe.txt
index a88f6ae2c6e785..c6a79c2a0f29cc 100644
--- a/Documentation/git-describe.txt
+++ b/Documentation/git-describe.txt
@@ -63,9 +63,10 @@ OPTIONS
 	Automatically implies --tags.
 
 --abbrev=<n>::
-	Instead of using the default 7 hexadecimal digits as the
-	abbreviated object name, use <n> digits, or as many digits
-	as needed to form a unique object name.  An <n> of 0
+	Instead of using the default number of hexadecimal digits (which
+	will vary according to the number of objects in the repository with
+	a default of 7) of the abbreviated object name, use <n> digits, or
+	as many digits as needed to form a unique object name. An <n> of 0
 	will suppress long format, only showing the closest tag.
 
 --candidates=<n>::
@@ -139,8 +140,11 @@ at the end.
 
 The number of additional commits is the number
 of commits which would be displayed by "git log v1.0.4..parent".
-The hash suffix is "-g" + unambiguous abbreviation for the tip commit
-of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`).
+The hash suffix is "-g" + an unambigous abbreviation for the tip commit
+of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). The
+length of the abbreviation scales as the repository grows, using the
+approximate number of objects in the repository and a bit of math
+around the birthday paradox, and defaults to a minimum of 7.
 The "g" prefix stands for "git" and is used to allow describing the version of
 a software depending on the SCM the software is managed with. This is useful
 in an environment where people may use different SCMs.

From e2c5993744f2c802a907cba1cfb226dc688b527e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Wolfgang=20M=C3=BCller?= <wolf@oriole.systems>
Date: Mon, 17 May 2021 10:02:43 +0200
Subject: [PATCH 021/110] rev-parse: mark die() messages for translation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These error messages are intended for the user. Let's touch them up
since we're here from the previous commit.

Signed-off-by: Wolfgang Müller <wolf@oriole.systems>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/rev-parse.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c
index 7af8dab8bcecdb..22c4e1a4ff0f21 100644
--- a/builtin/rev-parse.c
+++ b/builtin/rev-parse.c
@@ -435,11 +435,11 @@ static int cmd_parseopt(int argc, const char **argv, const char *prefix)
 	/* get the usage up to the first line with a -- on it */
 	for (;;) {
 		if (strbuf_getline(&sb, stdin) == EOF)
-			die("premature end of input");
+			die(_("premature end of input"));
 		ALLOC_GROW(usage, unb + 1, usz);
 		if (!strcmp("--", sb.buf)) {
 			if (unb < 1)
-				die("no usage string given before the `--' separator");
+				die(_("no usage string given before the `--' separator"));
 			usage[unb] = NULL;
 			break;
 		}
@@ -545,7 +545,7 @@ static void die_no_single_rev(int quiet)
 	if (quiet)
 		exit(1);
 	else
-		die("Needed a single revision");
+		die(_("Needed a single revision"));
 }
 
 static const char builtin_rev_parse_usage[] =
@@ -709,10 +709,10 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 			if (!strcmp(arg, "--resolve-git-dir")) {
 				const char *gitdir = argv[++i];
 				if (!gitdir)
-					die("--resolve-git-dir requires an argument");
+					die(_("--resolve-git-dir requires an argument"));
 				gitdir = resolve_gitdir(gitdir);
 				if (!gitdir)
-					die("not a gitdir '%s'", argv[i]);
+					die(_("not a gitdir '%s'"), argv[i]);
 				puts(gitdir);
 				continue;
 			}
@@ -736,7 +736,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 		if (!seen_end_of_options && *arg == '-') {
 			if (!strcmp(arg, "--git-path")) {
 				if (!argv[i + 1])
-					die("--git-path requires an argument");
+					die(_("--git-path requires an argument"));
 				strbuf_reset(&buf);
 				print_path(git_path("%s", argv[i + 1]), prefix,
 						format,
@@ -746,7 +746,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 			}
 			if (!strcmp(arg,"-n")) {
 				if (++i >= argc)
-					die("-n requires an argument");
+					die(_("-n requires an argument"));
 				if ((filter & DO_FLAGS) && (filter & DO_REVS)) {
 					show(arg);
 					show(argv[i]);
@@ -760,26 +760,26 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 			}
 			if (opt_with_value(arg, "--path-format", &arg)) {
 				if (!arg)
-					die("--path-format requires an argument");
+					die(_("--path-format requires an argument"));
 				if (!strcmp(arg, "absolute")) {
 					format = FORMAT_CANONICAL;
 				} else if (!strcmp(arg, "relative")) {
 					format = FORMAT_RELATIVE;
 				} else {
-					die("unknown argument to --path-format: %s", arg);
+					die(_("unknown argument to --path-format: %s"), arg);
 				}
 				continue;
 			}
 			if (!strcmp(arg, "--default")) {
 				def = argv[++i];
 				if (!def)
-					die("--default requires an argument");
+					die(_("--default requires an argument"));
 				continue;
 			}
 			if (!strcmp(arg, "--prefix")) {
 				prefix = argv[++i];
 				if (!prefix)
-					die("--prefix requires an argument");
+					die(_("--prefix requires an argument"));
 				startup_info->prefix = prefix;
 				output_prefix = 1;
 				continue;
@@ -848,7 +848,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 					else if (!strcmp(arg, "loose"))
 						abbrev_ref_strict = 0;
 					else
-						die("unknown mode for --abbrev-ref: %s",
+						die(_("unknown mode for --abbrev-ref: %s"),
 						    arg);
 				}
 				continue;
@@ -892,7 +892,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 				if (work_tree)
 					print_path(work_tree, prefix, format, DEFAULT_UNMODIFIED);
 				else
-					die("this operation must be run in a work tree");
+					die(_("this operation must be run in a work tree"));
 				continue;
 			}
 			if (!strcmp(arg, "--show-superproject-working-tree")) {
@@ -1020,7 +1020,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 				if (strcmp(val, "storage") &&
 				    strcmp(val, "input") &&
 				    strcmp(val, "output"))
-					die("unknown mode for --show-object-format: %s",
+					die(_("unknown mode for --show-object-format: %s"),
 					    arg);
 				puts(the_hash_algo->name);
 				continue;
@@ -1058,7 +1058,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix)
 		if (verify)
 			die_no_single_rev(quiet);
 		if (has_dashdash)
-			die("bad revision '%s'", arg);
+			die(_("bad revision '%s'"), arg);
 		as_is = 1;
 		if (!show_file(arg, output_prefix))
 			continue;

From 3d20ed27b8bdac5c82f4f78af802f9afa499f651 Mon Sep 17 00:00:00 2001
From: Matheus Tavares <matheus.bernardino@usp.br>
Date: Mon, 17 May 2021 16:49:03 -0300
Subject: [PATCH 022/110] parallel-checkout: send the new object_id algo field
 to the workers

An object_id storing a SHA-1 name has some unused bytes at the end of
the hash array. Since these bytes are not used, they are usually not
initialized to any value either. However, at
parallel_checkout.c:send_one_item() the object_id of a cache entry is
copied into a buffer which is later sent to a checkout worker through a
pipe write(). This makes Valgrind complain about passing uninitialized
bytes to a syscall. The worker won't use these uninitialized bytes
either, but the warning could confuse someone trying to debug this code;
So instead of using oidcpy(), send_one_item() uses hashcpy() to only
copy the used/initialized bytes of the object_id, and leave the
remaining part with zeros.

However, since cf0983213c ("hash: add an algo member to struct
object_id", 2021-04-26), using hashcpy() is no longer sufficient here as
it won't copy the new algo field from the object_id. Let's add and use a
new function which meets both our requirements of copying all the
important object_id data while still avoiding the uninitialized bytes,
by padding the end of the hash array in the destination object_id. With
this change, we also no longer need the destination buffer from
send_one_item() to be initialized with zeros, so let's switch from
xcalloc() to xmalloc() to make this clear.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 hash.h              | 16 ++++++++++++++++
 parallel-checkout.c | 13 ++++++-------
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/hash.h b/hash.h
index 2986f991c66f5b..9c6df4d9527e0a 100644
--- a/hash.h
+++ b/hash.h
@@ -263,6 +263,22 @@ static inline void oidcpy(struct object_id *dst, const struct object_id *src)
 	dst->algo = src->algo;
 }
 
+/* Like oidcpy() but zero-pads the unused bytes in dst's hash array. */
+static inline void oidcpy_with_padding(struct object_id *dst,
+				       struct object_id *src)
+{
+	size_t hashsz;
+
+	if (!src->algo)
+		hashsz = the_hash_algo->rawsz;
+	else
+		hashsz = hash_algos[src->algo].rawsz;
+
+	memcpy(dst->hash, src->hash, hashsz);
+	memset(dst->hash + hashsz, 0, GIT_MAX_RAWSZ - hashsz);
+	dst->algo = src->algo;
+}
+
 static inline struct object_id *oiddup(const struct object_id *src)
 {
 	struct object_id *dst = xmalloc(sizeof(struct object_id));
diff --git a/parallel-checkout.c b/parallel-checkout.c
index 6b1af32bb3d4f7..ddc0ff3c0646ae 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -411,7 +411,7 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	len_data = sizeof(struct pc_item_fixed_portion) + name_len +
 		   working_tree_encoding_len;
 
-	data = xcalloc(1, len_data);
+	data = xmalloc(len_data);
 
 	fixed_portion = (struct pc_item_fixed_portion *)data;
 	fixed_portion->id = pc_item->id;
@@ -421,13 +421,12 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 	fixed_portion->name_len = name_len;
 	fixed_portion->working_tree_encoding_len = working_tree_encoding_len;
 	/*
-	 * We use hashcpy() instead of oidcpy() because the hash[] positions
-	 * after `the_hash_algo->rawsz` might not be initialized. And Valgrind
-	 * would complain about passing uninitialized bytes to a syscall
-	 * (write(2)). There is no real harm in this case, but the warning could
-	 * hinder the detection of actual errors.
+	 * We pad the unused bytes in the hash array because, otherwise,
+	 * Valgrind would complain about passing uninitialized bytes to a
+	 * write() syscall. The warning doesn't represent any real risk here,
+	 * but it could hinder the detection of actual errors.
 	 */
-	hashcpy(fixed_portion->oid.hash, pc_item->ce->oid.hash);
+	oidcpy_with_padding(&fixed_portion->oid, &pc_item->ce->oid);
 
 	variant = data + sizeof(*fixed_portion);
 	if (working_tree_encoding_len) {

From 68142e117c080b7a563d681dc34c7df2ab945df5 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 17 May 2021 12:24:49 +0000
Subject: [PATCH 023/110] hashfile: use write_in_full()

The flush() logic in csum-file.c was introduced originally by c38138c
(git-pack-objects: write the pack files with a SHA1 csum, 2005-06-26)
and a portion of the logic performs similar utility to write_in_full()
in wrapper.c. The history of write_in_full() is full of moves and
renames, but was originally introduced by 7230e6d (Add write_or_die(), a
helper function, 2006-08-21).

The point of these sections of code are to flush a write buffer using
xwrite() and report errors in the case of disk space issues or other
generic input/output errors. The logic in flush() can interpret the
output of write_in_full() to provide the correct error messages to
users.

The logic in the hashfile API has an additional set of logic to augment
the progress indicator between calls to xwrite(). This was introduced by
2a128d6 (add throughput display to git-push, 2007-10-30). It seems that
since the hashfile's buffer is only 8KB, these additional progress
indicators might not be incredibly necessary. Instead, update the
progress only when write_in_full() complete.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 csum-file.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/csum-file.c b/csum-file.c
index 7510950fa3e99d..3c26389d4914df 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -25,21 +25,14 @@ static void flush(struct hashfile *f, const void *buf, unsigned int count)
 			die("sha1 file '%s' validation error", f->name);
 	}
 
-	for (;;) {
-		int ret = xwrite(f->fd, buf, count);
-		if (ret > 0) {
-			f->total += ret;
-			display_throughput(f->tp, f->total);
-			buf = (char *) buf + ret;
-			count -= ret;
-			if (count)
-				continue;
-			return;
-		}
-		if (!ret)
+	if (write_in_full(f->fd, buf, count) < 0) {
+		if (errno == ENOSPC)
 			die("sha1 file '%s' write error. Out of diskspace", f->name);
 		die_errno("sha1 file '%s' write error", f->name);
 	}
+
+	f->total += count;
+	display_throughput(f->tp, f->total);
 }
 
 void hashflush(struct hashfile *f)

From f302c1e4aa09ca6120968256d65dba291990929d Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Tue, 18 May 2021 09:53:37 +0900
Subject: [PATCH 024/110] revisions(7): clarify that most commands take a
 single revision range

Sometimes new people are confused by how a revision "range" works,
in that it is not a random collection of commits but a set of
commits that are all connected to each other, and most Git commands
work on a single such "range".

Give an example to clarify it.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/revisions.txt | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/Documentation/revisions.txt b/Documentation/revisions.txt
index d9169c062eb1ab..f5f17b65a12e8e 100644
--- a/Documentation/revisions.txt
+++ b/Documentation/revisions.txt
@@ -260,6 +260,9 @@ any of the given commits.
 A commit's reachable set is the commit itself and the commits in
 its ancestry chain.
 
+There are several notations to specify a set of connected commits
+(called a "revision range"), illustrated below.
+
 
 Commit Exclusions
 ~~~~~~~~~~~~~~~~~
@@ -294,6 +297,26 @@ is a shorthand for 'HEAD..origin' and asks "What did the origin do since
 I forked from them?"  Note that '..' would mean 'HEAD..HEAD' which is an
 empty range that is both reachable and unreachable from HEAD.
 
+Commands that are specifically designed to take two distinct ranges
+(e.g. "git range-diff R1 R2" to compare two ranges) do exist, but
+they are exceptions.  Unless otherwise noted, all "git" commands
+that operate on a set of commits work on a single revision range.
+In other words, writing two "two-dot range notation" next to each
+other, e.g.
+
+    $ git log A..B C..D
+
+does *not* specify two revision ranges for most commands.  Instead
+it will name a single connected set of commits, i.e. those that are
+reachable from either B or D but are reachable from neither A or C.
+In a linear history like this:
+
+    ---A---B---o---o---C---D
+
+because A and B are reachable from C, the revision range specified
+by these two dotted ranges is a single commit D.
+
+
 Other <rev>{caret} Parent Shorthand Notations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Three other shorthands exist, particularly useful for merge commits,

From aafa5df0df39036c6500846acd3db5b75d264a3b Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Tue, 18 May 2021 18:52:56 -0700
Subject: [PATCH 025/110] xsize_t: avoid implementation defined behavior when
 len < 0

The xsize_t helper aims to safely convert an off_t to a size_t,
erroring out when a file offset is too large to fit into a memory
address.  It does this by using two casts:

	size_t size = (size_t) len;
	if (len != (off_t) size)
		... error out ...

On a platform with sizeof(size_t) < sizeof(off_t), this check is safe
and correct.  The first cast truncates to a size_t by finding the
remainder modulo SIZE_MAX+1 (see C99 section 6.3.1.3 Signed and
unsigned integers) and the second promotes to an off_t, meaning the
result is true if and only if len is representable as a size_t.

On other platforms, this two-casts strategy still works well (always
succeeds) for len >= 0.  But for len < 0, when the first cast succeeds
and produces SIZE_MAX + 1 + len, the resulting value is too large to
be represented as an off_t, so the second cast produces implementation
defined behavior.  In practice, it is likely to produce a result of
true despite len not being representable as size_t.

Simplify by replacing with a more straightforward check: compare len
to the relevant bounds and then cast it.  (To avoid a -Wsign-compare
warning, after checking that len >= 0, we explicitly convert to a
sufficiently-large unsigned type before comparing to SIZE_MAX.)

In practice, this is not likely to come up since typical callers use
nonnegative len.  Still, it's helpful to handle this case to make the
behavior easy to reason about.

Historical note: the original bounds-checking in 46be82dfd0 (xsize_t:
check whether we lose bits, 2010-07-28) did not produce this
implementation-defined behavior, though it still did not handle
negative offsets.  It was not until 73560c793a (git-compat-util.h:
xsize_t() - avoid -Wsign-compare warnings, 2017-09-21) introduced the
double cast that the implementation-defined behavior was triggered.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-compat-util.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/git-compat-util.h b/git-compat-util.h
index dcc786edaab9dc..acef7f6bcb3f2b 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -981,11 +981,9 @@ static inline char *xstrdup_or_null(const char *str)
 
 static inline size_t xsize_t(off_t len)
 {
-	size_t size = (size_t) len;
-
-	if (len != (off_t) size)
+	if (len < 0 || (uintmax_t) len > SIZE_MAX)
 		die("Cannot handle files this big");
-	return size;
+	return (size_t) len;
 }
 
 __attribute__((format (printf, 3, 4)))

From 2ca245f8be56e3269d02076b658e825b91236e5d Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 18 May 2021 18:32:46 +0000
Subject: [PATCH 026/110] csum-file.h: increase hashfile buffer size

The hashfile API uses a hard-coded buffer size of 8KB and has ever since
it was introduced in c38138c (git-pack-objects: write the pack files
with a SHA1 csum, 2005-06-26). It performs a similar function to the
hashing buffers in read-cache.c, but that code was updated from 8KB to
128KB in f279894 (read-cache: make the index write buffer size 128K,
2021-02-18). The justification there was that do_write_index() improves
from 1.02s to 0.72s. Since our end goal is to have the index writing
code use the hashfile API, we need to unify this buffer size to avoid a
performance regression.

There is a buffer, 'check_buffer', that is used to verify the check_fd
file descriptor. When this buffer increases to 128K to fit the data
being flushed, it causes the stack to overflow the limits placed in the
test suite. To avoid issues with stack size, move both 'buffer' and
'check_buffer' to be heap pointers within 'struct hashfile'. The
'check_buffer' member is left as NULL unless check_fd is set in
hashfd_check(). Both buffers are cleared as part of finalize_hashfile()
which also frees the full structure.

Since these buffers are now on the heap, we can adjust their size based
on the needs of the consumer. In particular, callers to
hashfd_throughput() are expecting to report progress indicators as the
buffer flushes. These callers would prefer the smaller 8k buffer to
avoid large delays between updates, especially for users with slower
networks. When the progress indicator is not used, the larger buffer is
preferrable.

By adding a new trace2 region in the chunk-format API, we can see that
the writing portion of 'git multi-pack-index write' lowers from ~1.49s
to ~1.47s on a Linux machine. These effects may be more pronounced or
diminished on other filesystems. The end-to-end timing is too noisy to
have a definitive change either way.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 chunk-format.c | 12 +++++---
 csum-file.c    | 77 +++++++++++++++++++++++++++++++++++++-------------
 csum-file.h    |  4 ++-
 3 files changed, 68 insertions(+), 25 deletions(-)

diff --git a/chunk-format.c b/chunk-format.c
index da191e59a29db6..1c3dca62e2057b 100644
--- a/chunk-format.c
+++ b/chunk-format.c
@@ -58,9 +58,11 @@ void add_chunk(struct chunkfile *cf,
 
 int write_chunkfile(struct chunkfile *cf, void *data)
 {
-	int i;
+	int i, result = 0;
 	uint64_t cur_offset = hashfile_total(cf->f);
 
+	trace2_region_enter("chunkfile", "write", the_repository);
+
 	/* Add the table of contents to the current offset */
 	cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
 
@@ -77,10 +79,10 @@ int write_chunkfile(struct chunkfile *cf, void *data)
 
 	for (i = 0; i < cf->chunks_nr; i++) {
 		off_t start_offset = hashfile_total(cf->f);
-		int result = cf->chunks[i].write_fn(cf->f, data);
+		result = cf->chunks[i].write_fn(cf->f, data);
 
 		if (result)
-			return result;
+			goto cleanup;
 
 		if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
 			BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
@@ -88,7 +90,9 @@ int write_chunkfile(struct chunkfile *cf, void *data)
 			    hashfile_total(cf->f) - start_offset);
 	}
 
-	return 0;
+cleanup:
+	trace2_region_leave("chunkfile", "write", the_repository);
+	return result;
 }
 
 int read_table_of_contents(struct chunkfile *cf,
diff --git a/csum-file.c b/csum-file.c
index 3c26389d4914df..3487d28ed7ada1 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -11,19 +11,24 @@
 #include "progress.h"
 #include "csum-file.h"
 
+static void verify_buffer_or_die(struct hashfile *f,
+				 const void *buf,
+				 unsigned int count)
+{
+	ssize_t ret = read_in_full(f->check_fd, f->check_buffer, count);
+
+	if (ret < 0)
+		die_errno("%s: sha1 file read error", f->name);
+	if (ret != count)
+		die("%s: sha1 file truncated", f->name);
+	if (memcmp(buf, f->check_buffer, count))
+		die("sha1 file '%s' validation error", f->name);
+}
+
 static void flush(struct hashfile *f, const void *buf, unsigned int count)
 {
-	if (0 <= f->check_fd && count)  {
-		unsigned char check_buffer[8192];
-		ssize_t ret = read_in_full(f->check_fd, check_buffer, count);
-
-		if (ret < 0)
-			die_errno("%s: sha1 file read error", f->name);
-		if (ret != count)
-			die("%s: sha1 file truncated", f->name);
-		if (memcmp(buf, check_buffer, count))
-			die("sha1 file '%s' validation error", f->name);
-	}
+	if (0 <= f->check_fd && count)
+		verify_buffer_or_die(f, buf, count);
 
 	if (write_in_full(f->fd, buf, count) < 0) {
 		if (errno == ENOSPC)
@@ -46,6 +51,13 @@ void hashflush(struct hashfile *f)
 	}
 }
 
+static void free_hashfile(struct hashfile *f)
+{
+	free(f->buffer);
+	free(f->check_buffer);
+	free(f);
+}
+
 int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int flags)
 {
 	int fd;
@@ -75,20 +87,20 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int fl
 		if (close(f->check_fd))
 			die_errno("%s: sha1 file error on close", f->name);
 	}
-	free(f);
+	free_hashfile(f);
 	return fd;
 }
 
 void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
 {
 	while (count) {
-		unsigned left = sizeof(f->buffer) - f->offset;
+		unsigned left = f->buffer_len - f->offset;
 		unsigned nr = count > left ? left : count;
 
 		if (f->do_crc)
 			f->crc32 = crc32(f->crc32, buf, nr);
 
-		if (nr == sizeof(f->buffer)) {
+		if (nr == f->buffer_len) {
 			/*
 			 * Flush a full batch worth of data directly
 			 * from the input, skipping the memcpy() to
@@ -114,11 +126,6 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
 	}
 }
 
-struct hashfile *hashfd(int fd, const char *name)
-{
-	return hashfd_throughput(fd, name, NULL);
-}
-
 struct hashfile *hashfd_check(const char *name)
 {
 	int sink, check;
@@ -132,10 +139,14 @@ struct hashfile *hashfd_check(const char *name)
 		die_errno("unable to open '%s'", name);
 	f = hashfd(sink, name);
 	f->check_fd = check;
+	f->check_buffer = xmalloc(f->buffer_len);
+
 	return f;
 }
 
-struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp)
+static struct hashfile *hashfd_internal(int fd, const char *name,
+					struct progress *tp,
+					size_t buffer_len)
 {
 	struct hashfile *f = xmalloc(sizeof(*f));
 	f->fd = fd;
@@ -146,9 +157,35 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp
 	f->name = name;
 	f->do_crc = 0;
 	the_hash_algo->init_fn(&f->ctx);
+
+	f->buffer_len = buffer_len;
+	f->buffer = xmalloc(buffer_len);
+	f->check_buffer = NULL;
+
 	return f;
 }
 
+struct hashfile *hashfd(int fd, const char *name)
+{
+	/*
+	 * Since we are not going to use a progress meter to
+	 * measure the rate of data passing through this hashfile,
+	 * use a larger buffer size to reduce fsync() calls.
+	 */
+	return hashfd_internal(fd, name, NULL, 128 * 1024);
+}
+
+struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp)
+{
+	/*
+	 * Since we are expecting to report progress of the
+	 * write into this hashfile, use a smaller buffer
+	 * size so the progress indicators arrive at a more
+	 * frequent rate.
+	 */
+	return hashfd_internal(fd, name, tp, 8 * 1024);
+}
+
 void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
 {
 	hashflush(f);
diff --git a/csum-file.h b/csum-file.h
index e54d53d1d0b353..3044bd19ab657e 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -16,7 +16,9 @@ struct hashfile {
 	const char *name;
 	int do_crc;
 	uint32_t crc32;
-	unsigned char buffer[8192];
+	size_t buffer_len;
+	unsigned char *buffer;
+	unsigned char *check_buffer;
 };
 
 /* Checkpoint */

From 410334ed5212979a3ff6c8ca119ca14290ee7790 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 18 May 2021 18:32:47 +0000
Subject: [PATCH 027/110] read-cache: use hashfile instead of git_hash_ctx

The do_write_index() method in read-cache.c has its own hashing logic
and buffering mechanism. Specifically, the ce_write() method was
introduced by 4990aadc (Speed up index file writing by chunking it
nicely, 2005-04-20) and similar mechanisms were introduced a few months
later in c38138cd (git-pack-objects: write the pack files with a SHA1
csum, 2005-06-26). Based on the timing, in the early days of the Git
codebase, I figured that these roughly equivalent code paths were never
unified only because it got lost in the shuffle. The hashfile API has
since been used extensively in other file formats, such as pack-indexes,
multi-pack-indexes, and commit-graphs. Therefore, it seems prudent to
unify the index writing code to use the same mechanism.

I discovered this disparity while trying to create a new index format
that uses the chunk-format API. That API uses a hashfile as its base, so
it is incompatible with the custom code in read-cache.c.

This rewrite is rather straightforward. It replaces all writes to the
temporary file with writes to the hashfile struct. This takes care of
many of the direct interactions with the_hash_algo.

There are still some git_hash_ctx uses remaining: the extension headers
are hashed for use in the End of Index Entries (EOIE) extension. This
use of the git_hash_ctx is left as-is. There are multiple reasons to not
use a hashfile here, including the fact that the data is not actually
writing to a file, just a hash computation. These hashes do not block
our adoption of the chunk-format API in a future change to the index, so
leave it as-is.

The internals of the algorithms are mostly identical. Previously, the
hashfile API used a smaller 8KB buffer instead of the 128KB buffer from
read-cache.c. The previous change already unified these sizes.

There is one subtle point: we do not pass the CSUM_FSYNC to the
finalize_hashfile() method, which differs from most consumers of the
hashfile API. The extra fsync() call indicated by this flag causes a
significant peformance degradation that is noticeable for quick
commands that write the index, such as "git add". Other consumers can
absorb this cost with their more complicated data structure
organization, and further writing structures such as pack-files and
commit-graphs is rarely in the critical path for common user
interactions.

Some static methods become orphaned in this diff, so I marked them as
MAYBE_UNUSED. The diff is much harder to read if they are deleted during
this change. Instead, they will be deleted in the following change.

In addition to the test suite passing, I computed indexes using the
previous binaries and the binaries compiled after this change, and found
the index data to be exactly equal. Finally, I did extensive performance
testing of "git update-index --force-write" on repos of various sizes,
including one with over 2 million paths at HEAD. These tests
demonstrated less than 1% difference in behavior. As expected, the
performance should be considered unchanged. The previous changes to
increase the hashfile buffer size from 8K to 128K ensured this change
would not create a peformance regression.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 read-cache.c | 137 +++++++++++++++++++++++++--------------------------
 1 file changed, 66 insertions(+), 71 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 1b3c2eb408be33..e3c9ab8033e9aa 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -26,6 +26,7 @@
 #include "thread-utils.h"
 #include "progress.h"
 #include "sparse-index.h"
+#include "csum-file.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -2525,6 +2526,7 @@ int repo_index_has_changes(struct repository *repo,
 static unsigned char write_buffer[WRITE_BUFFER_SIZE];
 static unsigned long write_buffer_len;
 
+MAYBE_UNUSED
 static int ce_write_flush(git_hash_ctx *context, int fd)
 {
 	unsigned int buffered = write_buffer_len;
@@ -2537,6 +2539,7 @@ static int ce_write_flush(git_hash_ctx *context, int fd)
 	return 0;
 }
 
+MAYBE_UNUSED
 static int ce_write(git_hash_ctx *context, int fd, void *data, unsigned int len)
 {
 	while (len) {
@@ -2559,19 +2562,24 @@ static int ce_write(git_hash_ctx *context, int fd, void *data, unsigned int len)
 	return 0;
 }
 
-static int write_index_ext_header(git_hash_ctx *context, git_hash_ctx *eoie_context,
-				  int fd, unsigned int ext, unsigned int sz)
+static int write_index_ext_header(struct hashfile *f,
+				  git_hash_ctx *eoie_f,
+				  unsigned int ext,
+				  unsigned int sz)
 {
-	ext = htonl(ext);
-	sz = htonl(sz);
-	if (eoie_context) {
-		the_hash_algo->update_fn(eoie_context, &ext, 4);
-		the_hash_algo->update_fn(eoie_context, &sz, 4);
+	hashwrite_be32(f, ext);
+	hashwrite_be32(f, sz);
+
+	if (eoie_f) {
+		ext = htonl(ext);
+		sz = htonl(sz);
+		the_hash_algo->update_fn(eoie_f, &ext, sizeof(ext));
+		the_hash_algo->update_fn(eoie_f, &sz, sizeof(sz));
 	}
-	return ((ce_write(context, fd, &ext, 4) < 0) ||
-		(ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
+	return 0;
 }
 
+MAYBE_UNUSED
 static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash)
 {
 	unsigned int left = write_buffer_len;
@@ -2673,11 +2681,10 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 	}
 }
 
-static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
+static int ce_write_entry(struct hashfile *f, struct cache_entry *ce,
 			  struct strbuf *previous_name, struct ondisk_cache_entry *ondisk)
 {
 	int size;
-	int result;
 	unsigned int saved_namelen;
 	int stripped_name = 0;
 	static unsigned char padding[8] = { 0x00 };
@@ -2693,11 +2700,9 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
 	if (!previous_name) {
 		int len = ce_namelen(ce);
 		copy_cache_entry_to_ondisk(ondisk, ce);
-		result = ce_write(c, fd, ondisk, size);
-		if (!result)
-			result = ce_write(c, fd, ce->name, len);
-		if (!result)
-			result = ce_write(c, fd, padding, align_padding_size(size, len));
+		hashwrite(f, ondisk, size);
+		hashwrite(f, ce->name, len);
+		hashwrite(f, padding, align_padding_size(size, len));
 	} else {
 		int common, to_remove, prefix_size;
 		unsigned char to_remove_vi[16];
@@ -2711,13 +2716,10 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
 		prefix_size = encode_varint(to_remove, to_remove_vi);
 
 		copy_cache_entry_to_ondisk(ondisk, ce);
-		result = ce_write(c, fd, ondisk, size);
-		if (!result)
-			result = ce_write(c, fd, to_remove_vi, prefix_size);
-		if (!result)
-			result = ce_write(c, fd, ce->name + common, ce_namelen(ce) - common);
-		if (!result)
-			result = ce_write(c, fd, padding, 1);
+		hashwrite(f, ondisk, size);
+		hashwrite(f, to_remove_vi, prefix_size);
+		hashwrite(f, ce->name + common, ce_namelen(ce) - common);
+		hashwrite(f, padding, 1);
 
 		strbuf_splice(previous_name, common, to_remove,
 			      ce->name + common, ce_namelen(ce) - common);
@@ -2727,7 +2729,7 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
 		ce->ce_flags &= ~CE_STRIP_NAME;
 	}
 
-	return result;
+	return 0;
 }
 
 /*
@@ -2839,8 +2841,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 			  int strip_extensions)
 {
 	uint64_t start = getnanotime();
-	int newfd = tempfile->fd;
-	git_hash_ctx c, eoie_c;
+	struct hashfile *f;
+	git_hash_ctx *eoie_c = NULL;
 	struct cache_header hdr;
 	int i, err = 0, removed, extended, hdr_version;
 	struct cache_entry **cache = istate->cache;
@@ -2854,6 +2856,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	struct index_entry_offset_table *ieot = NULL;
 	int nr, nr_threads;
 
+	f = hashfd(tempfile->fd, tempfile->filename.buf);
+
 	for (i = removed = extended = 0; i < entries; i++) {
 		if (cache[i]->ce_flags & CE_REMOVE)
 			removed++;
@@ -2882,9 +2886,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	hdr.hdr_version = htonl(hdr_version);
 	hdr.hdr_entries = htonl(entries - removed);
 
-	the_hash_algo->init_fn(&c);
-	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
-		return -1;
+	hashwrite(f, &hdr, sizeof(hdr));
 
 	if (!HAVE_THREADS || git_config_get_index_threads(&nr_threads))
 		nr_threads = 1;
@@ -2919,12 +2921,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		}
 	}
 
-	offset = lseek(newfd, 0, SEEK_CUR);
-	if (offset < 0) {
-		free(ieot);
-		return -1;
-	}
-	offset += write_buffer_len;
+	offset = hashfile_total(f);
+
 	nr = 0;
 	previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
 
@@ -2959,14 +2957,10 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 			if (previous_name)
 				previous_name->buf[0] = 0;
 			nr = 0;
-			offset = lseek(newfd, 0, SEEK_CUR);
-			if (offset < 0) {
-				free(ieot);
-				return -1;
-			}
-			offset += write_buffer_len;
+
+			offset = hashfile_total(f);
 		}
-		if (ce_write_entry(&c, newfd, ce, previous_name, (struct ondisk_cache_entry *)&ondisk) < 0)
+		if (ce_write_entry(f, ce, previous_name, (struct ondisk_cache_entry *)&ondisk) < 0)
 			err = -1;
 
 		if (err)
@@ -2985,14 +2979,16 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		return err;
 	}
 
-	/* Write extension data here */
-	offset = lseek(newfd, 0, SEEK_CUR);
-	if (offset < 0) {
-		free(ieot);
-		return -1;
+	offset = hashfile_total(f);
+
+	/*
+	 * The extension headers must be hashed on their own for the
+	 * EOIE extension. Create a hashfile here to compute that hash.
+	 */
+	if (offset && record_eoie()) {
+		CALLOC_ARRAY(eoie_c, 1);
+		the_hash_algo->init_fn(eoie_c);
 	}
-	offset += write_buffer_len;
-	the_hash_algo->init_fn(&eoie_c);
 
 	/*
 	 * Lets write out CACHE_EXT_INDEXENTRYOFFSETTABLE first so that we
@@ -3005,8 +3001,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		struct strbuf sb = STRBUF_INIT;
 
 		write_ieot_extension(&sb, ieot);
-		err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_INDEXENTRYOFFSETTABLE, sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		err = write_index_ext_header(f, eoie_c, CACHE_EXT_INDEXENTRYOFFSETTABLE, sb.len) < 0;
+		hashwrite(f, sb.buf, sb.len);
 		strbuf_release(&sb);
 		free(ieot);
 		if (err)
@@ -3018,9 +3014,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		struct strbuf sb = STRBUF_INIT;
 
 		err = write_link_extension(&sb, istate) < 0 ||
-			write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_LINK,
-					       sb.len) < 0 ||
-			ce_write(&c, newfd, sb.buf, sb.len) < 0;
+			write_index_ext_header(f, eoie_c, CACHE_EXT_LINK,
+					       sb.len) < 0;
+		hashwrite(f, sb.buf, sb.len);
 		strbuf_release(&sb);
 		if (err)
 			return -1;
@@ -3029,8 +3025,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		struct strbuf sb = STRBUF_INIT;
 
 		cache_tree_write(&sb, istate->cache_tree);
-		err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_TREE, sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		err = write_index_ext_header(f, eoie_c, CACHE_EXT_TREE, sb.len) < 0;
+		hashwrite(f, sb.buf, sb.len);
 		strbuf_release(&sb);
 		if (err)
 			return -1;
@@ -3039,9 +3035,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		struct strbuf sb = STRBUF_INIT;
 
 		resolve_undo_write(&sb, istate->resolve_undo);
-		err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_RESOLVE_UNDO,
-					     sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		err = write_index_ext_header(f, eoie_c, CACHE_EXT_RESOLVE_UNDO,
+					     sb.len) < 0;
+		hashwrite(f, sb.buf, sb.len);
 		strbuf_release(&sb);
 		if (err)
 			return -1;
@@ -3050,9 +3046,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		struct strbuf sb = STRBUF_INIT;
 
 		write_untracked_extension(&sb, istate->untracked);
-		err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_UNTRACKED,
-					     sb.len) < 0 ||
-			ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		err = write_index_ext_header(f, eoie_c, CACHE_EXT_UNTRACKED,
+					     sb.len) < 0;
+		hashwrite(f, sb.buf, sb.len);
 		strbuf_release(&sb);
 		if (err)
 			return -1;
@@ -3061,14 +3057,14 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		struct strbuf sb = STRBUF_INIT;
 
 		write_fsmonitor_extension(&sb, istate);
-		err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_FSMONITOR, sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		err = write_index_ext_header(f, eoie_c, CACHE_EXT_FSMONITOR, sb.len) < 0;
+		hashwrite(f, sb.buf, sb.len);
 		strbuf_release(&sb);
 		if (err)
 			return -1;
 	}
 	if (istate->sparse_index) {
-		if (write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_SPARSE_DIRECTORIES, 0) < 0)
+		if (write_index_ext_header(f, eoie_c, CACHE_EXT_SPARSE_DIRECTORIES, 0) < 0)
 			return -1;
 	}
 
@@ -3078,19 +3074,18 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	 * read.  Write it out regardless of the strip_extensions parameter as we need it
 	 * when loading the shared index.
 	 */
-	if (offset && record_eoie()) {
+	if (eoie_c) {
 		struct strbuf sb = STRBUF_INIT;
 
-		write_eoie_extension(&sb, &eoie_c, offset);
-		err = write_index_ext_header(&c, NULL, newfd, CACHE_EXT_ENDOFINDEXENTRIES, sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		write_eoie_extension(&sb, eoie_c, offset);
+		err = write_index_ext_header(f, NULL, CACHE_EXT_ENDOFINDEXENTRIES, sb.len) < 0;
+		hashwrite(f, sb.buf, sb.len);
 		strbuf_release(&sb);
 		if (err)
 			return -1;
 	}
 
-	if (ce_flush(&c, newfd, istate->oid.hash))
-		return -1;
+	finalize_hashfile(f, istate->oid.hash, CSUM_HASH_IN_STREAM);
 	if (close_tempfile_gently(tempfile)) {
 		error(_("could not close '%s'"), get_tempfile_path(tempfile));
 		return -1;

From f6e2cd0625c8f15505a1bc11828b21490dff59e6 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 18 May 2021 18:32:48 +0000
Subject: [PATCH 028/110] read-cache: delete unused hashing methods

These methods were marked as MAYBE_UNUSED in the previous change to
avoid a complicated diff. Delete them entirely, since we now use the
hashfile API instead of this custom hashing code.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 read-cache.c | 64 ----------------------------------------------------
 1 file changed, 64 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index e3c9ab8033e9aa..77961a38854069 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2522,46 +2522,6 @@ int repo_index_has_changes(struct repository *repo,
 	}
 }
 
-#define WRITE_BUFFER_SIZE (128 * 1024)
-static unsigned char write_buffer[WRITE_BUFFER_SIZE];
-static unsigned long write_buffer_len;
-
-MAYBE_UNUSED
-static int ce_write_flush(git_hash_ctx *context, int fd)
-{
-	unsigned int buffered = write_buffer_len;
-	if (buffered) {
-		the_hash_algo->update_fn(context, write_buffer, buffered);
-		if (write_in_full(fd, write_buffer, buffered) < 0)
-			return -1;
-		write_buffer_len = 0;
-	}
-	return 0;
-}
-
-MAYBE_UNUSED
-static int ce_write(git_hash_ctx *context, int fd, void *data, unsigned int len)
-{
-	while (len) {
-		unsigned int buffered = write_buffer_len;
-		unsigned int partial = WRITE_BUFFER_SIZE - buffered;
-		if (partial > len)
-			partial = len;
-		memcpy(write_buffer + buffered, data, partial);
-		buffered += partial;
-		if (buffered == WRITE_BUFFER_SIZE) {
-			write_buffer_len = buffered;
-			if (ce_write_flush(context, fd))
-				return -1;
-			buffered = 0;
-		}
-		write_buffer_len = buffered;
-		len -= partial;
-		data = (char *) data + partial;
-	}
-	return 0;
-}
-
 static int write_index_ext_header(struct hashfile *f,
 				  git_hash_ctx *eoie_f,
 				  unsigned int ext,
@@ -2579,30 +2539,6 @@ static int write_index_ext_header(struct hashfile *f,
 	return 0;
 }
 
-MAYBE_UNUSED
-static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash)
-{
-	unsigned int left = write_buffer_len;
-
-	if (left) {
-		write_buffer_len = 0;
-		the_hash_algo->update_fn(context, write_buffer, left);
-	}
-
-	/* Flush first if not enough space for hash signature */
-	if (left + the_hash_algo->rawsz > WRITE_BUFFER_SIZE) {
-		if (write_in_full(fd, write_buffer, left) < 0)
-			return -1;
-		left = 0;
-	}
-
-	/* Append the hash signature at the end */
-	the_hash_algo->final_fn(write_buffer + left, context);
-	hashcpy(hash, write_buffer + left);
-	left += the_hash_algo->rawsz;
-	return (write_in_full(fd, write_buffer, left) < 0) ? -1 : 0;
-}
-
 static void ce_smudge_racily_clean_entry(struct index_state *istate,
 					 struct cache_entry *ce)
 {

From e22f2daed0fde4595696e09c6802a1ecd5e0c527 Mon Sep 17 00:00:00 2001
From: Reuven Y <robi@post.jce.ac.il>
Date: Wed, 19 May 2021 09:28:49 +0000
Subject: [PATCH 029/110] docs: improve fast-forward in glossary content

The text was somewhat confusing between the revision itself and the author.

Signed-off-by: Reuven Yagel <robi@post.jce.ac.il>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/glossary-content.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt
index 67c7a50b96a0b2..c0779713355c73 100644
--- a/Documentation/glossary-content.txt
+++ b/Documentation/glossary-content.txt
@@ -146,8 +146,8 @@ current branch integrates with) obviously do not work, as there is no
 	<<def_revision,revision>> and you are "merging" another
 	<<def_branch,branch>>'s changes that happen to be a descendant of what
 	you have. In such a case, you do not make a new <<def_merge,merge>>
-	<<def_commit,commit>> but instead just update to his
-	revision. This will happen frequently on a
+	<<def_commit,commit>> but instead just update your branch to point at the same
+	revision as the branch you are merging. This will happen frequently on a
 	<<def_remote_tracking_branch,remote-tracking branch>> of a remote
 	<<def_repository,repository>>.
 

From 6aacb7d8619c50b3c92fe3e99b93ffa9b6065dfc Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 19 May 2021 07:17:15 -0400
Subject: [PATCH 030/110] clone: clean up directory after
 transport_fetch_refs() failure

git-clone started respecting errors from the transport subsystem in
aab179d937 (builtin/clone.c: don't ignore transport_fetch_refs() errors,
2020-12-03). However, that commit didn't handle the cleanup of the
filesystem quite right.

The cleanup of the directory that cmd_clone() creates is done by an
atexit() handler, which we control with a flag. It starts as
JUNK_LEAVE_NONE ("clean up everything"), then progresses to
JUNK_LEAVE_REPO when we know we have a valid repo but not working tree,
and then finally JUNK_LEAVE_ALL when we have a successful checkout.

Most errors cause us to die(), which then triggers the handler to do the
right thing based on how far into cmd_clone() we got. But the checks
added by aab179d937 instead set the "err" variable and then jump to a
new "cleanup" label, which then returns our non-zero status. However,
the code after the cleanup label includes setting the flag to
JUNK_LEAVE_ALL, and so we accidentally leave the repository and working
tree in place.

One obvious option to fix this is to reorder the end of the function to
set the flag first, before cleanup code, and put the label between them.

But we can observe another small bug: the error return from
transport_fetch_refs() is generally "-1", and we propagate that to the
return value of cmd_clone(), which ultimately becomes the exit code of
the process. And we try to avoid transmitting negative values via exit
codes (only the low 8 bits are passed along as an unsigned value, though
in practice for "-1" this at least retains the property that it's
non-zero).

Instead, let's just die(). That makes us consistent with rest of the
code in the function. It does add a new "fatal:" line to the output, but
I'd argue that's a good thing:

  - in the rare case that the transport code didn't say anything, now
    the user gets _some_ error message

  - even if the transport code said something like "error: ssh died of
    signal 9", it's nice to also say "fatal" to indicate that we
    considered that to be a show-stopper.

Triggering this in the test suite turns out to be surprisingly
difficult. Almost every error we'd encounter, including ones deep inside
the transport code, cause us to just die() right there! However, one way
is to put a fake wrapper around git-upload-pack that sends the complete
packfile but exits with a failure code.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/clone.c               | 11 ++++-------
 t/t5600-clone-fail-cleanup.sh |  7 +++++++
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index e335734b4cfd32..5888d2ae606f53 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1294,9 +1294,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 			}
 
 		if (!is_local && !complete_refs_before_fetch) {
-			err = transport_fetch_refs(transport, mapped_refs);
-			if (err)
-				goto cleanup;
+			if (transport_fetch_refs(transport, mapped_refs))
+				die(_("remote transport reported error"));
 		}
 
 		remote_head = find_ref_by_name(refs, "HEAD");
@@ -1343,9 +1342,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 	if (is_local)
 		clone_local(path, git_dir);
 	else if (refs && complete_refs_before_fetch) {
-		err = transport_fetch_refs(transport, mapped_refs);
-		if (err)
-			goto cleanup;
+		if (transport_fetch_refs(transport, mapped_refs))
+			die(_("remote transport reported error"));
 	}
 
 	update_remote_refs(refs, mapped_refs, remote_head_points_at,
@@ -1373,7 +1371,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 	junk_mode = JUNK_LEAVE_REPO;
 	err = checkout(submodule_progress);
 
-cleanup:
 	free(remote_name);
 	strbuf_release(&reflog_msg);
 	strbuf_release(&branch_top);
diff --git a/t/t5600-clone-fail-cleanup.sh b/t/t5600-clone-fail-cleanup.sh
index 4a1a912e032983..5bf10261d363ae 100755
--- a/t/t5600-clone-fail-cleanup.sh
+++ b/t/t5600-clone-fail-cleanup.sh
@@ -97,4 +97,11 @@ test_expect_success 'failed clone into empty leaves directory (separate, wt)' '
 	test_dir_is_empty empty-wt
 '
 
+test_expect_success 'transport failure cleans up directory' '
+	test_must_fail git clone --no-local \
+		-u "f() { git-upload-pack \"\$@\"; return 1; }; f" \
+		foo broken-clone &&
+	test_path_is_missing broken-clone
+'
+
 test_done

From ae1a7eefffe60425e6bf6a2065e042ae051cfb6c Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 19 May 2021 12:11:05 -0400
Subject: [PATCH 031/110] fetch-pack: signal v2 server that we are done making
 requests

When fetching with the v0 protocol over ssh (or a local upload-pack with
pipes), the server closes the connection as soon as it is finished
sending the pack. So even though the client may still be operating on
the data via index-pack (e.g., resolving deltas, checking connectivity,
etc), the server has released all resources.

With the v2 protocol, however, the server considers the ssh session only
as a transport, with individual requests coming over it. After sending
the pack, it goes back to its main loop, waiting for another request to
come from the client. As a result, the ssh session hangs around until
the client process ends, which may be much later (because resolving
deltas, etc, may consume a lot of CPU).

This is bad for two reasons:

  - it's consuming resources on the server to leave open a connection
    that won't see any more use

  - if something bad happens to the ssh connection in the meantime (say,
    it gets killed by the network because it's idle, as happened in a
    real-world report), then ssh will exit non-zero, and we'll propagate
    the error up the stack.

The server is correct here not to hang up after serving the pack. The v2
protocol's design is meant to allow multiple requests like this, and
hanging up would be the wrong thing for a hypothetical client which was
planning to make more requests (though in practice, the git.git client
never would, and I doubt any other implementations would either).

The right thing is instead for the client to signal to the server that
it's not interested in making more requests. We can do that by closing
the pipe descriptor we use to write to ssh. This will propagate to the
server upload-pack as an EOF when it tries to read the next request (and
then it will close its half, and the whole connection will go away).

It's important to do this "half duplex" shutdown, because we have to do
it _before_ we actually receive the pack. This is an artifact of the way
fetch-pack and index-pack (or unpack-objects) interact. We hand the
connection off to index-pack (really, a sideband demuxer which feeds
it), and then wait until it returns. And it doesn't do that until it has
resolved all of the deltas in the pack, even though it was done reading
from the server long before.

So just closing the connection fully after index-pack returns would be
too late; we'd have held it open much longer than was necessary. And
teaching index-pack to close the connection is awkward. It's not even
seeing the whole conversation (the sideband demuxer is, but it doesn't
actually know what's in the packets, or when the end comes).

Note that this close() is happening deep within the transport code. It's
possible that a caller would want to perform other operations over the
same ssh transport after receiving the pack. But as of the current code,
none of the callers do, and there haven't been discussions of any plans
to change this. If we need to support that later, we can probably do so
by passing down a flag for "you're the last request on the transport;
it's OK to close" instead of the code just assuming that's true.

The description above all discusses v2 ssh, so it's worth thinking about
how this interacts with other protocols:

  - in v0 protocols, we could do the same half-duplex shutdown (it just
    goes into the v0 do_fetch_pack() instead). This does work, but since
    it doesn't have the same persistence problem in the first place,
    there's little reason to change it at this point.

  - local fetches against git-upload-pack on the same machine will
    behave the same as ssh (they are talking over two pipes, and see EOF
    on their input pipe)

  - fetches against git-daemon will run this same code, and close one of
    the descriptors. In practice, this won't do anything, since there
    our two descriptors are dups of each other, and not part of a
    half-duplex pair. The right thing would probably be to call
    shutdown(SHUT_WR) on it. I didn't bother with that here. It doesn't
    face the same error-code problem (since it's just a TCP connection),
    so it's really only an optimization problem. And git:// is not that
    widely used these days, and has less impact on server resources than
    an ssh termination.

  - v2 http doesn't suffer from this problem in the first place, as our
    pipes terminate at a local git-remote-https, which is passing data
    along as individual requests via curl. Probably curl is keeping the
    TCP/TLS connection open for more requests, and we might be able to
    tell it manually "hey, we are done making requests now". But I think
    that's much less important. It again doesn't suffer from the
    error-code problem, and HTTP keepalive is pretty well understood
    (importantly, the timeouts can be set low, because clients like curl
    know how to reconnect for subsequent requests if necessary). So it's
    probably not worth figuring out how to tell curl that we're done
    (though if we do, this patch is probably the first step anyway;
    fetch-pack closes the pipe back to remote-https, which would be the
    signal that it should tell curl we're done).

The code is pretty straightforward. We close the pipe at the right
moment, and set it to -1 to mark it as invalid. I modified the later
cleanup code to avoid calling close(-1). That's not strictly necessary,
since close(-1) is a noop, but hopefully makes things a bit more obvious
to a reader.

I suspect that trying to call more transport functions after the close()
(e.g., calling transport_fetch_refs() again) would fail, as it's not
smart enough to realize we need to re-open the ssh connection. But
that's already true when v0 is in use. And no current callers want to do
that (and again, the solution is probably a flag in the transport code
to keep things open, which can be added later).

There's no test here, as the situation it covers is inherently racy (the
question is when upload-pack exits, compared to when index-pack finishes
resolving deltas and exits). The rather gross shell snippet below does
recreate the problematic situation; when run on a sufficiently-large
repository (git.git works fine), it kills an "idle" upload-pack while
the client is resolving deltas, leading to a failed clone.

    (
	    git clone --no-local --progress . foo.git 2>&1
	    echo >&2 "clone exit code=$?"
    ) |
    tr '\r' '\n' |
    while read line
    do
	    case "$done,$line" in
	    ,Resolving*)
		    echo "hit resolving deltas; killing upload-pack"
		    killall -9 git-upload-pack
		    done=t
		    ;;
	    esac
    done

Reported-by: Greg Pflaum <greg.pflaum@pnp-hcl.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fetch-pack.c | 9 +++++++++
 transport.c  | 6 ++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index c135635e34a900..b0c7be717c7fdd 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1645,6 +1645,15 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			if (process_section_header(&reader, "packfile-uris", 1))
 				receive_packfile_uris(&reader, &packfile_uris);
 			process_section_header(&reader, "packfile", 0);
+
+			/*
+			 * this is the final request we'll make of the server;
+			 * do a half-duplex shutdown to indicate that they can
+			 * hang up as soon as the pack is sent.
+			 */
+			close(fd[1]);
+			fd[1] = -1;
+
 			if (get_pack(args, fd, pack_lockfiles,
 				     packfile_uris.nr ? &index_pack_args : NULL,
 				     sought, nr_sought, &fsck_options.gitmodules_found))
diff --git a/transport.c b/transport.c
index 6cf3da19ebd26f..50f5830eb6b98e 100644
--- a/transport.c
+++ b/transport.c
@@ -427,7 +427,8 @@ static int fetch_refs_via_pack(struct transport *transport,
 
 cleanup:
 	close(data->fd[0]);
-	close(data->fd[1]);
+	if (data->fd[1] >= 0)
+		close(data->fd[1]);
 	if (finish_connect(data->conn))
 		ret = -1;
 	data->conn = NULL;
@@ -869,7 +870,8 @@ static int disconnect_git(struct transport *transport)
 		if (data->got_remote_heads && !transport->stateless_rpc)
 			packet_flush(data->fd[1]);
 		close(data->fd[0]);
-		close(data->fd[1]);
+		if (data->fd[1] >= 0)
+			close(data->fd[1]);
 		finish_connect(data->conn);
 	}
 

From 09667e95163a9d51e569c50b1fdfc78d0a002c23 Mon Sep 17 00:00:00 2001
From: Alex Henrie <alexhenrie24@gmail.com>
Date: Tue, 18 May 2021 00:18:55 -0600
Subject: [PATCH 032/110] fetch: improve grammar of "shallow roots" message

Signed-off-by: Alex Henrie <alexhenrie24@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fetch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index dfde96a4354c14..9191620e50c7a4 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1126,7 +1126,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
 
 			if (rm->status == REF_STATUS_REJECT_SHALLOW) {
 				if (want_status == FETCH_HEAD_MERGE)
-					warning(_("reject %s because shallow roots are not allowed to be updated"),
+					warning(_("rejected %s because shallow roots are not allowed to be updated"),
 						rm->peer_ref ? rm->peer_ref->name : rm->name);
 				continue;
 			}

From 8013d7d9ee7674774f6dbdbaeab11ce173bee016 Mon Sep 17 00:00:00 2001
From: Alex Henrie <alexhenrie24@gmail.com>
Date: Tue, 18 May 2021 00:19:17 -0600
Subject: [PATCH 033/110] setup: split "extensions found" messages into
 singular and plural

It's easier to translate this way.

Signed-off-by: Alex Henrie <alexhenrie24@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 setup.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/setup.c b/setup.c
index 59e2facd9db671..ead2f80cd8ee4a 100644
--- a/setup.c
+++ b/setup.c
@@ -666,7 +666,9 @@ int verify_repository_format(const struct repository_format *format,
 	if (format->version >= 1 && format->unknown_extensions.nr) {
 		int i;
 
-		strbuf_addstr(err, _("unknown repository extensions found:"));
+		strbuf_addstr(err, Q_("unknown repository extension found:",
+				      "unknown repository extensions found:",
+				      format->unknown_extensions.nr));
 
 		for (i = 0; i < format->unknown_extensions.nr; i++)
 			strbuf_addf(err, "\n\t%s",
@@ -678,7 +680,9 @@ int verify_repository_format(const struct repository_format *format,
 		int i;
 
 		strbuf_addstr(err,
-			      _("repo version is 0, but v1-only extensions found:"));
+			      Q_("repo version is 0, but v1-only extension found:",
+				 "repo version is 0, but v1-only extensions found:",
+				 format->v1_only_extensions.nr));
 
 		for (i = 0; i < format->v1_only_extensions.nr; i++)
 			strbuf_addf(err, "\n\t%s",

From a84216c68435f545de38d69c771a6f2ade480394 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 19 May 2021 05:17:09 -0400
Subject: [PATCH 034/110] doc: explain the use of color.pager

The current documentation for color.pager is technically correct, but
slightly misleading and doesn't really clarify the purpose of the
variable. As explained in the original thread which added it:

  https://lore.kernel.org/git/E1G6zPH-00062L-Je@moooo.ath.cx/

the point is to deal with pagers that don't understand colors. And hence it
being set to "true" is necessary for colorizing output to the pager, but
not sufficient by itself (you must also have enabled one of the other
color options, though note that these are set to "auto" by default these
days).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config/color.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/config/color.txt b/Documentation/config/color.txt
index d5daacb13a2c77..e05d520a867490 100644
--- a/Documentation/config/color.txt
+++ b/Documentation/config/color.txt
@@ -127,8 +127,9 @@ color.interactive.<slot>::
 	interactive commands.
 
 color.pager::
-	A boolean to enable/disable colored output when the pager is in
-	use (default is true).
+	A boolean to specify whether `auto` color modes should colorize
+	output going to the pager. Defaults to true; set this to false
+	if your pager does not understand ANSI color codes.
 
 color.push::
 	A boolean to enable/disable color in push errors. May be set to

From bb80333c0853b04ff789de8037bef5a4ade1da8c Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:30 +0000
Subject: [PATCH 035/110] Documentation/technical: describe remembering renames
 optimization

Remembering renames on the upstream side of history in an early merge of
a rebase or cherry-pick for re-use in a latter merge of the same
operation makes pretty good intuitive sense.  However, trying to show
that it doesn't cause some subtle behavioral difference or some funny
edge or corner case is much more involved.  And, in fact, it does
introduce a subtle behavioral change.

Document all the assumptions, special cases, and logic involved in such
an optimization, and describe why this optimization is safe under the
current optimizations/features/etc. -- even when the subtle behavioral
change is triggered.

Part of the point of adding this document that goes over the
optimization in such laborious detail, is that it is possible that
significant future changes (optimizations or feature changes) could
interact with this optimization in interesting ways; this document is
here to help folks making big changes sanity check that the assumptions
and arguments underlying this optimization are still valid.  (As a side
note, creating this document forced me to review things in sufficient
detail that I found I was not properly caching directory-rename-induced
renames, resulting in the code not being aware of those renames and
causing unnecessary diffcore_rename_extended() calls in subsequent
merges.)

A subsequent commit will add several testcases based on this document
meant to stress-test the implementation and also document the case with
the subtle behavioral change.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .../technical/remembering-renames.txt         | 669 ++++++++++++++++++
 1 file changed, 669 insertions(+)
 create mode 100644 Documentation/technical/remembering-renames.txt

diff --git a/Documentation/technical/remembering-renames.txt b/Documentation/technical/remembering-renames.txt
new file mode 100644
index 00000000000000..251ce477e059da
--- /dev/null
+++ b/Documentation/technical/remembering-renames.txt
@@ -0,0 +1,669 @@
+Rebases and cherry-picks involve a sequence of merges whose results are
+recorded as new single-parent commits.  The first parent side of those
+merges represent the "upstream" side, and often include a far larger set of
+changes than the second parent side.  Traditionally, the renames on the
+first-parent side of that sequence of merges were repeatedly re-detected
+for every merge.  This file explains why it is safe and effective during
+rebases and cherry-picks to remember renames on the upstream side of
+history as an optimization, assuming all merges are automatic and clean
+(i.e. no conflicts and not interrupted for user input or editing).
+
+Outline:
+
+  0. Assumptions
+
+  1. How rebasing and cherry-picking work
+
+  2. Why the renames on MERGE_SIDE1 in any given pick are *always* a
+     superset of the renames on MERGE_SIDE1 for the next pick.
+
+  3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ always also
+     a rename on MERGE_SIDE1 for the next pick
+
+  4. A detailed description of the the counter-examples to #3.
+
+  5. Why the special cases in #4 are still fully reasonable to use to pair
+     up files for three-way content merging in the merge machinery, and why
+     they do not affect the correctness of the merge.
+
+  6. Interaction with skipping of "irrelevant" renames
+
+  7. Additional items that need to be cached
+
+  8. How directory rename detection interacts with the above and why this
+     optimization is still safe even if merge.directoryRenames is set to
+     "true".
+
+
+=== 0. Assumptions ===
+
+There are two assumptions that will hold throughout this document:
+
+  * The upstream side where commits are transplanted to is treated as the
+    first parent side when rebase/cherry-pick call the merge machinery
+
+  * All merges are fully automatic
+
+and a third that will hold in sections 2-5 for simplicity, that I'll later
+address in section 8:
+
+  * No directory renames occur
+
+
+Let me explain more about each assumption and why I include it:
+
+
+The first assumption is merely for the purposes of making this document
+clearer; the optimization implementation does not actually depend upon it.
+However, the assumption does hold in all cases because it reflects the way
+that both rebase and cherry-pick were implemented; and the implementation
+of cherry-pick and rebase are not readily changeable for backwards
+compatibility reasons (see for example the discussion of the --ours and
+--theirs flag in the documentation of `git checkout`, particularly the
+comments about how they behave with rebase).  The optimization avoids
+checking first-parent-ness, though.  It checks the conditions that make the
+optimization valid instead, so it would still continue working if someone
+changed the parent ordering that cherry-pick and rebase use.  But making
+this assumption does make this document much clearer and prevents me from
+having to repeat every example twice.
+
+If the second assumption is violated, then the optimization simply is
+turned off and thus isn't relevant to consider.  The second assumption can
+also be stated as "there is no interruption for a user to resolve conflicts
+or to just further edit or tweak files".  While real rebases and
+cherry-picks are often interrupted (either because it's an interactive
+rebase where the user requested to stop and edit, or because there were
+conflicts that the user needs to resolve), the cache of renames is not
+stored on disk, and thus is thrown away as soon as the rebase or cherry
+pick stops for the user to resolve the operation.
+
+The third assumption makes sections 2-5 simpler, and allows people to
+understand the basics of why this optimization is safe and effective, and
+then I can go back and address the specifics in section 8.  It is probably
+also worth noting that if directory renames do occur, then the default of
+merge.directoryRenames being set to "conflict" means that the operation
+will stop for users to resolve the conflicts and the cache will be thrown
+away, and thus that there won't be an optimization to apply.  So, the only
+reason we need to address directory renames specifically, is that some
+users will have set merge.directoryRenames to "true" to allow the merges to
+continue to proceed automatically.  The optimization is still safe with
+this config setting, but we have to discuss a few more cases to show why;
+this discussion is deferred until section 8.
+
+
+=== 1. How rebasing and cherry-picking work ===
+
+Consider the following setup (from the git-rebase manpage):
+
+		     A---B---C topic
+		    /
+	       D---E---F---G main
+
+After rebasing or cherry-picking topic onto main, this will appear as:
+
+			     A'--B'--C' topic
+			    /
+	       D---E---F---G main
+
+The way the commits A', B', and C' are created is through a series of
+merges, where rebase or cherry-pick sequentially uses each of the three
+A-B-C commits in a special merge operation.  Let's label the three commits
+in the merge operation as MERGE_BASE, MERGE_SIDE1, and MERGE_SIDE2.  For
+this picture, the three commits for each of the three merges would be:
+
+To create A':
+   MERGE_BASE:   E
+   MERGE_SIDE1:  G
+   MERGE_SIDE2:  A
+
+To create B':
+   MERGE_BASE:   A
+   MERGE_SIDE1:  A'
+   MERGE_SIDE2:  B
+
+To create C':
+   MERGE_BASE:   B
+   MERGE_SIDE1:  B'
+   MERGE_SIDE2:  C
+
+Sometimes, folks are surprised that these three-way merges are done.  It
+can be useful in understanding these three-way merges to view them in a
+slightly different light.  For example, in creating C', you can view it as
+either:
+
+  * Apply the changes between B & C to B'
+  * Apply the changes between B & B' to C
+
+Conceptually the two statements above are the same as a three-way merge of
+B, B', and C, at least the parts before you decide to record a commit.
+
+
+=== 2. Why the renames on MERGE_SIDE1 in any given pick are always a ===
+===    superset of the renames on MERGE_SIDE1 for the next pick.     ===
+
+The merge machinery uses the filenames it is fed from MERGE_BASE,
+MERGE_SIDE1, and MERGE_SIDE2.  It will only move content to a different
+filename under one of three conditions:
+
+  * To make both pieces of a conflict available to a user during conflict
+    resolution (examples: directory/file conflict, add/add type conflict
+    such as symlink vs. regular file)
+
+  * When MERGE_SIDE1 renames the file.
+
+  * When MERGE_SIDE2 renames the file.
+
+First, let's remember what commits are involved in the first and second
+picks of the cherry-pick or rebase sequence:
+
+To create A':
+   MERGE_BASE:   E
+   MERGE_SIDE1:  G
+   MERGE_SIDE2:  A
+
+To create B':
+   MERGE_BASE:   A
+   MERGE_SIDE1:  A'
+   MERGE_SIDE2:  B
+
+So, in particular, we need to show that the renames between E and G are a
+superset of those between A and A'.
+
+A' is created by the first merge.  A' will only have renames for one of the
+three reasons listed above.  The first case, a conflict, results in a
+situation where the cache is dropped and thus this optimization doesn't
+take effect, so we need not consider that case.  The third case, a rename
+on MERGE_SIDE2 (i.e. from G to A), will show up in A' but it also shows up
+in A -- therefore when diffing A and A' that path does not show up as a
+rename.  The only remaining way for renames to show up in A' is for the
+rename to come from MERGE_SIDE1.  Therefore, all renames between A and A'
+are a subset of those between E and G.  Equivalently, all renames between E
+and G are a superset of those between A and A'.
+
+
+=== 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_   ===
+===    always also a rename on MERGE_SIDE1 for the next pick.        ===
+
+Let's again look at the first two picks:
+
+To create A':
+   MERGE_BASE:   E
+   MERGE_SIDE1:  G
+   MERGE_SIDE2:  A
+
+To create B':
+   MERGE_BASE:   A
+   MERGE_SIDE1:  A'
+   MERGE_SIDE2:  B
+
+Now let's look at any given rename from MERGE_SIDE1 of the first pick, i.e.
+any given rename from E to G.  Let's use the filenames 'oldfile' and
+'newfile' for demonstration purposes.  That first pick will function as
+follows; when the rename is detected, the merge machinery will do a
+three-way content merge of the following:
+    E:oldfile
+    G:newfile
+    A:oldfile
+and produce a new result:
+    A':newfile
+
+Note above that I've assumed that E->A did not rename oldfile.  If that
+side did rename, then we most likely have a rename/rename(1to2) conflict
+that will cause the rebase or cherry-pick operation to halt and drop the
+in-memory cache of renames and thus doesn't need to be considered further.
+In the special case that E->A does rename the file but also renames it to
+newfile, then there is no conflict from the renaming and the merge can
+succeed.  In this special case, the rename is not valid to cache because
+the second merge will find A:newfile in the MERGE_BASE.  So a
+rename/rename(1to1) needs to be specially handled by pruning renames from
+the cache and decrementing the dir_rename_counts in the current and leading
+directories associated with those renames.  Or, since these are really
+rare, one could just take the easy way out and disable the remembering
+renames optimization when a rename/rename(1to1) happens.
+
+The previous paragraph handled the cases for E->A renaming oldfile, let's
+continue assuming that oldfile is not renamed in A.
+
+As per the diagram for creating B', MERGE_SIDE1 involves the changes from A
+to A'.  So, we are curious whether A:oldfile and A':newfile will be viewed
+as renames.  Note that:
+
+  * There will be no A':oldfile (because there could not have been a
+    G:oldfile as we do not do break detection in the merge machinery and
+    G:newfile was detected as a rename, and by the construction of the
+    rename above that merged cleanly, the merge machinery will ensure there
+    is no 'oldfile' in the result).
+
+  * There will be no A:newfile (if there had been, we would have had a
+    rename/add conflict).
+
+  * Clearly A:oldfile and A':newfile are "related" (A':newfile came from a
+    clean three-way content merge involving A:oldfile).
+
+We can also expound on the third point above, by noting that three-way
+content merges can also be viewed as applying the differences between the
+base and one side to the other side.  Thus we can view A':newfile as
+having been created by taking the changes between E:oldfile and G:newfile
+(which were detected as being related, i.e. <50% changed) to A:oldfile.
+
+Thus A:oldfile and A':newfile are just as related as E:oldfile and
+G:newfile are -- they have exactly identical differences.  Since the latter
+were detected as renames, A:oldfile and A':newfile should also be
+detectable as renames almost always.
+
+
+=== 4. A detailed description of the counter-examples to #3.         ===
+
+We already noted in section 3 that rename/rename(1to1) (i.e. both sides
+renaming a file the same way) was one counter-example.  The more
+interesting bit, though, is why did we need to use the "almost" qualifier
+when stating that A:oldfile and A':newfile are "almost" always detectable
+as renames?
+
+Let's repeat an earlier point that section 3 made:
+
+  A':newfile was created by applying the changes between E:oldfile and
+  G:newfile to A:oldfile.  The changes between E:oldfile and G:newfile were
+  <50% of the size of E:oldfile.
+
+If those changes that were <50% of the size of E:oldfile are also <50% of
+the size of A:oldfile, then A:oldfile and A':newfile will be detectable as
+renames.  However, if there is a dramatic size reduction between E:oldfile
+and A:oldfile (but the changes between E:oldfile, G:newfile, and A:oldfile
+still somehow merge cleanly), then traditional rename detection would not
+detect A:oldfile and A':newfile as renames.
+
+Here's an example where that can happen:
+  * E:oldfile had 20 lines
+  * G:newfile added 10 new lines at the beginning of the file
+  * A:oldfile kept the first 3 lines of the file, and deleted all the rest
+then
+  => A':newfile would have 13 lines, 3 of which matches those in A:oldfile.
+E:oldfile -> G:newfile would be detected as a rename, but A:oldfile and
+A':newfile would not be.
+
+
+=== 5. Why the special cases in #4 are still fully reasonable to use to    ===
+===    pair up files for three-way content merging in the merge machinery, ===
+===    and why they do not affect the correctness of the merge.            ===
+
+In the rename/rename(1to1) case, A:newfile and A':newfile are not renames
+since they use the *same* filename.  However, files with the same filename
+are obviously fine to pair up for three-way content merging (the merge
+machinery has never employed break detection).  The interesting
+counter-example case is thus not the rename/rename(1to1) case, but the case
+where A did not rename oldfile.  That was the case that we spent most of
+the time discussing in sections 3 and 4.  The remainder of this section
+will be devoted to that case as well.
+
+So, even if A:oldfile and A':newfile aren't detectable as renames, why is
+it still reasonable to pair them up for three-way content merging in the
+merge machinery?  There are multiple reasons:
+
+  * As noted in sections 3 and 4, the diff between A:oldfile and A':newfile
+    is *exactly* the same as the diff between E:oldfile and G:newfile.  The
+    latter pair were detected as renames, so it seems unlikely to surprise
+    users for us to treat A:oldfile and A':newfile as renames.
+
+  * In fact, "oldfile" and "newfile" were at one point detected as renames
+    due to how they were constructed in the E..G chain.  And we used that
+    information once already in this rebase/cherry-pick.  I think users
+    would be unlikely to be surprised at us continuing to treat the files
+    as renames and would quickly understand why we had done so.
+
+  * Marking or declaring files as renames is *not* the end goal for merges.
+    Merges use renames to determine which files make sense to be paired up
+    for three-way content merges.
+
+  * A:oldfile and A':newfile were _already_ paired up in a three-way
+    content merge; that is how A':newfile was created.  In fact, that
+    three-way content merge was clean.  So using them again in a later
+    three-way content merge seems very reasonable.
+
+However, the above is focusing on the common scenarios.  Let's try to look
+at all possible unusual scenarios and compare without the optimization to
+with the optimization.  Consider the following theoretical cases; we will
+then dive into each to determine which of them are possible,
+and if so, what they mean:
+
+  1. Without the optimization, the second merge results in a conflict.
+     With the optimization, the second merge also results in a conflict.
+     Questions: Are the conflicts confusingly different?  Better in one case?
+
+  2. Without the optimization, the second merge results in NO conflict.
+     With the optimization, the second merge also results in NO conflict.
+     Questions: Are the merges the same?
+
+  3. Without the optimization, the second merge results in a conflict.
+     With the optimization, the second merge results in NO conflict.
+     Questions: Possible?  Bug, bugfix, or something else?
+
+  4. Without the optimization, the second merge results in NO conflict.
+     With the optimization, the second merge results in a conflict.
+     Questions: Possible?  Bug, bugfix, or something else?
+
+I'll consider all four cases, but out of order.
+
+The fourth case is impossible.  For the code without the remembering
+renames optimization to not get a conflict, B:oldfile would need to exactly
+match A:oldfile -- if it doesn't, there would be a modify/delete conflict.
+If A:oldfile matches B:oldfile exactly, then a three-way content merge
+between A:oldfile, A':newfile, and B:oldfile would have no conflict and
+just give us the version of newfile from A' as the result.
+
+From the same logic as the above paragraph, the second case would indeed
+result in identical merges.  When A:oldfile exactly matches B:oldfile, an
+undetected rename would say, "Oh, I see one side didn't modify 'oldfile'
+and the other side deleted it.  I'll delete it.  And I see you have this
+brand new file named 'newfile' in A', so I'll keep it."  That gives the
+same results as three-way content merging A:oldfile, A':newfile, and
+B:oldfile -- a removal of oldfile with the version of newfile from A'
+showing up in the result.
+
+The third case is interesting.  It means that A:oldfile and A':newfile were
+not just similar enough, but that the changes between them did not conflict
+with the changes between A:oldfile and B:oldfile.  This would validate our
+hunch that the files were similar enough to be used in a three-way content
+merge, and thus seems entirely correct for us to have used them that way.
+(Sidenote: One particular example here may be enlightening.  Let's say that
+B was an immediate revert of A.  B clearly would have been a clean revert
+of A, since A was B's immediate parent.  One would assume that if you can
+pick a commit, you should also be able to cherry-pick its immediate revert.
+However, this is one of those funny corner cases; without this
+optimization, we just successfully picked a commit cleanly, but we are
+unable to cherry-pick its immediate revert due to the size differences
+between E:oldfile and A:oldfile.)
+
+That leaves only the first case to consider -- when we get conflicts both
+with or without the optimization.  Without the optimization, we'll have a
+modify/delete conflict, where both A':newfile and B:oldfile are left in the
+tree for the user to deal with and no hints about the potential similarity
+between the two.  With the optimization, we'll have a three-way content
+merged A:oldfile, A':newfile, and B:oldfile with conflict markers
+suggesting we thought the files were related but giving the user the chance
+to resolve.  As noted above, I don't think users will find us treating
+'oldfile' and 'newfile' as related as a surprise since they were between E
+and G.  In any event, though, this case shouldn't be concerning since we
+hit a conflict in both cases, told the user what we know, and asked them to
+resolve it.
+
+So, in summary, case 4 is impossible, case 2 yields the same behavior, and
+cases 1 and 3 seem to provide as good or better behavior with the
+optimization than without.
+
+
+=== 6. Interaction with skipping of "irrelevant" renames ===
+
+Previous optimizations involved skipping rename detection for paths
+considered to be "irrelevant".  See for example the following commits:
+
+  * 32a56dfb99 ("merge-ort: precompute subset of sources for which we
+		need rename detection", 2021-03-11)
+  * 2fd9eda462 ("merge-ort: precompute whether directory rename
+		detection is needed", 2021-03-11)
+  * 9bd342137e ("diffcore-rename: determine which relevant_sources are
+		no longer relevant", 2021-03-13)
+
+Relevance is always determined by what the _other_ side of history has
+done, in terms of modifing a file that our side renamed, or adding a
+file to a directory which our side renamed.  This means that a path
+that is "irrelevant" when picking the first commit of a series in a
+rebase or cherry-pick, may suddenly become "relevant" when picking the
+next commit.
+
+The upshot of this is that we can only cache rename detection results
+for relevant paths, and need to re-check relevance in subsequent
+commits.  If those subsequent commits have additional paths that are
+relevant for rename detection, then we will need to redo rename
+detection -- though we can limit it to the paths for which we have not
+already detected renames.
+
+
+=== 7. Additional items that need to be cached ===
+
+It turns out we have to cache more than just renames; we also cache:
+
+  A) non-renames (i.e. unpaired deletes)
+  B) counts of renames within directories
+  C) sources that were marked as RELEVANT_LOCATION, but which were
+     downgraded to RELEVANT_NO_MORE
+  D) the toplevel trees involved in the merge
+
+These are all stored in struct rename_info, and respectively appear in
+  * cached_pairs (along side actual renames, just with a value of NULL)
+  * dir_rename_counts
+  * cached_irrelevant
+  * merge_trees
+
+The reason for (A) comes from the irrelevant renames skipping
+optimization discussed in section 6.  The fact that irrelevant renames
+are skipped means we only get a subset of the potential renames
+detected and subsequent commits may need to run rename detection on
+the upstream side on a subset of the remaining renames (to get the
+renames that are relevant for that later commit).  Since unpaired
+deletes are involved in rename detection too, we don't want to
+repeatedly check that those paths remain unpaired on the upstream side
+with every commit we are transplanting.
+
+The reason for (B) is that diffcore_rename_extended() is what
+generates the counts of renames by directory which is needed in
+directory rename detection, and if we don't run
+diffcore_rename_extended() again then we need to have the output from
+it, including dir_rename_counts, from the previous run.
+
+The reason for (C) is that merge-ort's tree traversal will again think
+those paths are relevant (marking them as RELEVANT_LOCATION), but the
+fact that they were downgraded to RELEVANT_NO_MORE means that
+dir_rename_counts already has the information we need for directory
+rename detection.  (A path which becomes RELEVANT_CONTENT in a
+subsequent commit will be removed from cached_irrelevant.)
+
+The reason for (D) is that is how we determine whether the remember
+renames optimization can be used.  In particular, remembering that our
+sequence of merges looks like:
+
+   Merge 1:
+   MERGE_BASE:   E
+   MERGE_SIDE1:  G
+   MERGE_SIDE2:  A
+   => Creates    A'
+
+   Merge 2:
+   MERGE_BASE:   A
+   MERGE_SIDE1:  A'
+   MERGE_SIDE2:  B
+   => Creates    B'
+
+It is the fact that the trees A and A' appear both in Merge 1 and in
+Merge 2, with A as a parent of A' that allows this optimization.  So
+we store the trees to compare with what we are asked to merge next
+time.
+
+
+=== 8. How directory rename detection interacts with the above and   ===
+===    why this optimization is still safe even if                   ===
+===    merge.directoryRenames is set to "true".                      ===
+
+As noted in the assumptions section:
+
+    """
+    ...if directory renames do occur, then the default of
+    merge.directoryRenames being set to "conflict" means that the operation
+    will stop for users to resolve the conflicts and the cache will be
+    thrown away, and thus that there won't be an optimization to apply.
+    So, the only reason we need to address directory renames specifically,
+    is that some users will have set merge.directoryRenames to "true" to
+    allow the merges to continue to proceed automatically.
+    """
+
+Let's remember that we need to look at how any given pick affects the next
+one.  So let's again use the first two picks from the diagram in section
+one:
+
+  First pick does this three-way merge:
+    MERGE_BASE:   E
+    MERGE_SIDE1:  G
+    MERGE_SIDE2:  A
+    => creates A'
+
+  Second pick does this three-way merge:
+    MERGE_BASE:   A
+    MERGE_SIDE1:  A'
+    MERGE_SIDE2:  B
+    => creates B'
+
+Now, directory rename detection exists so that if one side of history
+renames a directory, and the other side adds a new file to the old
+directory, then the merge (with merge.directoryRenames=true) can move the
+file into the new directory.  There are two qualitatively different ways to
+add a new file to an old directory: create a new file, or rename a file
+into that directory.  Also, directory renames can be done on either side of
+history, so there are four cases to consider:
+
+  * MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to   old dir
+  * MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames  file into old dir
+  * MERGE_SIDE1 adds new file to   old dir, MERGE_SIDE2 renames old dir
+  * MERGE_SIDE1 renames  file into old dir, MERGE_SIDE2 renames old dir
+
+One last note before we consider these four cases: There are some
+important properties about how we implement this optimization with
+respect to directory rename detection that we need to bear in mind
+while considering all of these cases:
+
+  * rename caching occurs *after* applying directory renames
+
+  * a rename created by directory rename detection is recorded for the side
+    of history that did the directory rename.
+
+  * dir_rename_counts, the nested map of
+	{oldname => {newname => count}},
+    is cached between runs as well.  This basically means that directory
+    rename detection is also cached, though only on the side of history
+    that we cache renames for (MERGE_SIDE1 as far as this document is
+    concerned; see the assumptions section).  Two interesting sub-notes
+    about these counts:
+
+    * If we need to perform rename-detection again on the given side (e.g.
+      some paths are relevant for rename detection that weren't before),
+      then we clear dir_rename_counts and recompute it, making use of
+      cached_pairs.  The reason it is important to do this is optimizations
+      around RELEVANT_LOCATION exist to prevent us from computing
+      unnecessary renames for directory rename detection and from computing
+      dir_rename_counts for irrelevant directories; but those same renames
+      or directories may become necessary for subsequent merges.  The
+      easiest way to "fix up" dir_rename_counts in such cases is to just
+      recompute it.
+
+    * If we prune rename/rename(1to1) entries from the cache, then we also
+      need to update dir_rename_counts to decrement the counts for the
+      involved directory and any relevant parent directories (to undo what
+      update_dir_rename_counts() in diffcore-rename.c incremented when the
+      rename was initially found).  If we instead just disable the
+      remembering renames optimization when the exceedingly rare
+      rename/rename(1to1) cases occur, then dir_rename_counts will get
+      re-computed the next time rename detection occurs, as noted above.
+
+  * the side with multiple commits to pick, is the side of history that we
+    do NOT cache renames for.  Thus, there are no additional commits to
+    change the number of renames in a directory, except for those done by
+    directory rename detection (which always pad the majority).
+
+  * the "renames" we cache are modified slightly by any directory rename,
+    as noted below.
+
+Now, with those notes out of the way, let's go through the four cases
+in order:
+
+Case 1: MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir
+
+  This case looks like this:
+
+    MERGE_BASE:   E,   Has olddir/
+    MERGE_SIDE1:  G,   Renames olddir/ -> newdir/
+    MERGE_SIDE2:  A,   Adds olddir/newfile
+    => creates    A',  With newdir/newfile
+
+    MERGE_BASE:   A,   Has olddir/newfile
+    MERGE_SIDE1:  A',  Has newdir/newfile
+    MERGE_SIDE2:  B,   Modifies olddir/newfile
+    => expected   B',  with threeway-merged newdir/newfile from above
+
+  In this case, with the optimization, note that after the first commit:
+    * MERGE_SIDE1 remembers olddir/ -> newdir/
+    * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile
+  Given the cached rename noted above, the second merge can proceed as
+  expected without needing to perform rename detection from A -> A'.
+
+Case 2: MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames  file into old dir
+
+  This case looks like this:
+    MERGE_BASE:   E    oldfile, olddir/
+    MERGE_SIDE1:  G    oldfile, olddir/ -> newdir/
+    MERGE_SIDE2:  A    oldfile -> olddir/newfile
+    => creates    A',  With newdir/newfile representing original oldfile
+
+    MERGE_BASE:   A    olddir/newfile
+    MERGE_SIDE1:  A'   newdir/newfile
+    MERGE_SIDE2:  B    modify olddir/newfile
+    => expected   B',  with threeway-merged newdir/newfile from above
+
+  In this case, with the optimization, note that after the first commit:
+    * MERGE_SIDE1 remembers olddir/ -> newdir/
+    * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile
+		  (NOT oldfile -> newdir/newfile; compare to case with
+		   (p->status == 'R' && new_path) in possibly_cache_new_pair())
+
+  Given the cached rename noted above, the second merge can proceed as
+  expected without needing to perform rename detection from A -> A'.
+
+Case 3: MERGE_SIDE1 adds new file to   old dir, MERGE_SIDE2 renames old dir
+
+  This case looks like this:
+
+    MERGE_BASE:   E,   Has olddir/
+    MERGE_SIDE1:  G,   Adds olddir/newfile
+    MERGE_SIDE2:  A,   Renames olddir/ -> newdir/
+    => creates    A',  With newdir/newfile
+
+    MERGE_BASE:   A,   Has newdir/, but no notion of newdir/newfile
+    MERGE_SIDE1:  A',  Has newdir/newfile
+    MERGE_SIDE2:  B,   Has newdir/, but no notion of newdir/newfile
+    => expected   B',  with newdir/newfile from A'
+
+  In this case, with the optimization, note that after the first commit there
+  were no renames on MERGE_SIDE1, and any renames on MERGE_SIDE2 are tossed.
+  But the second merge didn't need any renames so this is fine.
+
+Case 4: MERGE_SIDE1 renames  file into old dir, MERGE_SIDE2 renames old dir
+
+  This case looks like this:
+
+    MERGE_BASE:   E,   Has olddir/
+    MERGE_SIDE1:  G,   Renames oldfile -> olddir/newfile
+    MERGE_SIDE2:  A,   Renames olddir/ -> newdir/
+    => creates    A',  With newdir/newfile representing original oldfile
+
+    MERGE_BASE:   A,   Has oldfile
+    MERGE_SIDE1:  A',  Has newdir/newfile
+    MERGE_SIDE2:  B,   Modifies oldfile
+    => expected   B',  with threeway-merged newdir/newfile from above
+
+  In this case, with the optimization, note that after the first commit:
+    * MERGE_SIDE1 remembers oldfile -> newdir/newfile
+		  (NOT oldfile -> olddir/newfile; compare to case of second
+		   block under p->status == 'R' in possibly_cache_new_pair())
+    * MERGE_SIDE2 renames are tossed because only MERGE_SIDE1 is remembered
+
+  Given the cached rename noted above, the second merge can proceed as
+  expected without needing to perform rename detection from A -> A'.
+
+Finally, I'll just note here that interactions with the
+skip-irrelevant-renames optimization means we sometimes don't detect
+renames for any files within a directory that was renamed, in which
+case we will not have been able to detect any rename for the directory
+itself.  In such a case, we do not know whether the directory was
+renamed; we want to be careful to avoid cacheing some kind of "this
+directory was not renamed" statement.  If we did, then a subsequent
+commit being rebased could add a file to the old directory, and the
+user would expect it to end up in the correct directory -- something
+our erroneous "this directory was not renamed" cache would preclude.

From caba91c373127a316b5442a6aac7080fee9bd624 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:31 +0000
Subject: [PATCH 036/110] fast-rebase: change assert() to BUG()

assert() can succinctly document expectations for the code, and do so in
a way that may be useful to future folks trying to refactor the code and
change basic assumptions; it allows them to more quickly find some
places where their violations of previous assumptions trips things up.

Unfortunately, assert() can surround a function call with important
side-effects, which is a huge mistake since some users will compile with
assertions disabled.  I've had to debug such mistakes before in other
codebases, so I should know better.  Luckily, this was only in test
code, but it's still very embarrassing.  Change an assert() to an if
(...) BUG (...).

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/helper/test-fast-rebase.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/helper/test-fast-rebase.c b/t/helper/test-fast-rebase.c
index 373212256a6628..39fb7f41e8c15c 100644
--- a/t/helper/test-fast-rebase.c
+++ b/t/helper/test-fast-rebase.c
@@ -124,7 +124,8 @@ int cmd__fast_rebase(int argc, const char **argv)
 	assert(oideq(&onto->object.oid, &head));
 
 	hold_locked_index(&lock, LOCK_DIE_ON_ERROR);
-	assert(repo_read_index(the_repository) >= 0);
+	if (repo_read_index(the_repository) < 0)
+		BUG("Could not read index");
 
 	repo_init_revisions(the_repository, &revs, NULL);
 	revs.verbose_header = 1;

From f9500261e0aea2bebb527281462d650be1db38a4 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:32 +0000
Subject: [PATCH 037/110] fast-rebase: write conflict state to working tree,
 index, and HEAD

Previously, when fast-rebase hit a conflict, it simply aborted and left
HEAD, the index, and the working tree where they were before the
operation started.  While fast-rebase does not support restarting from a
conflicted state, write the conflicted state out anyway as it gives us a
way to see what the conflicts are and write tests that check for them.

This will be important in the upcoming commits, because sequencer.c is
only superficially integrated with merge-ort.c; in particular, it calls
merge_switch_to_result() after EACH merge instead of only calling it at
the end of all the sequence of merges (or when a conflict is hit).  This
not only causes needless updates to the working copy and index, but also
causes all intermediate data to be freed and tossed, preventing caching
information from one merge to the next.  However, integrating
sequencer.c more deeply with merge-ort.c is a big task, and making this
small extension to fast-rebase.c provides us with a simple way to test
the edge and corner cases that we want to make sure continue working.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/helper/test-fast-rebase.c | 51 +++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/t/helper/test-fast-rebase.c b/t/helper/test-fast-rebase.c
index 39fb7f41e8c15c..fc2d460904340a 100644
--- a/t/helper/test-fast-rebase.c
+++ b/t/helper/test-fast-rebase.c
@@ -91,7 +91,6 @@ int cmd__fast_rebase(int argc, const char **argv)
 	struct commit *last_commit = NULL, *last_picked_commit = NULL;
 	struct object_id head;
 	struct lock_file lock = LOCK_INIT;
-	int clean = 1;
 	struct strvec rev_walk_args = STRVEC_INIT;
 	struct rev_info revs;
 	struct commit *commit;
@@ -176,11 +175,10 @@ int cmd__fast_rebase(int argc, const char **argv)
 		free((char*)merge_opt.ancestor);
 		merge_opt.ancestor = NULL;
 		if (!result.clean)
-			die("Aborting: Hit a conflict and restarting is not implemented.");
+			break;
 		last_picked_commit = commit;
 		last_commit = create_commit(result.tree, commit, last_commit);
 	}
-	fprintf(stderr, "\nDone.\n");
 	/* TODO: There should be some kind of rev_info_free(&revs) call... */
 	memset(&revs, 0, sizeof(revs));
 
@@ -189,24 +187,39 @@ int cmd__fast_rebase(int argc, const char **argv)
 	if (result.clean < 0)
 		exit(128);
 
-	strbuf_addf(&reflog_msg, "finish rebase %s onto %s",
-		    oid_to_hex(&last_picked_commit->object.oid),
-		    oid_to_hex(&last_commit->object.oid));
-	if (update_ref(reflog_msg.buf, branch_name.buf,
-		       &last_commit->object.oid,
-		       &last_picked_commit->object.oid,
-		       REF_NO_DEREF, UPDATE_REFS_MSG_ON_ERR)) {
-		error(_("could not update %s"), argv[4]);
-		die("Failed to update %s", argv[4]);
+	if (result.clean) {
+		fprintf(stderr, "\nDone.\n");
+		strbuf_addf(&reflog_msg, "finish rebase %s onto %s",
+			    oid_to_hex(&last_picked_commit->object.oid),
+			    oid_to_hex(&last_commit->object.oid));
+		if (update_ref(reflog_msg.buf, branch_name.buf,
+			       &last_commit->object.oid,
+			       &last_picked_commit->object.oid,
+			       REF_NO_DEREF, UPDATE_REFS_MSG_ON_ERR)) {
+			error(_("could not update %s"), argv[4]);
+			die("Failed to update %s", argv[4]);
+		}
+		if (create_symref("HEAD", branch_name.buf, reflog_msg.buf) < 0)
+			die(_("unable to update HEAD"));
+		strbuf_release(&reflog_msg);
+		strbuf_release(&branch_name);
+
+		prime_cache_tree(the_repository, the_repository->index,
+				 result.tree);
+	} else {
+		fprintf(stderr, "\nAborting: Hit a conflict.\n");
+		strbuf_addf(&reflog_msg, "rebase progress up to %s",
+			    oid_to_hex(&last_picked_commit->object.oid));
+		if (update_ref(reflog_msg.buf, "HEAD",
+			       &last_commit->object.oid,
+			       &head,
+			       REF_NO_DEREF, UPDATE_REFS_MSG_ON_ERR)) {
+			error(_("could not update %s"), argv[4]);
+			die("Failed to update %s", argv[4]);
+		}
 	}
-	if (create_symref("HEAD", branch_name.buf, reflog_msg.buf) < 0)
-		die(_("unable to update HEAD"));
-	strbuf_release(&reflog_msg);
-	strbuf_release(&branch_name);
-
-	prime_cache_tree(the_repository, the_repository->index, result.tree);
 	if (write_locked_index(&the_index, &lock,
 			       COMMIT_LOCK | SKIP_IF_UNCHANGED))
 		die(_("unable to write %s"), get_index_file());
-	return (clean == 0);
+	return (result.clean == 0);
 }

From a22099f552d5e67dc71c2bd92c4b711387e9695f Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:33 +0000
Subject: [PATCH 038/110] t6429: testcases for remembering renames

We will soon be adding an optimization that caches (in memory only,
never written to disk) upstream renames during a sequence of merges such
as occurs during a cherry-pick or rebase operation.  Add several tests
meant to stress such an implementation to ensure it does the right
thing, and include a test whose outcome we will later change due to this
optimization as well.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .../technical/remembering-renames.txt         |  14 +-
 t/t6429-merge-sequence-rename-caching.sh      | 692 ++++++++++++++++++
 2 files changed, 700 insertions(+), 6 deletions(-)
 create mode 100755 t/t6429-merge-sequence-rename-caching.sh

diff --git a/Documentation/technical/remembering-renames.txt b/Documentation/technical/remembering-renames.txt
index 251ce477e059da..2fd5cc88e0b808 100644
--- a/Documentation/technical/remembering-renames.txt
+++ b/Documentation/technical/remembering-renames.txt
@@ -214,12 +214,14 @@ in-memory cache of renames and thus doesn't need to be considered further.
 In the special case that E->A does rename the file but also renames it to
 newfile, then there is no conflict from the renaming and the merge can
 succeed.  In this special case, the rename is not valid to cache because
-the second merge will find A:newfile in the MERGE_BASE.  So a
-rename/rename(1to1) needs to be specially handled by pruning renames from
-the cache and decrementing the dir_rename_counts in the current and leading
-directories associated with those renames.  Or, since these are really
-rare, one could just take the easy way out and disable the remembering
-renames optimization when a rename/rename(1to1) happens.
+the second merge will find A:newfile in the MERGE_BASE (see also the new
+testcases in t6429 with "rename same file identically" in their
+description).  So a rename/rename(1to1) needs to be specially handled by
+pruning renames from the cache and decrementing the dir_rename_counts in
+the current and leading directories associated with those renames.  Or,
+since these are really rare, one could just take the easy way out and
+disable the remembering renames optimization when a rename/rename(1to1)
+happens.
 
 The previous paragraph handled the cases for E->A renaming oldfile, let's
 continue assuming that oldfile is not renamed in A.
diff --git a/t/t6429-merge-sequence-rename-caching.sh b/t/t6429-merge-sequence-rename-caching.sh
new file mode 100755
index 00000000000000..f47d8924ee730d
--- /dev/null
+++ b/t/t6429-merge-sequence-rename-caching.sh
@@ -0,0 +1,692 @@
+#!/bin/sh
+
+test_description="remember regular & dir renames in sequence of merges"
+
+. ./test-lib.sh
+
+#
+# NOTE 1: this testfile tends to not only rename files, but modify on both
+#         sides; without modifying on both sides, optimizations can kick in
+#         which make rename detection irrelevant or trivial.  We want to make
+#         sure that we are triggering rename caching rather than rename
+#         bypassing.
+#
+# NOTE 2: this testfile uses 'test-tool fast-rebase' instead of either
+#         cherry-pick or rebase.  sequencer.c is only superficially
+#         integrated with merge-ort; it calls merge_switch_to_result()
+#         after EACH merge, which updates the index and working copy AND
+#         throws away the cached results (because merge_switch_to_result()
+#         is only supposed to be called at the end of the sequence).
+#         Integrating them more deeply is a big task, so for now the tests
+#         use 'test-tool fast-rebase'.
+#
+
+
+#
+# In the following simple testcase:
+#   Base:     numbers_1, values_1
+#   Upstream: numbers_2, values_2
+#   Topic_1:  sequence_3
+#   Topic_2:  scruples_3
+# or, in english, rename numbers -> sequence in the first commit, and rename
+# values -> scruples in the second commit.
+#
+# This shouldn't be a challenge, it's just verifying that cached renames isn't
+# preventing us from finding new renames.
+#
+test_expect_success 'caching renames does not preclude finding new ones' '
+	test_create_repo caching-renames-and-new-renames &&
+	(
+		cd caching-renames-and-new-renames &&
+
+		test_seq 2 10 >numbers &&
+		test_seq 2 10 >values &&
+		git add numbers values &&
+		git commit -m orig &&
+
+		git branch upstream &&
+		git branch topic &&
+
+		git switch upstream &&
+		test_seq 1 10 >numbers &&
+		test_seq 1 10 >values &&
+		git add numbers values &&
+		git commit -m "Tweaked both files" &&
+
+		git switch topic &&
+
+		test_seq 2 12 >numbers &&
+		git add numbers &&
+		git mv numbers sequence &&
+		git commit -m A &&
+
+		test_seq 2 12 >values &&
+		git add values &&
+		git mv values scruples &&
+		git commit -m B &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+
+		test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		#git cherry-pick upstream~1..topic
+
+		git ls-files >tracked-files &&
+		test_line_count = 2 tracked-files &&
+		test_seq 1 12 >expect &&
+		test_cmp expect sequence &&
+		test_cmp expect scruples
+	)
+'
+
+#
+# In the following testcase:
+#   Base:     numbers_1
+#   Upstream: rename numbers_1 -> sequence_2
+#   Topic_1:  numbers_3
+#   Topic_2:  numbers_1
+# or, in english, the first commit on the topic branch modifies numbers by
+# shrinking it (dramatically) and the second commit on topic reverts its
+# parent.
+#
+# Can git apply both patches?
+#
+# Traditional cherry-pick/rebase will fail to apply the second commit, the
+# one that reverted its parent, because despite detecting the rename from
+# 'numbers' to 'sequence' for the first commit, it fails to detect that
+# rename when picking the second commit.  That's "reasonable" given the
+# dramatic change in size of the file, but remembering the rename and
+# reusing it is reasonable too.
+#
+# Rename detection (diffcore_rename_extended()) will run twice here; it is
+# not needed on the topic side of history for either of the two commits
+# being merged, but it is needed on the upstream side of history for each
+# commit being picked.
+test_expect_success 'cherry-pick both a commit and its immediate revert' '
+	test_create_repo pick-commit-and-its-immediate-revert &&
+	(
+		cd pick-commit-and-its-immediate-revert &&
+
+		test_seq 11 30 >numbers &&
+		git add numbers &&
+		git commit -m orig &&
+
+		git branch upstream &&
+		git branch topic &&
+
+		git switch upstream &&
+		test_seq 1 30 >numbers &&
+		git add numbers &&
+		git mv numbers sequence &&
+		git commit -m "Renamed (and modified) numbers -> sequence" &&
+
+		git switch topic &&
+
+		test_seq 11 13 >numbers &&
+		git add numbers &&
+		git commit -m A &&
+
+		git revert HEAD &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+
+		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
+		export GIT_TRACE2_PERF &&
+
+		test_might_fail test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		#git cherry-pick upstream~1..topic &&
+
+		grep region_enter.*diffcore_rename trace.output >calls &&
+		test_line_count = 2 calls
+	)
+'
+
+#
+# In the following testcase:
+#   Base:     sequence_1
+#   Upstream: rename sequence_1 -> values_2
+#   Topic_1:  rename sequence_1 -> values_3
+#   Topic_2:  add unrelated sequence_4
+# or, in english, both sides rename sequence -> values, and then the second
+# commit on the topic branch adds an unrelated file called sequence.
+#
+# This testcase presents no problems for git traditionally, but having both
+# sides do the same rename in effect "uses it up" and if it remains cached,
+# could cause a spurious rename/add conflict.
+#
+test_expect_success 'rename same file identically, then reintroduce it' '
+	test_create_repo rename-rename-1to1-then-add-old-filename &&
+	(
+		cd rename-rename-1to1-then-add-old-filename &&
+
+		test_seq 3 8 >sequence &&
+		git add sequence &&
+		git commit -m orig &&
+
+		git branch upstream &&
+		git branch topic &&
+
+		git switch upstream &&
+		test_seq 1 8 >sequence &&
+		git add sequence &&
+		git mv sequence values &&
+		git commit -m "Renamed (and modified) sequence -> values" &&
+
+		git switch topic &&
+
+		test_seq 3 10 >sequence &&
+		git add sequence &&
+		git mv sequence values &&
+		git commit -m A &&
+
+		test_write_lines A B C D E F G H I J >sequence &&
+		git add sequence &&
+		git commit -m B &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+
+		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
+		export GIT_TRACE2_PERF &&
+
+		test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		#git cherry-pick upstream~1..topic &&
+
+		git ls-files >tracked &&
+		test_line_count = 2 tracked &&
+		test_path_is_file values &&
+		test_path_is_file sequence &&
+
+		grep region_enter.*diffcore_rename trace.output >calls &&
+		test_line_count = 2 calls
+	)
+'
+
+#
+# In the following testcase:
+#   Base:     olddir/{valuesZ_1, valuesY_1, valuesX_1}
+#   Upstream: rename olddir/valuesZ_1 -> dirA/valuesZ_2
+#             rename olddir/valuesY_1 -> dirA/valuesY_2
+#             rename olddir/valuesX_1 -> dirB/valuesX_2
+#   Topic_1:  rename olddir/valuesZ_1 -> dirA/valuesZ_3
+#             rename olddir/valuesY_1 -> dirA/valuesY_3
+#   Topic_2:  add olddir/newfile
+#   Expected Pick1: dirA/{valuesZ, valuesY}, dirB/valuesX
+#   Expected Pick2: dirA/{valuesZ, valuesY}, dirB/{valuesX, newfile}
+#
+# This testcase presents no problems for git traditionally, but having both
+# sides do the same renames in effect "use it up" but if the renames remain
+# cached, the directory rename could put newfile in the wrong directory.
+#
+test_expect_success 'rename same file identically, then add file to old dir' '
+	test_create_repo rename-rename-1to1-then-add-file-to-old-dir &&
+	(
+		cd rename-rename-1to1-then-add-file-to-old-dir &&
+
+		mkdir olddir/ &&
+		test_seq 3 8 >olddir/valuesZ &&
+		test_seq 3 8 >olddir/valuesY &&
+		test_seq 3 8 >olddir/valuesX &&
+		git add olddir &&
+		git commit -m orig &&
+
+		git branch upstream &&
+		git branch topic &&
+
+		git switch upstream &&
+		test_seq 1 8 >olddir/valuesZ &&
+		test_seq 1 8 >olddir/valuesY &&
+		test_seq 1 8 >olddir/valuesX &&
+		git add olddir &&
+		mkdir dirA &&
+		git mv olddir/valuesZ olddir/valuesY dirA &&
+		git mv olddir/ dirB/ &&
+		git commit -m "Renamed (and modified) values*" &&
+
+		git switch topic &&
+
+		test_seq 3 10 >olddir/valuesZ &&
+		test_seq 3 10 >olddir/valuesY &&
+		git add olddir &&
+		mkdir dirA &&
+		git mv olddir/valuesZ olddir/valuesY dirA &&
+		git commit -m A &&
+
+		>olddir/newfile &&
+		git add olddir/newfile &&
+		git commit -m B &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+		git config merge.directoryRenames true &&
+
+		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
+		export GIT_TRACE2_PERF &&
+
+		test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		#git cherry-pick upstream~1..topic &&
+
+		git ls-files >tracked &&
+		test_line_count = 4 tracked &&
+		test_path_is_file dirA/valuesZ &&
+		test_path_is_file dirA/valuesY &&
+		test_path_is_file dirB/valuesX &&
+		test_path_is_file dirB/newfile &&
+
+		grep region_enter.*diffcore_rename trace.output >calls &&
+		test_line_count = 3 calls
+	)
+'
+
+#
+# In the following testcase, upstream renames a directory, and the topic branch
+# first adds a file to the directory, then later renames the directory
+# differently:
+#   Base:     olddir/a
+#             olddir/b
+#   Upstream: rename olddir/ -> newdir/
+#   Topic_1:  add olddir/newfile
+#   Topic_2:  rename olddir/ -> otherdir/
+#
+# Here we are just concerned that cached renames might prevent us from seeing
+# the rename conflict, and we want to ensure that we do get a conflict.
+#
+# While at it, also test that we do rename detection three times.  We have to
+# detect renames on the upstream side of history once for each merge, plus
+# Topic_2 has renames.
+#
+test_expect_success 'cached dir rename does not prevent noticing later conflict' '
+	test_create_repo dir-rename-cache-not-occluding-later-conflict &&
+	(
+		cd dir-rename-cache-not-occluding-later-conflict &&
+
+		mkdir olddir &&
+		test_seq 3 10 >olddir/a &&
+		test_seq 3 10 >olddir/b &&
+		git add olddir &&
+		git commit -m orig &&
+
+		git branch upstream &&
+		git branch topic &&
+
+		git switch upstream &&
+		test_seq 3 10 >olddir/a &&
+		test_seq 3 10 >olddir/b &&
+		git add olddir &&
+		git mv olddir newdir &&
+		git commit -m "Dir renamed" &&
+
+		git switch topic &&
+
+		>olddir/newfile &&
+		git add olddir/newfile &&
+		git commit -m A &&
+
+		test_seq 1 8 >olddir/a &&
+		test_seq 1 8 >olddir/b &&
+		git add olddir &&
+		git mv olddir otherdir &&
+		git commit -m B &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+		git config merge.directoryRenames true &&
+
+		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
+		export GIT_TRACE2_PERF &&
+
+		test_must_fail test-tool fast-rebase --onto HEAD upstream~1 topic >output &&
+		#git cherry-pick upstream..topic &&
+
+		grep CONFLICT..rename/rename output &&
+
+		grep region_enter.*diffcore_rename trace.output >calls &&
+		test_line_count = 3 calls
+	)
+'
+
+# Helper for the next two tests
+test_setup_upstream_rename () {
+	test_create_repo $1 &&
+	(
+		cd $1 &&
+
+		test_seq 3 8 >somefile &&
+		test_seq 3 8 >relevant-rename &&
+		git add somefile relevant-rename &&
+		mkdir olddir &&
+		test_write_lines a b c d e f g >olddir/a &&
+		test_write_lines z y x w v u t >olddir/b &&
+		git add olddir &&
+		git commit -m orig &&
+
+		git branch upstream &&
+		git branch topic &&
+
+		git switch upstream &&
+		test_seq 1 8 >somefile &&
+		test_seq 1 8 >relevant-rename &&
+		git add somefile relevant-rename &&
+		git mv relevant-rename renamed &&
+		echo h >>olddir/a &&
+		echo s >>olddir/b &&
+		git add olddir &&
+		git mv olddir newdir &&
+		git commit -m "Dir renamed"
+	)
+}
+
+#
+# In the following testcase, upstream renames a file in the toplevel directory
+# as well as its only directory:
+#   Base:     relevant-rename_1
+#             somefile
+#             olddir/a
+#             olddir/b
+#   Upstream: rename relevant-rename_1 -> renamed_2
+#             rename olddir/           -> newdir/
+#   Topic_1:  relevant-rename_3
+#   Topic_2:  olddir/newfile_1
+#   Topic_3:  olddir/newfile_2
+#
+# In this testcase, since the first commit being picked only modifies a
+# file in the toplevel directory, the directory rename is irrelevant for
+# that first merge.  However, we need to notice the directory rename for
+# the merge that picks the second commit, and we don't want the third
+# commit to mess up its location either.  We want to make sure that
+# olddir/newfile doesn't exist in the result and that newdir/newfile does.
+#
+# We also expect rename detection to occur three times.  Although it is
+# typically needed two times per commit, there are no deleted files on the
+# topic side of history, so we only need to detect renames on the upstream
+# side for each of the 3 commits we need to pick.
+#
+test_expect_success 'dir rename unneeded, then add new file to old dir' '
+	test_setup_upstream_rename dir-rename-unneeded-until-new-file &&
+	(
+		cd dir-rename-unneeded-until-new-file &&
+
+		git switch topic &&
+
+		test_seq 3 10 >relevant-rename &&
+		git add relevant-rename &&
+		git commit -m A &&
+
+		echo foo >olddir/newfile &&
+		git add olddir/newfile &&
+		git commit -m B &&
+
+		echo bar >>olddir/newfile &&
+		git add olddir/newfile &&
+		git commit -m C &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+		git config merge.directoryRenames true &&
+
+		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
+		export GIT_TRACE2_PERF &&
+
+		test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		#git cherry-pick upstream..topic &&
+
+		grep region_enter.*diffcore_rename trace.output >calls &&
+		test_line_count = 3 calls &&
+
+		git ls-files >tracked &&
+		test_line_count = 5 tracked &&
+		test_path_is_missing olddir/newfile &&
+		test_path_is_file newdir/newfile
+	)
+'
+
+#
+# The following testcase is *very* similar to the last one, but instead of
+# adding a new olddir/newfile, it renames somefile -> olddir/newfile:
+#   Base:     relevant-rename_1
+#             somefile_1
+#             olddir/a
+#             olddir/b
+#   Upstream: rename relevant-rename_1 -> renamed_2
+#             rename olddir/           -> newdir/
+#   Topic_1:  relevant-rename_3
+#   Topic_2:  rename somefile -> olddir/newfile_2
+#   Topic_3:  modify olddir/newfile_3
+#
+# In this testcase, since the first commit being picked only modifies a
+# file in the toplevel directory, the directory rename is irrelevant for
+# that first merge.  However, we need to notice the directory rename for
+# the merge that picks the second commit, and we don't want the third
+# commit to mess up its location either.  We want to make sure that
+# neither somefile or olddir/newfile exists in the result and that
+# newdir/newfile does.
+#
+# This testcase needs one more call to rename detection than the last
+# testcase, because of the somefile -> olddir/newfile rename in Topic_2.
+test_expect_success 'dir rename unneeded, then rename existing file into old dir' '
+	test_setup_upstream_rename dir-rename-unneeded-until-file-moved-inside &&
+	(
+		cd dir-rename-unneeded-until-file-moved-inside &&
+
+		git switch topic &&
+
+		test_seq 3 10 >relevant-rename &&
+		git add relevant-rename &&
+		git commit -m A &&
+
+		test_seq 1 10 >somefile &&
+		git add somefile &&
+		git mv somefile olddir/newfile &&
+		git commit -m B &&
+
+		test_seq 1 12 >olddir/newfile &&
+		git add olddir/newfile &&
+		git commit -m C &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+		git config merge.directoryRenames true &&
+
+		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
+		export GIT_TRACE2_PERF &&
+
+		test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		#git cherry-pick upstream..topic &&
+
+		grep region_enter.*diffcore_rename trace.output >calls &&
+		test_line_count = 4 calls &&
+
+		test_path_is_missing somefile &&
+		test_path_is_missing olddir/newfile &&
+		test_path_is_file newdir/newfile &&
+		git ls-files >tracked &&
+		test_line_count = 4 tracked
+	)
+'
+
+# Helper for the next two tests
+test_setup_topic_rename () {
+	test_create_repo $1 &&
+	(
+		cd $1 &&
+
+		test_seq 3 8 >somefile &&
+		mkdir olddir &&
+		test_seq 3 8 >olddir/a &&
+		echo b >olddir/b &&
+		git add olddir somefile &&
+		git commit -m orig &&
+
+		git branch upstream &&
+		git branch topic &&
+
+		git switch topic &&
+		test_seq 1 8 >somefile &&
+		test_seq 1 8 >olddir/a &&
+		git add somefile olddir/a &&
+		git mv olddir newdir &&
+		git commit -m "Dir renamed" &&
+
+		test_seq 1 10 >somefile &&
+		git add somefile &&
+		mkdir olddir &&
+		>olddir/unrelated-file &&
+		git add olddir &&
+		git commit -m "Unrelated file in recreated old dir"
+	)
+}
+
+#
+# In the following testcase, the first commit on the topic branch renames
+# a directory, while the second recreates the old directory and places a
+# file into it:
+#   Base:     somefile
+#             olddir/a
+#             olddir/b
+#   Upstream: olddir/newfile
+#   Topic_1:  somefile_2
+#             rename olddir/ -> newdir/
+#   Topic_2:  olddir/unrelated-file
+#
+# Note that the first pick should merge:
+#   Base:     somefile
+#             olddir/{a,b}
+#   Upstream: olddir/newfile
+#   Topic_1:  rename olddir/ -> newdir/
+# For which the expected result (assuming merge.directoryRenames=true) is
+# clearly:
+#   Result:   somefile
+#             newdir/{a, b, newfile}
+#
+# While the second pick does the following three-way merge:
+#   Base (Topic_1):           somefile
+#                             newdir/{a,b}
+#   Upstream (Result from 1): same files as base, but adds newdir/newfile
+#   Topic_2:                  same files as base, but adds olddir/unrelated-file
+#
+# The second merge is pretty trivial; upstream adds newdir/newfile, and
+# topic_2 adds olddir/unrelated-file.  We're just testing that we don't
+# accidentally cache directory renames somehow and rename
+# olddir/unrelated-file to newdir/unrelated-file.
+#
+# This testcase should only need one call to diffcore_rename_extended().
+test_expect_success 'caching renames only on upstream side, part 1' '
+	test_setup_topic_rename cache-renames-only-upstream-add-file &&
+	(
+		cd cache-renames-only-upstream-add-file &&
+
+		git switch upstream &&
+
+		>olddir/newfile &&
+		git add olddir/newfile &&
+		git commit -m "Add newfile" &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+
+		git config merge.directoryRenames true &&
+
+		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
+		export GIT_TRACE2_PERF &&
+
+		test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		#git cherry-pick upstream..topic &&
+
+		grep region_enter.*diffcore_rename trace.output >calls &&
+		test_line_count = 1 calls &&
+
+		git ls-files >tracked &&
+		test_line_count = 5 tracked &&
+		test_path_is_missing newdir/unrelated-file &&
+		test_path_is_file olddir/unrelated-file &&
+		test_path_is_file newdir/newfile &&
+		test_path_is_file newdir/b &&
+		test_path_is_file newdir/a &&
+		test_path_is_file somefile
+	)
+'
+
+#
+# The following testcase is *very* similar to the last one, but instead of
+# adding a new olddir/newfile, it renames somefile -> olddir/newfile:
+#   Base:     somefile
+#             olddir/a
+#             olddir/b
+#   Upstream: somefile_1 -> olddir/newfile
+#   Topic_1:  rename olddir/ -> newdir/
+#             somefile_2
+#   Topic_2:  olddir/unrelated-file
+#             somefile_3
+#
+# Much like the previous test, this case is actually trivial and we are just
+# making sure there isn't some spurious directory rename caching going on
+# for the wrong side of history.
+#
+#
+# This testcase should only need three calls to diffcore_rename_extended(),
+# because there are no renames on the topic side of history for picking
+# Topic_2.
+#
+test_expect_success 'caching renames only on upstream side, part 2' '
+	test_setup_topic_rename cache-renames-only-upstream-rename-file &&
+	(
+		cd cache-renames-only-upstream-rename-file &&
+
+		git switch upstream &&
+
+		git mv somefile olddir/newfile &&
+		git commit -m "Add newfile" &&
+
+		#
+		# Actual testing
+		#
+
+		git switch upstream &&
+
+		git config merge.directoryRenames true &&
+
+		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
+		export GIT_TRACE2_PERF &&
+
+		test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		#git cherry-pick upstream..topic &&
+
+		grep region_enter.*diffcore_rename trace.output >calls &&
+		test_line_count = 3 calls &&
+
+		git ls-files >tracked &&
+		test_line_count = 4 tracked &&
+		test_path_is_missing newdir/unrelated-file &&
+		test_path_is_file olddir/unrelated-file &&
+		test_path_is_file newdir/newfile &&
+		test_path_is_file newdir/b &&
+		test_path_is_file newdir/a
+	)
+'
+
+test_done

From d29bd6d73da5c40935713731919b1950692aff15 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:34 +0000
Subject: [PATCH 039/110] merge-ort: add data structures for in-memory caching
 of rename detection

When there are many renames between the old base of a series of commits
and the new base for a series of commits, the sequence of merges
employed to transplant those commits (from a cherry-pick or rebase
operation) will repeatedly detect the exact same renames.  This is
wasted effort.

Add data structures which will be used to cache rename detection
results, along with the initialization and deallocation of these data
structures.  Future commits will populate these caches, detect the
appropriate circumstances when they can be used, and employ them to
avoid re-detecting the same renames repeatedly.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 merge-ort.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/merge-ort.c b/merge-ort.c
index 8258d3fd621e0f..6a06c6e5a317a0 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -139,6 +139,48 @@ struct rename_info {
 	int callback_data_nr, callback_data_alloc;
 	char *callback_data_traverse_path;
 
+	/*
+	 * cached_pairs: Caching of renames and deletions.
+	 *
+	 * These are mappings recording renames and deletions of individual
+	 * files (not directories).  They are thus a map from an old
+	 * filename to either NULL (for deletions) or a new filename (for
+	 * renames).
+	 */
+	struct strmap cached_pairs[3];
+
+	/*
+	 * cached_target_names: just the destinations from cached_pairs
+	 *
+	 * We sometimes want a fast lookup to determine if a given filename
+	 * is one of the destinations in cached_pairs.  cached_target_names
+	 * is thus duplicative information, but it provides a fast lookup.
+	 */
+	struct strset cached_target_names[3];
+
+	/*
+	 * cached_irrelevant: Caching of rename_sources that aren't relevant.
+	 *
+	 * If we try to detect a rename for a source path and succeed, it's
+	 * part of a rename.  If we try to detect a rename for a source path
+	 * and fail, then it's a delete.  If we do not try to detect a rename
+	 * for a path, then we don't know if it's a rename or a delete.  If
+	 * merge-ort doesn't think the path is relevant, then we just won't
+	 * cache anything for that path.  But there's a slight problem in
+	 * that merge-ort can think a path is RELEVANT_LOCATION, but due to
+	 * commit 9bd342137e ("diffcore-rename: determine which
+	 * relevant_sources are no longer relevant", 2021-03-13),
+	 * diffcore-rename can downgrade the path to RELEVANT_NO_MORE.  To
+	 * avoid excessive calls to diffcore_rename_extended() we still need
+	 * to cache such paths, though we cannot record them as either
+	 * renames or deletes.  So we cache them here as a "turned out to be
+	 * irrelevant *for this commit*" as they are often also irrelevant
+	 * for subsequent commits, though we will have to do some extra
+	 * checking to see whether such paths become relevant for rename
+	 * detection when cherry-picking/rebasing subsequent commits.
+	 */
+	struct strset cached_irrelevant[3];
+
 	/*
 	 * needed_limit: value needed for inexact rename detection to run
 	 *
@@ -381,6 +423,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 		reinitialize ? strmap_partial_clear : strmap_clear;
 	void (*strintmap_func)(struct strintmap *) =
 		reinitialize ? strintmap_partial_clear : strintmap_clear;
+	void (*strset_func)(struct strset *) =
+		reinitialize ? strset_partial_clear : strset_clear;
 
 	/*
 	 * We marked opti->paths with strdup_strings = 0, so that we
@@ -424,6 +468,9 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 		strmap_func(&renames->dir_renames[i], 0);
 
 		strintmap_func(&renames->relevant_sources[i]);
+		strset_func(&renames->cached_target_names[i]);
+		strmap_func(&renames->cached_pairs[i], 1);
+		strset_func(&renames->cached_irrelevant[i]);
 	}
 
 	if (!reinitialize) {
@@ -3675,6 +3722,12 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 					 NULL, 0);
 		strintmap_init_with_options(&renames->relevant_sources[i],
 					    0, NULL, 0);
+		strmap_init_with_options(&renames->cached_pairs[i],
+					 NULL, 1);
+		strset_init_with_options(&renames->cached_irrelevant[i],
+					 NULL, 1);
+		strset_init_with_options(&renames->cached_target_names[i],
+					 NULL, 0);
 	}
 
 	/*

From 2734f2e3243ce19af1d9c9c92dffae13af7b0fe5 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:35 +0000
Subject: [PATCH 040/110] merge-ort: populate caches of rename detection
 results

Fill in cache_pairs, cached_target_names, and cached_irrelevant based on
rename detection results.  Future commits will make use of these values.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 merge-ort.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/merge-ort.c b/merge-ort.c
index 6a06c6e5a317a0..d515f5cc3ff62b 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -2344,6 +2344,67 @@ static void resolve_diffpair_statuses(struct diff_queue_struct *q)
 	}
 }
 
+static void cache_new_pair(struct rename_info *renames,
+			   int side,
+			   char *old_path,
+			   char *new_path,
+			   int free_old_value)
+{
+	char *old_value;
+	new_path = xstrdup(new_path);
+	old_value = strmap_put(&renames->cached_pairs[side],
+			       old_path, new_path);
+	strset_add(&renames->cached_target_names[side], new_path);
+	if (free_old_value)
+		free(old_value);
+	else
+		assert(!old_value);
+}
+
+static void possibly_cache_new_pair(struct rename_info *renames,
+				    struct diff_filepair *p,
+				    unsigned side,
+				    char *new_path)
+{
+	int dir_renamed_side = 0;
+
+	if (new_path) {
+		/*
+		 * Directory renames happen on the other side of history from
+		 * the side that adds new files to the old directory.
+		 */
+		dir_renamed_side = 3 - side;
+	} else {
+		int val = strintmap_get(&renames->relevant_sources[side],
+					p->one->path);
+		if (val == RELEVANT_NO_MORE) {
+			assert(p->status == 'D');
+			strset_add(&renames->cached_irrelevant[side],
+				   p->one->path);
+		}
+		if (val <= 0)
+			return;
+	}
+
+	if (p->status == 'D') {
+		/*
+		 * If we already had this delete, we'll just set it's value
+		 * to NULL again, so no harm.
+		 */
+		strmap_put(&renames->cached_pairs[side], p->one->path, NULL);
+	} else if (p->status == 'R') {
+		if (!new_path)
+			new_path = p->two->path;
+		else
+			cache_new_pair(renames, dir_renamed_side,
+				       p->two->path, new_path, 0);
+		cache_new_pair(renames, side, p->one->path, new_path, 1);
+	} else if (p->status == 'A' && new_path) {
+		cache_new_pair(renames, dir_renamed_side,
+			       p->two->path, new_path, 0);
+	}
+}
+
 static int compare_pairs(const void *a_, const void *b_)
 {
 	const struct diff_filepair *a = *((const struct diff_filepair **)a_);
@@ -2426,6 +2487,7 @@ static int collect_renames(struct merge_options *opt,
 		char *new_path; /* non-NULL only with directory renames */
 
 		if (p->status != 'A' && p->status != 'R') {
+			possibly_cache_new_pair(renames, p, side_index, NULL);
 			diff_free_filepair(p);
 			continue;
 		}
@@ -2437,6 +2499,7 @@ static int collect_renames(struct merge_options *opt,
 						      &collisions,
 						      &clean);
 
+		possibly_cache_new_pair(renames, p, side_index, new_path);
 		if (p->status != 'R' && !new_path) {
 			diff_free_filepair(p);
 			continue;
@@ -3720,8 +3783,16 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 					 NULL, 1);
 		strmap_init_with_options(&renames->dir_renames[i],
 					 NULL, 0);
+		/*
+		 * relevant_sources uses -1 for the default, because we need
+		 * to be able to distinguish not-in-strintmap from valid
+		 * relevant_source values from enum file_rename_relevance.
+		 * In particular, possibly_cache_new_pair() expects a negative
+		 * value for not-found entries.
+		 */
 		strintmap_init_with_options(&renames->relevant_sources[i],
-					    0, NULL, 0);
+					    -1 /* explicitly invalid */,
+					    NULL, 0);
 		strmap_init_with_options(&renames->cached_pairs[i],
 					 NULL, 1);
 		strset_init_with_options(&renames->cached_irrelevant[i],

From 64aceb6d738394130a6e215dc6de51d8452313e0 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:36 +0000
Subject: [PATCH 041/110] merge-ort: add code to check for whether cached
 renames can be reused

We need to know when renames detected in a previous merge operation can
be reused in a later merge operation.  Consider the following setup
(from the git-rebase manpage):

                     A---B---C topic
                    /
               D---E---F---G master

After rebasing, this will appear as:

                             A'--B'--C' topic
                            /
               D---E---F---G master

Further, let's say that 'oldfile' was renamed to 'newfile' between E
and G.  The rebase or cherry-pick of A onto G will involve a three-way
merge between E (as the merge base) and G and A.  After detecting the
rename between E:oldfile and G:newfile, there will be a three-way
content merge of the following:
    E:oldfile
    G:newfile
    A:oldfile
and produce a new result:
    A':newfile

Now, when we want to pick B onto A', we will need to do a three-way
merge between A (as the merge-base) and A' and B.  This will involve
a three-way content merge of
    A:oldfile
    A':newfile
    B:oldfile
but only if we can detect that A:oldfile is similar enough to A':newfile
to be used together in a three-way content merge, i.e. only if we can
detect that A:oldfile and A':newfile are a rename.  But we already know
that A:oldfile and A':newfile are similar enough to be used in a
three-way content merge, because that is precisely where A':newfile came
from in the previous merge.

Note that A & A' both appear in both merges.  That gives us the
condition under which we can reuse renames.

There are a couple important points about this optimization:

  - If the rebase or cherry-pick halts for user conflicts, these caches
    are NOT saved anywhere.  Thus, resuming a halted rebase or
    cherry-pick will result in no reused renames for the next commit.
    This is intentional, as user resolution can change files
    significantly and in ways that violate the similarity assumptions
    here.

  - Technically, in a *very* narrow case this might give slightly
    different results for rename detection.  Using the example above,
    if:
      * E:oldfile had 20 lines
      * G:newfile added 10 new lines at the beginning of the file
      * A:oldfile deleted all but the first three lines of the file
    then
      => A':newfile would have 13 lines, 3 of which matches those
         in A:oldfile.

    Consider the two cases:
      * Without this optimization:
        - the next step of the rebase operation (moving B to B')
          would not detect the rename betwen A:oldfile and A':newfile
        - we'd thus get a modify/delete conflict with the rebase
          operation halting for the user to resolve, and have both
          A':newfile and B:oldfile sitting in the working tree.
      * With this optimization:
        - the rename between A:oldfile and A':newfile would be detected
          via the cache of renames
        - a three-way merge between A:oldfile, A':newfile, and B:oldfile
          would commence and be written to A':newfile

    Now, is the difference in behavior a bug...or a bugfix?  I can't
    tell.  Given that A:oldfile and A':newfile are not very similar,
    when we three-way merge with B:oldfile it seems likely we'll hit a
    conflict for the user to resolve.  And it shouldn't be too hard for
    users to see why we did that three-way merge; oldfile and newfile
    *were* renames somewhere in the sequence.  So, most of these corner
    cases will still behave similarly -- namely, a conflict given to the
    user to resolve.  Also, consider the interesting case when commit B
    is a clean revert of commit A.  Without this optimization, a rebase
    could not both apply a weird patch like A and then immediately
    revert it; users would be forced to resolve merge conflicts.  With
    this optimization, it would successfully apply the clean revert.
    So, there is certainly at least one case that behaves better.  Even
    if it's considered a "difference in behavior", I think both behaviors
    are reasonable, and the time savings provided by this optimization
    justify using the slightly altered rename heuristics.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 merge-ort.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/merge-ort.c b/merge-ort.c
index d515f5cc3ff62b..1176bae25f4b14 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -139,6 +139,30 @@ struct rename_info {
 	int callback_data_nr, callback_data_alloc;
 	char *callback_data_traverse_path;
 
+	/*
+	 * merge_trees: trees passed to the merge algorithm for the merge
+	 *
+	 * merge_trees records the trees passed to the merge algorithm.  But,
+	 * this data also is stored in merge_result->priv.  If a sequence of
+	 * merges are being done (such as when cherry-picking or rebasing),
+	 * the next merge can look at this and re-use information from
+	 * previous merges under certain circumstances.
+	 *
+	 * See also all the cached_* variables.
+	 */
+	struct tree *merge_trees[3];
+
+	/*
+	 * cached_pairs_valid_side: which side's cached info can be reused
+	 *
+	 * See the description for merge_trees.  For repeated merges, at most
+	 * only one side's cached information can be used.  Valid values:
+	 *   MERGE_SIDE2: cached data from side2 can be reused
+	 *   MERGE_SIDE1: cached data from side1 can be reused
+	 *   0:           no cached data can be reused
+	 */
+	int cached_pairs_valid_side;
+
 	/*
 	 * cached_pairs: Caching of renames and deletions.
 	 *
@@ -472,6 +496,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 		strmap_func(&renames->cached_pairs[i], 1);
 		strset_func(&renames->cached_irrelevant[i]);
 	}
+	renames->cached_pairs_valid_side = 0;
+	renames->dir_rename_mask = 0;
 
 	if (!reinitialize) {
 		struct hashmap_iter iter;
@@ -494,8 +520,6 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 		strmap_clear(&opti->output, 0);
 	}
 
-	renames->dir_rename_mask = 0;
-
 	/* Clean out callback_data as well. */
 	FREE_AND_NULL(renames->callback_data);
 	renames->callback_data_nr = renames->callback_data_alloc = 0;
@@ -3824,6 +3848,35 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 	trace2_region_leave("merge", "allocate/init", opt->repo);
 }
 
+static void merge_check_renames_reusable(struct merge_options *opt,
+					 struct merge_result *result,
+					 struct tree *merge_base,
+					 struct tree *side1,
+					 struct tree *side2)
+{
+	struct rename_info *renames;
+	struct tree **merge_trees;
+	struct merge_options_internal *opti = result->priv;
+
+	if (!opti)
+		return;
+
+	renames = &opti->renames;
+	merge_trees = renames->merge_trees;
+	/* merge_trees[0..2] will only be NULL if opti is */
+	assert(merge_trees[0] && merge_trees[1] && merge_trees[2]);
+
+	/* Check if we meet a condition for re-using cached_pairs */
+	if (oideq(&merge_base->object.oid, &merge_trees[2]->object.oid) &&
+	    oideq(&side1->object.oid, &result->tree->object.oid))
+		renames->cached_pairs_valid_side = MERGE_SIDE1;
+	else if (oideq(&merge_base->object.oid, &merge_trees[1]->object.oid) &&
+		 oideq(&side2->object.oid, &result->tree->object.oid))
+		renames->cached_pairs_valid_side = MERGE_SIDE2;
+	else
+		renames->cached_pairs_valid_side = 0; /* neither side valid */
+}
+
 /*** Function Grouping: merge_incore_*() and their internal variants ***/
 
 /*
@@ -3971,7 +4024,16 @@ void merge_incore_nonrecursive(struct merge_options *opt,
 
 	trace2_region_enter("merge", "merge_start", opt->repo);
 	assert(opt->ancestor != NULL);
+	merge_check_renames_reusable(opt, result, merge_base, side1, side2);
 	merge_start(opt, result);
+	/*
+	 * Record the trees used in this merge, so if there's a next merge in
+	 * a cherry-pick or rebase sequence it might be able to take advantage
+	 * of the cached_pairs in that next merge.
+	 */
+	opt->priv->renames.merge_trees[0] = merge_base;
+	opt->priv->renames.merge_trees[1] = side1;
+	opt->priv->renames.merge_trees[2] = side2;
 	trace2_region_leave("merge", "merge_start", opt->repo);
 
 	merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);

From 19ceb486f8dd25fb5782724c454edb2f06f1ed71 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:37 +0000
Subject: [PATCH 042/110] merge-ort: avoid accidental API mis-use

Previously, callers of the merge-ort API could have passed an
uninitialized value for struct merge_result *result.  However, we want
to check result to see if it has cached renames from a previous merge
that we can reuse; such values would be found behind result->priv.
However, if result->priv is uninitialized, attempting to access behind
it will give a segfault.  So, we need result->priv to be NULL (which
will be the case if the caller does a memset(&result, 0)), or be written
by a previous call to the merge-ort machinery.  Documenting this
requirement may help, but despite being the person who introduced this
requirement, I still missed it once and it did not fail in a very clear
way and led to a long debugging session.

Add a _properly_initialized field to merge_result; that value will be
0 if the caller zero'ed the merge_result, it will be set to a very
specific value by a previous run by the merge-ort machinery, and if it's
uninitialized it will most likely either be 0 or some value that does
not match the specific one we'd expect allowing us to throw a much more
meaningful error.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 merge-ort.c | 7 +++++++
 merge-ort.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/merge-ort.c b/merge-ort.c
index 1176bae25f4b14..8b2c93fdcf264e 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -52,6 +52,8 @@ enum merge_side {
 	MERGE_SIDE2 = 2
 };
 
+static unsigned RESULT_INITIALIZED = 0x1abe11ed; /* unlikely accidental value */
+
 struct traversal_callback_data {
 	unsigned long mask;
 	unsigned long dirmask;
@@ -3768,6 +3770,10 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 	assert(opt->obuf.len == 0);
 
 	assert(opt->priv == NULL);
+	if (result->_properly_initialized != 0 &&
+	    result->_properly_initialized != RESULT_INITIALIZED)
+		BUG("struct merge_result passed to merge_incore_*recursive() must be zeroed or filled with values from a previous run");
+	assert(!!result->priv == !!result->_properly_initialized);
 	if (result->priv) {
 		opt->priv = result->priv;
 		result->priv = NULL;
@@ -3927,6 +3933,7 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
 	result->clean &= strmap_empty(&opt->priv->conflicted);
 	if (!opt->priv->call_depth) {
 		result->priv = opt->priv;
+		result->_properly_initialized = RESULT_INITIALIZED;
 		opt->priv = NULL;
 	}
 }
diff --git a/merge-ort.h b/merge-ort.h
index d53a0a339f338c..c011864ffeb115 100644
--- a/merge-ort.h
+++ b/merge-ort.h
@@ -29,6 +29,8 @@ struct merge_result {
 	 * !clean) and to print "CONFLICT" messages.  Not for external use.
 	 */
 	void *priv;
+	/* Also private */
+	unsigned _properly_initialized;
 };
 
 /*

From d509802993e8423d459a05fcd6151ca1782caa07 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:38 +0000
Subject: [PATCH 043/110] merge-ort: preserve cached renames for the
 appropriate side

Previous commits created an in-memory cache of the results of rename
detection, and added logic to detect when that cache could appropriately
be used in a subsequent merge operation -- but we were still
unconditionally clearing the cache with each new merge operation anyway.
If it is valid to reuse the cache from one of the two sides of history,
preserve that side.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 merge-ort.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/merge-ort.c b/merge-ort.c
index 8b2c93fdcf264e..cc1adc74561dc1 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -486,17 +486,18 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 	/* Free memory used by various renames maps */
 	for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
 		strintmap_func(&renames->dirs_removed[i]);
-
-		partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
-		if (!reinitialize)
-			strmap_clear(&renames->dir_rename_count[i], 1);
-
 		strmap_func(&renames->dir_renames[i], 0);
-
 		strintmap_func(&renames->relevant_sources[i]);
-		strset_func(&renames->cached_target_names[i]);
-		strmap_func(&renames->cached_pairs[i], 1);
-		strset_func(&renames->cached_irrelevant[i]);
+		if (!reinitialize)
+			assert(renames->cached_pairs_valid_side == 0);
+		if (i != renames->cached_pairs_valid_side) {
+			strset_func(&renames->cached_target_names[i]);
+			strmap_func(&renames->cached_pairs[i], 1);
+			strset_func(&renames->cached_irrelevant[i]);
+			partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
+			if (!reinitialize)
+				strmap_clear(&renames->dir_rename_count[i], 1);
+		}
 	}
 	renames->cached_pairs_valid_side = 0;
 	renames->dir_rename_mask = 0;
@@ -2456,6 +2457,7 @@ static void detect_regular_renames(struct merge_options *opt,
 		return;
 	}
 
+	partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]);
 	repo_diff_setup(opt->repo, &diff_opts);
 	diff_opts.flags.recursive = 1;
 	diff_opts.flags.rename_empty = 0;

From 86b41b389546e68164ed58f6e60297a391cdca83 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:39 +0000
Subject: [PATCH 044/110] merge-ort: add helper functions for using cached
 renames

If we have a usable rename cache, then we can remove from
relevant_sources all the paths that were cached;
diffcore_rename_extended() can then consider an even smaller set of
relevant_sources in its rename detection.

However, when diffcore_rename_extended() is done, we will need to take
the renames it detected and then add back in all the ones we had cached
from before.

Add helper functions for doing these two operations; the next commit
will make use of them.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 merge-ort.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/merge-ort.c b/merge-ort.c
index cc1adc74561dc1..8e38dac3413fba 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -2371,6 +2371,53 @@ static void resolve_diffpair_statuses(struct diff_queue_struct *q)
 	}
 }
 
+MAYBE_UNUSED
+static void prune_cached_from_relevant(struct rename_info *renames,
+				       unsigned side)
+{
+	/* Reason for this function described in add_pair() */
+	struct hashmap_iter iter;
+	struct strmap_entry *entry;
+
+	/* Remove from relevant_sources all entries in cached_pairs[side] */
+	strmap_for_each_entry(&renames->cached_pairs[side], &iter, entry) {
+		strintmap_remove(&renames->relevant_sources[side],
+				 entry->key);
+	}
+	/* Remove from relevant_sources all entries in cached_irrelevant[side] */
+	strset_for_each_entry(&renames->cached_irrelevant[side], &iter, entry) {
+		strintmap_remove(&renames->relevant_sources[side],
+				 entry->key);
+	}
+}
+
+MAYBE_UNUSED
+static void use_cached_pairs(struct merge_options *opt,
+			     struct strmap *cached_pairs,
+			     struct diff_queue_struct *pairs)
+{
+	struct hashmap_iter iter;
+	struct strmap_entry *entry;
+
+	/*
+	 * Add to side_pairs all entries from renames->cached_pairs[side_index].
+	 * (Info in cached_irrelevant[side_index] is not relevant here.)
+	 */
+	strmap_for_each_entry(cached_pairs, &iter, entry) {
+		struct diff_filespec *one, *two;
+		const char *old_name = entry->key;
+		const char *new_name = entry->value;
+		if (!new_name)
+			new_name = old_name;
+
+		/* We don't care about oid/mode, only filenames and status */
+		one = alloc_filespec(old_name);
+		two = alloc_filespec(new_name);
+		diff_queue(pairs, one, two);
+		pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D';
+	}
+}
+
 static void cache_new_pair(struct rename_info *renames,
 			   int side,
 			   char *old_path,

From cbdca289fbb011e7397fecfebeeac3f887ef22d1 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:40 +0000
Subject: [PATCH 045/110] merge-ort: handle interactions of caching and
 rename/rename(1to1) cases

As documented in Documentation/technical/remembering-renames.txt, and as
tested for in the two testcases in t6429 with "rename same file
identically" in their description, there is one case where we need to
have renames in one commit NOT be cached for the next commit in our
rebase sequence -- namely, rename/rename(1to1) cases.  Rather than
specifically trying to uncache those and fix up dir_rename_counts() to
match (which would also be valid but more work), we simply disable the
optimization when this really rare type of rename occurs.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 merge-ort.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/merge-ort.c b/merge-ort.c
index 8e38dac3413fba..de96fe4f6396d4 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -2111,6 +2111,9 @@ static int process_renames(struct merge_options *opt,
 			VERIFY_CI(side2);
 
 			if (!strcmp(pathnames[1], pathnames[2])) {
+				struct rename_info *ri = &opt->priv->renames;
+				int j;
+
 				/* Both sides renamed the same way */
 				assert(side1 == side2);
 				memcpy(&side1->stages[0], &base->stages[0],
@@ -2120,6 +2123,16 @@ static int process_renames(struct merge_options *opt,
 				base->merged.is_null = 1;
 				base->merged.clean = 1;
 
+				/*
+				 * Disable remembering renames optimization;
+				 * rename/rename(1to1) is incredibly rare, and
+				 * just disabling the optimization is easier
+				 * than purging cached_pairs,
+				 * cached_target_names, and dir_rename_counts.
+				 */
+				for (j = 0; j < 3; j++)
+					ri->merge_trees[j] = NULL;
+
 				/* We handled both renames, i.e. i+1 handled */
 				i++;
 				/* Move to next rename */
@@ -3918,7 +3931,22 @@ static void merge_check_renames_reusable(struct merge_options *opt,
 
 	renames = &opti->renames;
 	merge_trees = renames->merge_trees;
-	/* merge_trees[0..2] will only be NULL if opti is */
+
+	/*
+	 * Handle case where previous merge operation did not want cache to
+	 * take effect, e.g. because rename/rename(1to1) makes it invalid.
+	 */
+	if (!merge_trees[0]) {
+		assert(!merge_trees[0] && !merge_trees[1] && !merge_trees[2]);
+		renames->cached_pairs_valid_side = 0; /* neither side valid */
+		return;
+	}
+
+	/*
+	 * Handle other cases; note that merge_trees[0..2] will only
+	 * be NULL if opti is, or if all three were manually set to
+	 * NULL by e.g. rename/rename(1to1) handling.
+	 */
 	assert(merge_trees[0] && merge_trees[1] && merge_trees[2]);
 
 	/* Check if we meet a condition for re-using cached_pairs */

From 25e65b6dd52c987056f1cac00fe6073fbf8ea237 Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Thu, 20 May 2021 06:09:41 +0000
Subject: [PATCH 046/110] merge-ort, diffcore-rename: employ cached renames
 when possible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When there are many renames between the old base of a series of commits
and the new base, the way sequencer.c, merge-recursive.c, and
diffcore-rename.c have traditionally split the work resulted in
redetecting the same renames with each and every commit being
transplanted.  To address this, the last several commits have been
creating a cache of rename detection results, determining when it was
safe to use such a cache in subsequent merge operations, adding helper
functions, and so on.  See the previous half dozen commit messages for
additional discussion of this optimization, particularly the message a
few commits ago entitled "add code to check for whether cached renames
can be reused".  This commit finally ties all of that work together,
modifying the merge algorithm to make use of these cached renames.

For the testcases mentioned in commit 557ac0350d ("merge-ort: begin
performance work; instrument with trace2_region_* calls", 2020-10-28),
this change improves the performance as follows:

                            Before                  After
    no-renames:        5.665 s ±  0.129 s     5.622 s ±  0.059 s
    mega-renames:     11.435 s ±  0.158 s    10.127 s ±  0.073 s
    just-one-mega:   494.2  ms ±  6.1  ms   500.3  ms ±  3.8  ms

That's a fairly small improvement, but mostly because the previous
optimizations were so effective for these particular testcases; this
optimization only kicks in when the others don't.  If we undid the
basename-guided rename detection and skip-irrelevant-renames
optimizations, then we'd see that this series by itself improved
performance as follows:

                   Before Basename Series   After Just This Series
    no-renames:      13.815 s ±  0.062 s      5.697 s ±  0.080 s
    mega-renames:  1799.937 s ±  0.493 s    205.709 s ±  0.457 s

Since this optimization kicks in to help accelerate cases where the
previous optimizations do not apply, this last comparison shows that
this cached-renames optimization has the potential to help signficantly
in cases that don't meet the requirements for the other optimizations to
be effective.

The changes made in this optimization also lay some important groundwork
for a future optimization around having collect_merge_info() avoid
recursing into subtrees in more cases.

However, for this optimization to be effective, merge_switch_to_result()
should only be called when the rebase or cherry-pick operation has
either completed or hit a case where the user needs to resolve a
conflict or edit the result.  If it is called after every commit, as
sequencer.c does, then the working tree and index are needlessly updated
with every commit and the cached metadata is tossed, defeating this
optimization.  Refactoring sequencer.c to only call
merge_switch_to_result() at the end of the operation is a bigger
undertaking, and the practical benefits of this optimization will not be
realized until that work is performed.  Since `test-tool fast-rebase`
only updates at the end of the operation, it was used to obtain the
timings above.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diffcore-rename.c                        | 22 +++++++++--
 diffcore.h                               |  3 +-
 merge-ort.c                              | 47 ++++++++++++++++++++---
 t/t6429-merge-sequence-rename-caching.sh | 48 ++++++++++++++----------
 4 files changed, 90 insertions(+), 30 deletions(-)

diff --git a/diffcore-rename.c b/diffcore-rename.c
index 7cc24592617e46..dfbe65c917e9af 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -568,7 +568,8 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
 static void initialize_dir_rename_info(struct dir_rename_info *info,
 				       struct strintmap *relevant_sources,
 				       struct strintmap *dirs_removed,
-				       struct strmap *dir_rename_count)
+				       struct strmap *dir_rename_count,
+				       struct strmap *cached_pairs)
 {
 	struct hashmap_iter iter;
 	struct strmap_entry *entry;
@@ -633,6 +634,17 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
 					 rename_dst[i].p->two->path);
 	}
 
+	/* Add cached_pairs to counts */
+	strmap_for_each_entry(cached_pairs, &iter, entry) {
+		const char *old_name = entry->key;
+		const char *new_name = entry->value;
+		if (!new_name)
+			/* known delete; ignore it */
+			continue;
+
+		update_dir_rename_counts(info, dirs_removed, old_name, new_name);
+	}
+
 	/*
 	 * Now we collapse
 	 *    dir_rename_count: old_directory -> {new_directory -> count}
@@ -1247,7 +1259,8 @@ static void handle_early_known_dir_renames(struct dir_rename_info *info,
 void diffcore_rename_extended(struct diff_options *options,
 			      struct strintmap *relevant_sources,
 			      struct strintmap *dirs_removed,
-			      struct strmap *dir_rename_count)
+			      struct strmap *dir_rename_count,
+			      struct strmap *cached_pairs)
 {
 	int detect_rename = options->detect_rename;
 	int minimum_score = options->rename_score;
@@ -1363,7 +1376,8 @@ void diffcore_rename_extended(struct diff_options *options,
 		/* Preparation for basename-driven matching. */
 		trace2_region_enter("diff", "dir rename setup", options->repo);
 		initialize_dir_rename_info(&info, relevant_sources,
-					   dirs_removed, dir_rename_count);
+					   dirs_removed, dir_rename_count,
+					   cached_pairs);
 		trace2_region_leave("diff", "dir rename setup", options->repo);
 
 		/* Utilize file basenames to quickly find renames. */
@@ -1561,5 +1575,5 @@ void diffcore_rename_extended(struct diff_options *options,
 
 void diffcore_rename(struct diff_options *options)
 {
-	diffcore_rename_extended(options, NULL, NULL, NULL);
+	diffcore_rename_extended(options, NULL, NULL, NULL, NULL);
 }
diff --git a/diffcore.h b/diffcore.h
index cf8d4cb8617dc7..de01e64becaf86 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -181,7 +181,8 @@ void diffcore_rename(struct diff_options *);
 void diffcore_rename_extended(struct diff_options *options,
 			      struct strintmap *relevant_sources,
 			      struct strintmap *dirs_removed,
-			      struct strmap *dir_rename_count);
+			      struct strmap *dir_rename_count,
+			      struct strmap *cached_pairs);
 void diffcore_merge_broken(void);
 void diffcore_pickaxe(struct diff_options *);
 void diffcore_order(const char *orderfile);
diff --git a/merge-ort.c b/merge-ort.c
index de96fe4f6396d4..ed21dc872436b6 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -763,15 +763,48 @@ static void add_pair(struct merge_options *opt,
 	struct rename_info *renames = &opt->priv->renames;
 	int names_idx = is_add ? side : 0;
 
-	if (!is_add) {
+	if (is_add) {
+		if (strset_contains(&renames->cached_target_names[side],
+				    pathname))
+			return;
+	} else {
 		unsigned content_relevant = (match_mask == 0);
 		unsigned location_relevant = (dir_rename_mask == 0x07);
 
+		/*
+		 * If pathname is found in cached_irrelevant[side] due to
+		 * previous pick but for this commit content is relevant,
+		 * then we need to remove it from cached_irrelevant.
+		 */
+		if (content_relevant)
+			/* strset_remove is no-op if strset doesn't have key */
+			strset_remove(&renames->cached_irrelevant[side],
+				      pathname);
+
+		/*
+		 * We do not need to re-detect renames for paths that we already
+		 * know the pairing, i.e. for cached_pairs (or
+		 * cached_irrelevant).  However, handle_deferred_entries() needs
+		 * to loop over the union of keys from relevant_sources[side] and
+		 * cached_pairs[side], so for simplicity we set relevant_sources
+		 * for all the cached_pairs too and then strip them back out in
+		 * prune_cached_from_relevant() at the beginning of
+		 * detect_regular_renames().
+		 */
 		if (content_relevant || location_relevant) {
 			/* content_relevant trumps location_relevant */
 			strintmap_set(&renames->relevant_sources[side], pathname,
 				      content_relevant ? RELEVANT_CONTENT : RELEVANT_LOCATION);
 		}
+
+		/*
+		 * Avoid creating pair if we've already cached rename results.
+		 * Note that we do this after setting relevant_sources[side]
+		 * as noted in the comment above.
+		 */
+		if (strmap_contains(&renames->cached_pairs[side], pathname) ||
+		    strset_contains(&renames->cached_irrelevant[side], pathname))
+			return;
 	}
 
 	one = alloc_filespec(pathname);
@@ -2360,7 +2393,9 @@ static inline int possible_side_renames(struct rename_info *renames,
 static inline int possible_renames(struct rename_info *renames)
 {
 	return possible_side_renames(renames, 1) ||
-	       possible_side_renames(renames, 2);
+	       possible_side_renames(renames, 2) ||
+	       !strmap_empty(&renames->cached_pairs[1]) ||
+	       !strmap_empty(&renames->cached_pairs[2]);
 }
 
 static void resolve_diffpair_statuses(struct diff_queue_struct *q)
@@ -2384,7 +2419,6 @@ static void resolve_diffpair_statuses(struct diff_queue_struct *q)
 	}
 }
 
-MAYBE_UNUSED
 static void prune_cached_from_relevant(struct rename_info *renames,
 				       unsigned side)
 {
@@ -2404,7 +2438,6 @@ static void prune_cached_from_relevant(struct rename_info *renames,
 	}
 }
 
-MAYBE_UNUSED
 static void use_cached_pairs(struct merge_options *opt,
 			     struct strmap *cached_pairs,
 			     struct diff_queue_struct *pairs)
@@ -2507,6 +2540,7 @@ static void detect_regular_renames(struct merge_options *opt,
 	struct diff_options diff_opts;
 	struct rename_info *renames = &opt->priv->renames;
 
+	prune_cached_from_relevant(renames, side_index);
 	if (!possible_side_renames(renames, side_index)) {
 		/*
 		 * No rename detection needed for this side, but we still need
@@ -2535,7 +2569,8 @@ static void detect_regular_renames(struct merge_options *opt,
 	diffcore_rename_extended(&diff_opts,
 				 &renames->relevant_sources[side_index],
 				 &renames->dirs_removed[side_index],
-				 &renames->dir_rename_count[side_index]);
+				 &renames->dir_rename_count[side_index],
+				 &renames->cached_pairs[side_index]);
 	trace2_region_leave("diff", "diffcore_rename", opt->repo);
 	resolve_diffpair_statuses(&diff_queued_diff);
 
@@ -2643,6 +2678,8 @@ static int detect_and_process_renames(struct merge_options *opt,
 	trace2_region_enter("merge", "regular renames", opt->repo);
 	detect_regular_renames(opt, MERGE_SIDE1);
 	detect_regular_renames(opt, MERGE_SIDE2);
+	use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]);
+	use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]);
 	trace2_region_leave("merge", "regular renames", opt->repo);
 
 	trace2_region_enter("merge", "directory renames", opt->repo);
diff --git a/t/t6429-merge-sequence-rename-caching.sh b/t/t6429-merge-sequence-rename-caching.sh
index f47d8924ee730d..035edc40b1ebba 100755
--- a/t/t6429-merge-sequence-rename-caching.sh
+++ b/t/t6429-merge-sequence-rename-caching.sh
@@ -101,10 +101,10 @@ test_expect_success 'caching renames does not preclude finding new ones' '
 # dramatic change in size of the file, but remembering the rename and
 # reusing it is reasonable too.
 #
-# Rename detection (diffcore_rename_extended()) will run twice here; it is
-# not needed on the topic side of history for either of the two commits
-# being merged, but it is needed on the upstream side of history for each
-# commit being picked.
+# We do test here that we expect rename detection to only be run once total
+# (the topic side of history doesn't need renames, and with caching we
+# should be able to only run rename detection on the upstream side one
+# time.)
 test_expect_success 'cherry-pick both a commit and its immediate revert' '
 	test_create_repo pick-commit-and-its-immediate-revert &&
 	(
@@ -140,11 +140,11 @@ test_expect_success 'cherry-pick both a commit and its immediate revert' '
 		GIT_TRACE2_PERF="$(pwd)/trace.output" &&
 		export GIT_TRACE2_PERF &&
 
-		test_might_fail test-tool fast-rebase --onto HEAD upstream~1 topic &&
+		test-tool fast-rebase --onto HEAD upstream~1 topic &&
 		#git cherry-pick upstream~1..topic &&
 
 		grep region_enter.*diffcore_rename trace.output >calls &&
-		test_line_count = 2 calls
+		test_line_count = 1 calls
 	)
 '
 
@@ -304,9 +304,11 @@ test_expect_success 'rename same file identically, then add file to old dir' '
 # Here we are just concerned that cached renames might prevent us from seeing
 # the rename conflict, and we want to ensure that we do get a conflict.
 #
-# While at it, also test that we do rename detection three times.  We have to
-# detect renames on the upstream side of history once for each merge, plus
-# Topic_2 has renames.
+# While at it, though, we do test that we only try to detect renames 2
+# times and not three.  (The first merge needs to detect renames on the
+# upstream side.  Traditionally, the second merge would need to detect
+# renames on both sides of history, but our caching of upstream renames
+# should avoid the need to re-detect upstream renames.)
 #
 test_expect_success 'cached dir rename does not prevent noticing later conflict' '
 	test_create_repo dir-rename-cache-not-occluding-later-conflict &&
@@ -357,7 +359,7 @@ test_expect_success 'cached dir rename does not prevent noticing later conflict'
 		grep CONFLICT..rename/rename output &&
 
 		grep region_enter.*diffcore_rename trace.output >calls &&
-		test_line_count = 3 calls
+		test_line_count = 2 calls
 	)
 '
 
@@ -412,10 +414,17 @@ test_setup_upstream_rename () {
 # commit to mess up its location either.  We want to make sure that
 # olddir/newfile doesn't exist in the result and that newdir/newfile does.
 #
-# We also expect rename detection to occur three times.  Although it is
-# typically needed two times per commit, there are no deleted files on the
-# topic side of history, so we only need to detect renames on the upstream
-# side for each of the 3 commits we need to pick.
+# We also test that we only do rename detection twice.  We never need
+# rename detection on the topic side of history, but we do need it twice on
+# the upstream side of history.  For the first topic commit, we only need
+# the
+#   relevant-rename -> renamed
+# rename, because olddir is unmodified by Topic_1.  For Topic_2, however,
+# the new file being added to olddir means files that were previously
+# irrelevant for rename detection are now relevant, forcing us to repeat
+# rename detection for the paths we don't already have cached.  Topic_3 also
+# tweaks olddir/newfile, but the renames in olddir/ will have been cached
+# from the second rename detection run.
 #
 test_expect_success 'dir rename unneeded, then add new file to old dir' '
 	test_setup_upstream_rename dir-rename-unneeded-until-new-file &&
@@ -450,7 +459,7 @@ test_expect_success 'dir rename unneeded, then add new file to old dir' '
 		#git cherry-pick upstream..topic &&
 
 		grep region_enter.*diffcore_rename trace.output >calls &&
-		test_line_count = 3 calls &&
+		test_line_count = 2 calls &&
 
 		git ls-files >tracked &&
 		test_line_count = 5 tracked &&
@@ -516,7 +525,7 @@ test_expect_success 'dir rename unneeded, then rename existing file into old dir
 		#git cherry-pick upstream..topic &&
 
 		grep region_enter.*diffcore_rename trace.output >calls &&
-		test_line_count = 4 calls &&
+		test_line_count = 3 calls &&
 
 		test_path_is_missing somefile &&
 		test_path_is_missing olddir/newfile &&
@@ -648,9 +657,8 @@ test_expect_success 'caching renames only on upstream side, part 1' '
 # for the wrong side of history.
 #
 #
-# This testcase should only need three calls to diffcore_rename_extended(),
-# because there are no renames on the topic side of history for picking
-# Topic_2.
+# This testcase should only need two calls to diffcore_rename_extended(),
+# both for the first merge, one for each side of history.
 #
 test_expect_success 'caching renames only on upstream side, part 2' '
 	test_setup_topic_rename cache-renames-only-upstream-rename-file &&
@@ -677,7 +685,7 @@ test_expect_success 'caching renames only on upstream side, part 2' '
 		#git cherry-pick upstream..topic &&
 
 		grep region_enter.*diffcore_rename trace.output >calls &&
-		test_line_count = 3 calls &&
+		test_line_count = 2 calls &&
 
 		git ls-files >tracked &&
 		test_line_count = 4 tracked &&

From 4e0a64a7135b204f140058ed9f53299819db447e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Thu, 20 May 2021 13:05:45 +0200
Subject: [PATCH 047/110] trace2: refactor to avoid gcc warning under -O3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor tr2_dst_try_uds_connect() to avoid a gcc warning[1] that
appears under -O3 (but not -O2). This makes the build pass under
DEVELOPER=1 without needing a DEVOPTS=no-error.

This can be reproduced with GCC Debian 8.3.0-6, but not e.g. with
clang 7.0.1-8+deb10u2. We've had this warning since
ee4512ed481 (trace2: create new combined trace facility, 2019-02-22).

As noted in [2] this warning happens because the compiler doesn't
assume that errno must be non-zero after a failed syscall.

Let's work around by using the well-established "saved_errno" pattern,
along with returning -1 ourselves instead of "errno". The caller can
thus rely on our "errno" on failure.

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61846 for a related
bug report against GCC.

1.

    trace2/tr2_dst.c: In function ‘tr2_dst_get_trace_fd.part.5’:
    trace2/tr2_dst.c:296:10: warning: ‘fd’ may be used uninitialized in this function [-Wmaybe-uninitialized]
      dst->fd = fd;
      ~~~~~~~~^~~~
    trace2/tr2_dst.c:229:6: note: ‘fd’ was declared here
      int fd;
          ^~
2. https://lore.kernel.org/git/20200404142131.GA679473@coredump.intra.peff.net/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 trace2/tr2_dst.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/trace2/tr2_dst.c b/trace2/tr2_dst.c
index 5dda0ca1cdb5d0..00314763505b03 100644
--- a/trace2/tr2_dst.c
+++ b/trace2/tr2_dst.c
@@ -115,15 +115,16 @@ static int tr2_dst_try_uds_connect(const char *path, int sock_type, int *out_fd)
 
 	fd = socket(AF_UNIX, sock_type, 0);
 	if (fd == -1)
-		return errno;
+		return -1;
 
 	sa.sun_family = AF_UNIX;
 	strlcpy(sa.sun_path, path, sizeof(sa.sun_path));
 
 	if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) == -1) {
-		int e = errno;
+		int saved_errno = errno;
 		close(fd);
-		return e;
+		errno = saved_errno;
+		return -1;
 	}
 
 	*out_fd = fd;
@@ -138,7 +139,6 @@ static int tr2_dst_try_unix_domain_socket(struct tr2_dst *dst,
 {
 	unsigned int uds_try = 0;
 	int fd;
-	int e;
 	const char *path = NULL;
 
 	/*
@@ -182,15 +182,13 @@ static int tr2_dst_try_unix_domain_socket(struct tr2_dst *dst,
 	}
 
 	if (uds_try & TR2_DST_UDS_TRY_STREAM) {
-		e = tr2_dst_try_uds_connect(path, SOCK_STREAM, &fd);
-		if (!e)
+		if (!tr2_dst_try_uds_connect(path, SOCK_STREAM, &fd))
 			goto connected;
-		if (e != EPROTOTYPE)
+		if (errno != EPROTOTYPE)
 			goto error;
 	}
 	if (uds_try & TR2_DST_UDS_TRY_DGRAM) {
-		e = tr2_dst_try_uds_connect(path, SOCK_DGRAM, &fd);
-		if (!e)
+		if (!tr2_dst_try_uds_connect(path, SOCK_DGRAM, &fd))
 			goto connected;
 	}
 
@@ -198,7 +196,7 @@ static int tr2_dst_try_unix_domain_socket(struct tr2_dst *dst,
 	if (tr2_dst_want_warning())
 		warning("trace2: could not connect to socket '%s' for '%s' tracing: %s",
 			path, tr2_sysenv_display_name(dst->sysenv_var),
-			strerror(e));
+			strerror(errno));
 
 	tr2_dst_trace_disable(dst);
 	return 0;

From 225f7fa84790a50dc95ffc21ef8f601f0f5e9da5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= <jn.avila@free.fr>
Date: Thu, 20 May 2021 09:42:14 +0200
Subject: [PATCH 048/110] help: fix small typo in error message
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Classic string concatenation while forgetting a space character.

Signed-off-by: Jean-Noël Avila <jn.avila@free.fr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 list-objects-filter-options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 96a605c8adea3b..fd8d59f653ab9b 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -102,7 +102,7 @@ static int gently_parse_list_objects_filter(
 	} else if (skip_prefix(arg, "object:type=", &v0)) {
 		int type = type_from_string_gently(v0, strlen(v0), 1);
 		if (type < 0) {
-			strbuf_addf(errbuf, _("'%s' for 'object:type=<type>' is"
+			strbuf_addf(errbuf, _("'%s' for 'object:type=<type>' is "
 					      "not a valid object type"), v0);
 			return 1;
 		}

From 7a55fa0e0bb88e2bf721bc65cc55cd6b435a8373 Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:46:54 +0300
Subject: [PATCH 049/110] t4013: test that "-m" alone has no effect in "git
 log"

This is to notice current behavior that we are going to change when we
start to imply "-p" by "-m".

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t4013-diff-various.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index 87def81699bf82..e9f67cd2435139 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -452,6 +452,14 @@ diff-tree --stat --compact-summary initial mode
 diff-tree -R --stat --compact-summary initial mode
 EOF
 
+test_expect_success 'log -m matches pure log' '
+	git log master >result &&
+	process_diffs result >expected &&
+	git log -m >result &&
+	process_diffs result >actual &&
+	test_cmp expected actual
+'
+
 test_expect_success 'log --diff-merges=on matches --diff-merges=separate' '
 	git log -p --diff-merges=separate master >result &&
 	process_diffs result >expected &&

From 48229c193d2e6e728d3243bacea2f1e1490ced8a Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:46:55 +0300
Subject: [PATCH 050/110] t4013: test "git log -m --raw"

This is to ensure we won't break different diff formats when we start
to imply "-p" by "-m".

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t4013-diff-various.sh          |  1 +
 t/t4013/diff.log_-m_--raw_master | 61 ++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 t/t4013/diff.log_-m_--raw_master

diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index e9f67cd2435139..1809355f9bb150 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -337,6 +337,7 @@ log -m -p --first-parent master
 log -m -p master
 log --cc -m -p master
 log -c -m -p master
+log -m --raw master
 log -SF master
 log -S F master
 log -SF -p master
diff --git a/t/t4013/diff.log_-m_--raw_master b/t/t4013/diff.log_-m_--raw_master
new file mode 100644
index 00000000000000..cd2ecc46283241
--- /dev/null
+++ b/t/t4013/diff.log_-m_--raw_master
@@ -0,0 +1,61 @@
+$ git log -m --raw master
+commit 59d314ad6f356dd08601a4cd5e530381da3e3c64 (from 9a6d4949b6b76956d9d5e26f2791ec2ceff5fdc0)
+Merge: 9a6d494 c7a2ab9
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:04:00 2006 +0000
+
+    Merge branch 'side'
+
+:100644 100644 cead32e... 992913c... M	dir/sub
+:100644 100644 b414108... 10a8a9f... M	file0
+
+commit 59d314ad6f356dd08601a4cd5e530381da3e3c64 (from c7a2ab9e8eac7b117442a607d5a9b3950ae34d5a)
+Merge: 9a6d494 c7a2ab9
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:04:00 2006 +0000
+
+    Merge branch 'side'
+
+:100644 100644 7289e35... 992913c... M	dir/sub
+:100644 100644 f4615da... 10a8a9f... M	file0
+:000000 100644 0000000... b1e6722... A	file1
+:100644 000000 01e79c3... 0000000... D	file2
+:100644 000000 7289e35... 0000000... D	file3
+
+commit c7a2ab9e8eac7b117442a607d5a9b3950ae34d5a
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:03:00 2006 +0000
+
+    Side
+
+:100644 100644 35d242b... 7289e35... M	dir/sub
+:100644 100644 01e79c3... f4615da... M	file0
+:000000 100644 0000000... 7289e35... A	file3
+
+commit 9a6d4949b6b76956d9d5e26f2791ec2ceff5fdc0
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:02:00 2006 +0000
+
+    Third
+
+:100644 100644 8422d40... cead32e... M	dir/sub
+:000000 100644 0000000... b1e6722... A	file1
+
+commit 1bde4ae5f36c8d9abe3a0fce0c6aab3c4a12fe44
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:01:00 2006 +0000
+
+    Second
+    
+    This is the second commit.
+
+:100644 100644 35d242b... 8422d40... M	dir/sub
+:100644 100644 01e79c3... b414108... M	file0
+:100644 000000 01e79c3... 0000000... D	file2
+
+commit 444ac553ac7612cc88969031b02b3767fb8a353a
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:00:00 2006 +0000
+
+    Initial
+$

From faf16d4e97ed45e8e570a8d1d568449e948284f0 Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:46:56 +0300
Subject: [PATCH 051/110] t4013: test "git log -m --stat"

This is to ensure we won't break different diff formats when we start
to imply "-p" by "-m".

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t4013-diff-various.sh           |  1 +
 t/t4013/diff.log_-m_--stat_master | 66 +++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 t/t4013/diff.log_-m_--stat_master

diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index 1809355f9bb150..e53ca7aa503f6d 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -338,6 +338,7 @@ log -m -p master
 log --cc -m -p master
 log -c -m -p master
 log -m --raw master
+log -m --stat master
 log -SF master
 log -S F master
 log -SF -p master
diff --git a/t/t4013/diff.log_-m_--stat_master b/t/t4013/diff.log_-m_--stat_master
new file mode 100644
index 00000000000000..c7db084fd905d0
--- /dev/null
+++ b/t/t4013/diff.log_-m_--stat_master
@@ -0,0 +1,66 @@
+$ git log -m --stat master
+commit 59d314ad6f356dd08601a4cd5e530381da3e3c64 (from 9a6d4949b6b76956d9d5e26f2791ec2ceff5fdc0)
+Merge: 9a6d494 c7a2ab9
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:04:00 2006 +0000
+
+    Merge branch 'side'
+
+ dir/sub | 2 ++
+ file0   | 3 +++
+ 2 files changed, 5 insertions(+)
+
+commit 59d314ad6f356dd08601a4cd5e530381da3e3c64 (from c7a2ab9e8eac7b117442a607d5a9b3950ae34d5a)
+Merge: 9a6d494 c7a2ab9
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:04:00 2006 +0000
+
+    Merge branch 'side'
+
+ dir/sub | 4 ++++
+ file0   | 3 +++
+ file1   | 3 +++
+ file2   | 3 ---
+ file3   | 4 ----
+ 5 files changed, 10 insertions(+), 7 deletions(-)
+
+commit c7a2ab9e8eac7b117442a607d5a9b3950ae34d5a
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:03:00 2006 +0000
+
+    Side
+
+ dir/sub | 2 ++
+ file0   | 3 +++
+ file3   | 4 ++++
+ 3 files changed, 9 insertions(+)
+
+commit 9a6d4949b6b76956d9d5e26f2791ec2ceff5fdc0
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:02:00 2006 +0000
+
+    Third
+
+ dir/sub | 2 ++
+ file1   | 3 +++
+ 2 files changed, 5 insertions(+)
+
+commit 1bde4ae5f36c8d9abe3a0fce0c6aab3c4a12fe44
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:01:00 2006 +0000
+
+    Second
+    
+    This is the second commit.
+
+ dir/sub | 2 ++
+ file0   | 3 +++
+ file2   | 3 ---
+ 3 files changed, 5 insertions(+), 3 deletions(-)
+
+commit 444ac553ac7612cc88969031b02b3767fb8a353a
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:00:00 2006 +0000
+
+    Initial
+$

From 3ae7fe2b0f8528305bc871738dd12cde23024556 Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:46:57 +0300
Subject: [PATCH 052/110] t4013: test "git diff-tree -m"

We want to ensure we don't affect plumbing commands with our changes
of "-m" semantics, so add corresponding test.

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t4013-diff-various.sh          |  1 +
 t/t4013/diff.diff-tree_-m_master | 11 +++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 t/t4013/diff.diff-tree_-m_master

diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index e53ca7aa503f6d..bdc23b1180b04a 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -293,6 +293,7 @@ diff-tree --stat initial mode
 diff-tree --summary initial mode
 
 diff-tree master
+diff-tree -m master
 diff-tree -p master
 diff-tree -p -m master
 diff-tree -c master
diff --git a/t/t4013/diff.diff-tree_-m_master b/t/t4013/diff.diff-tree_-m_master
new file mode 100644
index 00000000000000..6d0a2207fb3016
--- /dev/null
+++ b/t/t4013/diff.diff-tree_-m_master
@@ -0,0 +1,11 @@
+$ git diff-tree -m master
+59d314ad6f356dd08601a4cd5e530381da3e3c64
+:040000 040000 65f5c9dd60ce3b2b3324b618ac7accf8d912c113 0564e026437809817a64fff393079714b6dd4628 M	dir
+:100644 100644 b414108e81e5091fe0974a1858b4d0d22b107f70 10a8a9f3657f91a156b9f0184ed79a20adef9f7f M	file0
+59d314ad6f356dd08601a4cd5e530381da3e3c64
+:040000 040000 f977ed46ae6873c1c30ab878e15a4accedc3618b 0564e026437809817a64fff393079714b6dd4628 M	dir
+:100644 100644 f4615da674c09df322d6ba8d6b21ecfb1b1ba510 10a8a9f3657f91a156b9f0184ed79a20adef9f7f M	file0
+:000000 100644 0000000000000000000000000000000000000000 b1e67221afe8461efd244b487afca22d46b95eb8 A	file1
+:100644 000000 01e79c32a8c99c557f0757da7cb6d65b3414466d 0000000000000000000000000000000000000000 D	file2
+:100644 000000 7289e35bff32727c08dda207511bec138fdb9ea5 0000000000000000000000000000000000000000 D	file3
+$

From e0b16421b17fffa2544eebfcb570fdb766fd0bc4 Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:46:58 +0300
Subject: [PATCH 053/110] t4013: test "git diff-index -m"

-m in "git diff-index" means "match missing", that differs
from its meaning in "git diff". Let's check it in diff-index.

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t4013-diff-various.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index bdc23b1180b04a..e561a8e48521a0 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -494,6 +494,19 @@ test_expect_success 'git config log.diffMerges first-parent vs -m' '
 	test_cmp expected actual
 '
 
+# -m in "git diff-index" means "match missing", that differs
+# from its meaning in "git diff". Let's check it in diff-index.
+# The line in the output for removed file should disappear when
+# we provide -m in diff-index.
+test_expect_success 'git diff-index -m' '
+	rm -f file1 &&
+	git diff-index HEAD >without-m &&
+	lines_count=$(wc -l <without-m) &&
+	git diff-index -m HEAD >with-m &&
+	git restore file1 &&
+	test_line_count = $((lines_count - 1)) with-m
+'
+
 test_expect_success 'log -S requires an argument' '
 	test_must_fail git log -S
 '

From 19b2517f95a0a908a8ada7417cf0717299e7e1aa Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:46:59 +0300
Subject: [PATCH 054/110] diff-merges: move specific diff-index "-m" handling
 to diff-index

Move specific handling of "-m" for diff-index to diff-index.c, so
diff-merges is left to handle only diff for merges options.

Being a better design by itself, this is especially essential in
preparation for letting -m imply -p, as "diff-index -m" obviously
should not imply -p, as it's entirely unrelated.

To handle this, in addition to moving specific diff-index "-m" code
out of diff-merges, we introduce new

  diff_merges_suppress_options_parsing()

and call it before generic options processing in cmd_diff_index().

This new diff_merges_suppress_options_parsing() could then be reused
and called before invocations of setup_revisions() for other commands
that don't need --diff-merges options, but that's outside of the scope
of these patch series.

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/diff-index.c |  9 +++++++++
 diff-merges.c        | 25 +++++++++++++------------
 diff-merges.h        |  2 ++
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/builtin/diff-index.c b/builtin/diff-index.c
index 176fe7ff2b4e15..cf09559e422d66 100644
--- a/builtin/diff-index.c
+++ b/builtin/diff-index.c
@@ -2,6 +2,7 @@
 #include "cache.h"
 #include "config.h"
 #include "diff.h"
+#include "diff-merges.h"
 #include "commit.h"
 #include "revision.h"
 #include "builtin.h"
@@ -27,6 +28,12 @@ int cmd_diff_index(int argc, const char **argv, const char *prefix)
 	rev.abbrev = 0;
 	prefix = precompose_argv_prefix(argc, argv, prefix);
 
+	/*
+	 * We need no diff for merges options, and we need to avoid conflict
+	 * with our own meaning of "-m".
+	 */
+	diff_merges_suppress_options_parsing();
+
 	argc = setup_revisions(argc, argv, &rev, NULL);
 	for (i = 1; i < argc; i++) {
 		const char *arg = argv[i];
@@ -35,6 +42,8 @@ int cmd_diff_index(int argc, const char **argv, const char *prefix)
 			option |= DIFF_INDEX_CACHED;
 		else if (!strcmp(arg, "--merge-base"))
 			option |= DIFF_INDEX_MERGE_BASE;
+		else if (!strcmp(arg, "-m"))
+			rev.match_missing = 1;
 		else
 			usage(diff_cache_usage);
 	}
diff --git a/diff-merges.c b/diff-merges.c
index f3a9daed7e0519..9ca00cdd0cc6b2 100644
--- a/diff-merges.c
+++ b/diff-merges.c
@@ -6,6 +6,7 @@ typedef void (*diff_merges_setup_func_t)(struct rev_info *);
 static void set_separate(struct rev_info *revs);
 
 static diff_merges_setup_func_t set_to_default = set_separate;
+static int suppress_parsing;
 
 static void suppress(struct rev_info *revs)
 {
@@ -30,17 +31,6 @@ static void set_first_parent(struct rev_info *revs)
 	revs->first_parent_merges = 1;
 }
 
-static void set_m(struct rev_info *revs)
-{
-	/*
-	 * To "diff-index", "-m" means "match missing", and to the "log"
-	 * family of commands, it means "show default diff for merges". Set
-	 * both fields appropriately.
-	 */
-	set_to_default(revs);
-	revs->match_missing = 1;
-}
-
 static void set_combined(struct rev_info *revs)
 {
 	suppress(revs);
@@ -101,14 +91,22 @@ int diff_merges_config(const char *value)
 	return 0;
 }
 
+void diff_merges_suppress_options_parsing(void)
+{
+	suppress_parsing = 1;
+}
+
 int diff_merges_parse_opts(struct rev_info *revs, const char **argv)
 {
 	int argcount = 1;
 	const char *optarg;
 	const char *arg = argv[0];
 
+	if (suppress_parsing)
+		return 0;
+
 	if (!strcmp(arg, "-m")) {
-		set_m(revs);
+		set_to_default(revs);
 	} else if (!strcmp(arg, "-c")) {
 		set_combined(revs);
 		revs->combined_imply_patch = 1;
@@ -155,6 +153,9 @@ void diff_merges_set_dense_combined_if_unset(struct rev_info *revs)
 
 void diff_merges_setup_revs(struct rev_info *revs)
 {
+	if (suppress_parsing)
+		return;
+
 	if (revs->combine_merges == 0)
 		revs->dense_combined_merges = 0;
 	if (revs->separate_merges == 0)
diff --git a/diff-merges.h b/diff-merges.h
index 09d9a6c9a4fbec..b5d57f6563e3ab 100644
--- a/diff-merges.h
+++ b/diff-merges.h
@@ -11,6 +11,8 @@ struct rev_info;
 
 int diff_merges_config(const char *value);
 
+void diff_merges_suppress_options_parsing(void);
+
 int diff_merges_parse_opts(struct rev_info *revs, const char **argv);
 
 void diff_merges_suppress(struct rev_info *revs);

From 23f6d40dd3e7aad72c7706bb75f77a6c73b69b56 Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:47:00 +0300
Subject: [PATCH 055/110] git-svn: stop passing "-m" to "git rev-list"

rev-list doesn't utilize -m. It happens to eat it silently, so this
bug went unnoticed.

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 perl/Git/SVN.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index f6f1dc03c60861..35ff5a68963db9 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -1636,7 +1636,7 @@ sub has_no_changes {
 	my $commit = shift;
 
 	my @revs = split / /, command_oneline(
-		qw(rev-list --parents -1 -m), $commit);
+		qw(rev-list --parents -1), $commit);
 
 	# Commits with no parents, e.g. the start of a partial branch,
 	# have changes by definition.

From 1e20a407fe28b4bdba3b973b26b0b60ff6b6c97d Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:47:01 +0300
Subject: [PATCH 056/110] stash list: stop passing "-m" to "git log"

Passing "-m" in "git log --first-parent -m" is not needed as
--first-parent implies --diff-merges=first-parent anyway. OTOH, it
will stop being harmless once we let "-m" imply "-p".

While we are at it, fix corresponding test description in t3903-stash
to match what it actually tests.

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/stash.c  | 2 +-
 t/t3903-stash.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/stash.c b/builtin/stash.c
index ba774cce674f33..7c92bb51cbaef9 100644
--- a/builtin/stash.c
+++ b/builtin/stash.c
@@ -759,7 +759,7 @@ static int list_stash(int argc, const char **argv, const char *prefix)
 
 	cp.git_cmd = 1;
 	strvec_pushl(&cp.args, "log", "--format=%gd: %gs", "-g",
-		     "--first-parent", "-m", NULL);
+		     "--first-parent", NULL);
 	strvec_pushv(&cp.args, argv);
 	strvec_push(&cp.args, ref_stash);
 	strvec_push(&cp.args, "--");
diff --git a/t/t3903-stash.sh b/t/t3903-stash.sh
index 5f282ecf6175d3..873aa56e359d64 100755
--- a/t/t3903-stash.sh
+++ b/t/t3903-stash.sh
@@ -859,7 +859,7 @@ test_expect_success 'setup stash with index and worktree changes' '
 	git stash
 '
 
-test_expect_success 'stash list implies --first-parent -m' '
+test_expect_success 'stash list -p shows simple diff' '
 	cat >expect <<-EOF &&
 	stash@{0}
 

From fd16a39944ce2f2967ed015379f5bd05bac639bb Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:47:02 +0300
Subject: [PATCH 057/110] diff-merges: rename "combined_imply_patch" to
 "merges_imply_patch"

This is refactoring change in preparation for the next commit that
will let -m imply -p.

The old name doesn't match the intention to let not only -c/-cc imply
-p, but also -m, that is not a "combined" format, so we rename the
flag accordingly.

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff-merges.c | 10 +++++-----
 revision.h    |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/diff-merges.c b/diff-merges.c
index 9ca00cdd0cc6b2..d897fd8a293319 100644
--- a/diff-merges.c
+++ b/diff-merges.c
@@ -15,7 +15,7 @@ static void suppress(struct rev_info *revs)
 	revs->combine_merges = 0;
 	revs->dense_combined_merges = 0;
 	revs->combined_all_paths = 0;
-	revs->combined_imply_patch = 0;
+	revs->merges_imply_patch = 0;
 	revs->merges_need_diff = 0;
 }
 
@@ -109,10 +109,10 @@ int diff_merges_parse_opts(struct rev_info *revs, const char **argv)
 		set_to_default(revs);
 	} else if (!strcmp(arg, "-c")) {
 		set_combined(revs);
-		revs->combined_imply_patch = 1;
+		revs->merges_imply_patch = 1;
 	} else if (!strcmp(arg, "--cc")) {
 		set_dense_combined(revs);
-		revs->combined_imply_patch = 1;
+		revs->merges_imply_patch = 1;
 	} else if (!strcmp(arg, "--no-diff-merges")) {
 		suppress(revs);
 	} else if (!strcmp(arg, "--combined-all-paths")) {
@@ -162,9 +162,9 @@ void diff_merges_setup_revs(struct rev_info *revs)
 		revs->first_parent_merges = 0;
 	if (revs->combined_all_paths && !revs->combine_merges)
 		die("--combined-all-paths makes no sense without -c or --cc");
-	if (revs->combined_imply_patch)
+	if (revs->merges_imply_patch)
 		revs->diff = 1;
-	if (revs->combined_imply_patch || revs->merges_need_diff) {
+	if (revs->merges_imply_patch || revs->merges_need_diff) {
 		if (!revs->diffopt.output_format)
 			revs->diffopt.output_format = DIFF_FORMAT_PATCH;
 	}
diff --git a/revision.h b/revision.h
index e6be3c845e66eb..5ea881d189af16 100644
--- a/revision.h
+++ b/revision.h
@@ -195,10 +195,10 @@ struct rev_info {
 			/* Diff-merge flags */
 			explicit_diff_merges: 1,
 			merges_need_diff: 1,
+			merges_imply_patch:1,
 			separate_merges: 1,
 			combine_merges:1,
 			combined_all_paths:1,
-			combined_imply_patch:1,
 			dense_combined_merges:1,
 			first_parent_merges:1;
 

From f5bfcc823ba242a46e20fb6f71c9fbf7ebb222fe Mon Sep 17 00:00:00 2001
From: Sergey Organov <sorganov@gmail.com>
Date: Fri, 21 May 2021 00:47:03 +0300
Subject: [PATCH 058/110] diff-merges: let "-m" imply "-p"

Fix long standing inconsistency between -c/--cc that do imply -p on
one side, and -m that did not imply -p on the other side.

Change corresponding test accordingly, as "log -m" output should now
match one from "log -m -p", rather than from just "log".

Change documentation accordingly.

NOTES:

After this patch

  git log -m

produces diffs without need to provide -p as well, that improves both
consistency and usability. It gets even more useful if one sets
"log.diffMerges" configuration variable to "first-parent" to force -m
produce usual diff with respect to first parent only.

This patch, however, does not change behavior when specific diff
format is explicitly provided on the command-line, so that commands
like

  git log -m --raw
  git log -m --stat

are not affected, nor does it change commands where specific diff
format is active by default, such as:

  git diff-tree -m

It's also worth to be noticed that exact historical semantics of -m is
still provided by --diff-merges=separate.

Signed-off-by: Sergey Organov <sorganov@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt | 8 ++++----
 diff-merges.c                  | 1 +
 t/t4013-diff-various.sh        | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 6d968b9012dcbe..2825783049c510 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -49,10 +49,9 @@ ifdef::git-log[]
 --diff-merges=m:::
 -m:::
 	This option makes diff output for merge commits to be shown in
-	the default format. `-m` will produce the output only if `-p`
-	is given as well. The default format could be changed using
+	the default format. The default format could be changed using
 	`log.diffMerges` configuration parameter, which default value
-	is `separate`.
+	is `separate`. `-m` implies `-p`.
 +
 --diff-merges=first-parent:::
 --diff-merges=1:::
@@ -62,7 +61,8 @@ ifdef::git-log[]
 --diff-merges=separate:::
 	This makes merge commits show the full diff with respect to
 	each of the parents. Separate log entry and diff is generated
-	for each parent.
+	for each parent. This is the format that `-m` produced
+	historically.
 +
 --diff-merges=combined:::
 --diff-merges=c:::
diff --git a/diff-merges.c b/diff-merges.c
index d897fd8a293319..0dfcaa1b11b06c 100644
--- a/diff-merges.c
+++ b/diff-merges.c
@@ -107,6 +107,7 @@ int diff_merges_parse_opts(struct rev_info *revs, const char **argv)
 
 	if (!strcmp(arg, "-m")) {
 		set_to_default(revs);
+		revs->merges_imply_patch = 1;
 	} else if (!strcmp(arg, "-c")) {
 		set_combined(revs);
 		revs->merges_imply_patch = 1;
diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index e561a8e48521a0..7fadc985cccd05 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -455,8 +455,8 @@ diff-tree --stat --compact-summary initial mode
 diff-tree -R --stat --compact-summary initial mode
 EOF
 
-test_expect_success 'log -m matches pure log' '
-	git log master >result &&
+test_expect_success 'log -m matches log -m -p' '
+	git log -m -p master >result &&
 	process_diffs result >expected &&
 	git log -m >result &&
 	process_diffs result >actual &&

From 5317dfeaed30fbe647899ece261e3dd40e5ced1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C4=90o=C3=A0n=20Tr=E1=BA=A7n=20C=C3=B4ng=20Danh?=
 <congdanhqx@gmail.com>
Date: Sat, 22 May 2021 14:10:08 +0700
Subject: [PATCH 059/110] t: use configured TAR instead of tar
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Despite that tar is available everywhere, it's not required by POSIX.

In our build system, users are allowed to specify which tar to be used
in Makefile knobs. Furthermore, GNU tar (gtar) is prefered when autotools
is being used.

In our testsuite, 7 out of 9 tar-required-tests use "$TAR", the other
two use "tar".

Let's change the remaining two tests to "$TAR".

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t3513-revert-submodule.sh | 4 ++--
 t/t6041-bisect-submodule.sh | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/t/t3513-revert-submodule.sh b/t/t3513-revert-submodule.sh
index a759f12cbb1d6b..6e21278cfe77c8 100755
--- a/t/t3513-revert-submodule.sh
+++ b/t/t3513-revert-submodule.sh
@@ -14,7 +14,7 @@ test_description='revert can handle submodules'
 git_revert () {
 	git status -su >expect &&
 	ls -1pR * >>expect &&
-	tar cf "$TRASH_DIRECTORY/tmp.tar" * &&
+	"$TAR" cf "$TRASH_DIRECTORY/tmp.tar" * &&
 	may_only_be_test_must_fail "$2" &&
 	$2 git checkout "$1" &&
 	if test -n "$2"
@@ -23,7 +23,7 @@ git_revert () {
 	fi &&
 	git revert HEAD &&
 	rm -rf * &&
-	tar xf "$TRASH_DIRECTORY/tmp.tar" &&
+	"$TAR" xf "$TRASH_DIRECTORY/tmp.tar" &&
 	git status -su >actual &&
 	ls -1pR * >>actual &&
 	test_cmp expect actual &&
diff --git a/t/t6041-bisect-submodule.sh b/t/t6041-bisect-submodule.sh
index df1eff0fb831e3..82013fc9037c1d 100755
--- a/t/t6041-bisect-submodule.sh
+++ b/t/t6041-bisect-submodule.sh
@@ -8,7 +8,7 @@ test_description='bisect can handle submodules'
 git_bisect () {
 	git status -su >expect &&
 	ls -1pR * >>expect &&
-	tar cf "$TRASH_DIRECTORY/tmp.tar" * &&
+	"$TAR" cf "$TRASH_DIRECTORY/tmp.tar" * &&
 	GOOD=$(git rev-parse --verify HEAD) &&
 	may_only_be_test_must_fail "$2" &&
 	$2 git checkout "$1" &&
@@ -25,7 +25,7 @@ git_bisect () {
 	git bisect start &&
 	git bisect good $GOOD &&
 	rm -rf * &&
-	tar xf "$TRASH_DIRECTORY/tmp.tar" &&
+	"$TAR" xf "$TRASH_DIRECTORY/tmp.tar" &&
 	git status -su >actual &&
 	ls -1pR * >>actual &&
 	test_cmp expect actual &&

From 12d078ed2b64df220f2fe8f6a050f271133ccda9 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Fri, 21 May 2021 17:29:37 -0500
Subject: [PATCH 060/110] doc: refactor common asciidoc dependencies

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 2aae4c9cbb3503..46d9b98dac63fc 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -139,6 +139,7 @@ ASCIIDOC_CONF = -f asciidoc.conf
 ASCIIDOC_COMMON = $(ASCIIDOC) $(ASCIIDOC_EXTRA) $(ASCIIDOC_CONF) \
 		-amanversion=$(GIT_VERSION) \
 		-amanmanual='Git Manual' -amansource='Git'
+ASCIIDOC_DEPS = asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS
 TXT_TO_HTML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_HTML)
 TXT_TO_XML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_DOCBOOK)
 MANPAGE_XSL = manpage-normal.xsl
@@ -354,12 +355,12 @@ clean:
 	$(RM) manpage-base-url.xsl
 	$(RM) GIT-ASCIIDOCFLAGS
 
-$(MAN_HTML): %.html : %.txt asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS
+$(MAN_HTML): %.html : %.txt $(ASCIIDOC_DEPS)
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
 	$(TXT_TO_HTML) -d manpage -o $@+ $< && \
 	mv $@+ $@
 
-$(OBSOLETE_HTML): %.html : %.txto asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS
+$(OBSOLETE_HTML): %.html : %.txto $(ASCIIDOC_DEPS)
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
 	$(TXT_TO_HTML) -o $@+ $< && \
 	mv $@+ $@
@@ -371,7 +372,7 @@ manpage-base-url.xsl: manpage-base-url.xsl.in
 	$(QUIET_XMLTO)$(RM) $@ && \
 	$(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
 
-%.xml : %.txt asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS
+%.xml : %.txt $(ASCIIDOC_DEPS)
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
 	$(TXT_TO_XML) -d manpage -o $@+ $< && \
 	mv $@+ $@

From 56da21392b10ebe161dbf426f170e74f800178cf Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Fri, 21 May 2021 17:29:38 -0500
Subject: [PATCH 061/110] doc: improve asciidoc dependencies

asciidoc needs asciidoc.conf, asciidoctor asciidoctor-extensions.rb.

Neither needs the other.

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 46d9b98dac63fc..0f59cc0853d84e 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -139,7 +139,7 @@ ASCIIDOC_CONF = -f asciidoc.conf
 ASCIIDOC_COMMON = $(ASCIIDOC) $(ASCIIDOC_EXTRA) $(ASCIIDOC_CONF) \
 		-amanversion=$(GIT_VERSION) \
 		-amanmanual='Git Manual' -amansource='Git'
-ASCIIDOC_DEPS = asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS
+ASCIIDOC_DEPS = asciidoc.conf GIT-ASCIIDOCFLAGS
 TXT_TO_HTML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_HTML)
 TXT_TO_XML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_DOCBOOK)
 MANPAGE_XSL = manpage-normal.xsl
@@ -194,6 +194,7 @@ ASCIIDOC_DOCBOOK = docbook5
 ASCIIDOC_EXTRA += -acompat-mode -atabsize=8
 ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
 ASCIIDOC_EXTRA += -alitdd='&\#x2d;&\#x2d;'
+ASCIIDOC_DEPS = asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS
 DBLATEX_COMMON =
 XMLTO_EXTRA += --skip-validation
 XMLTO_EXTRA += -x manpage.xsl

From 471e7b2cf6ab5ef057feed03b35dde9a26357611 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Fri, 21 May 2021 17:29:39 -0500
Subject: [PATCH 062/110] doc: remove unnecessary rm instances

Commits 50cff52f1a (When generating manpages, delete outdated targets
first., 2007-08-02) and f9286765b2 (Documentation/Makefile: remove
cmd-list.made before redirecting to it., 2007-08-06) created these rm
instances for a very rare corner-case: building as root by mistake.

It's odd to have workarounds here, but nowhere else in the Makefile--
which already fails in this stuation, starting from
Documentation/technical/.

We gain nothing but complexity, so let's remove them.

Comments-by: Jeff King <peff@peff.net>
Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/Makefile | 42 +++++++++++++++---------------------------
 1 file changed, 15 insertions(+), 27 deletions(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 0f59cc0853d84e..73b88c3aad0376 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -296,8 +296,7 @@ docdep_prereqs = \
 	cmd-list.made $(cmds_txt)
 
 doc.dep : $(docdep_prereqs) $(DOC_DEP_TXT) build-docdep.perl
-	$(QUIET_GEN)$(RM) $@+ $@ && \
-	$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
+	$(QUIET_GEN)$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
 	mv $@+ $@
 
 ifneq ($(MAKECMDGOALS),clean)
@@ -318,8 +317,7 @@ cmds_txt = cmds-ancillaryinterrogators.txt \
 $(cmds_txt): cmd-list.made
 
 cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
-	$(QUIET_GEN)$(RM) $@ && \
-	$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(cmds_txt) $(QUIET_STDERR) && \
+	$(QUIET_GEN)$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(cmds_txt) $(QUIET_STDERR) && \
 	date >$@
 
 mergetools_txt = mergetools-diff.txt mergetools-merge.txt
@@ -327,7 +325,7 @@ mergetools_txt = mergetools-diff.txt mergetools-merge.txt
 $(mergetools_txt): mergetools-list.made
 
 mergetools-list.made: ../git-mergetool--lib.sh $(wildcard ../mergetools/*)
-	$(QUIET_GEN)$(RM) $@ && \
+	$(QUIET_GEN) \
 	$(SHELL_PATH) -c 'MERGE_TOOLS_DIR=../mergetools && \
 		. ../git-mergetool--lib.sh && \
 		show_tool_names can_diff "* " || :' >mergetools-diff.txt && \
@@ -357,30 +355,25 @@ clean:
 	$(RM) GIT-ASCIIDOCFLAGS
 
 $(MAN_HTML): %.html : %.txt $(ASCIIDOC_DEPS)
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(TXT_TO_HTML) -d manpage -o $@+ $< && \
+	$(QUIET_ASCIIDOC)$(TXT_TO_HTML) -d manpage -o $@+ $< && \
 	mv $@+ $@
 
 $(OBSOLETE_HTML): %.html : %.txto $(ASCIIDOC_DEPS)
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(TXT_TO_HTML) -o $@+ $< && \
+	$(QUIET_ASCIIDOC)$(TXT_TO_HTML) -o $@+ $< && \
 	mv $@+ $@
 
 manpage-base-url.xsl: manpage-base-url.xsl.in
 	$(QUIET_GEN)sed "s|@@MAN_BASE_URL@@|$(MAN_BASE_URL)|" $< > $@
 
 %.1 %.5 %.7 : %.xml manpage-base-url.xsl $(wildcard manpage*.xsl)
-	$(QUIET_XMLTO)$(RM) $@ && \
-	$(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+	$(QUIET_XMLTO)$(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
 
 %.xml : %.txt $(ASCIIDOC_DEPS)
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(TXT_TO_XML) -d manpage -o $@+ $< && \
+	$(QUIET_ASCIIDOC)$(TXT_TO_XML) -d manpage -o $@+ $< && \
 	mv $@+ $@
 
 user-manual.xml: user-manual.txt user-manual.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(TXT_TO_XML) -d book -o $@+ $< && \
+	$(QUIET_ASCIIDOC)$(TXT_TO_XML) -d book -o $@+ $< && \
 	mv $@+ $@
 
 technical/api-index.txt: technical/api-index-skel.txt \
@@ -402,27 +395,24 @@ XSLTOPTS += --stringparam html.stylesheet docbook-xsl.css
 XSLTOPTS += --param generate.consistent.ids 1
 
 user-manual.html: user-manual.xml $(XSLT)
-	$(QUIET_XSLTPROC)$(RM) $@+ $@ && \
-	xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \
+	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \
 	mv $@+ $@
 
 git.info: user-manual.texi
 	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
 
 user-manual.texi: user-manual.xml
-	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
-	$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+	$(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
 	$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
 	rm $@++ && \
 	mv $@+ $@
 
 user-manual.pdf: user-manual.xml
-	$(QUIET_DBLATEX)$(RM) $@+ $@ && \
-	$(DBLATEX) -o $@+ $(DBLATEX_COMMON) $< && \
+	$(QUIET_DBLATEX)$(DBLATEX) -o $@+ $(DBLATEX_COMMON) $< && \
 	mv $@+ $@
 
 gitman.texi: $(MAN_XML) cat-texi.perl texi.xsl
-	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(QUIET_DB2TEXI) \
 	($(foreach xml,$(sort $(MAN_XML)),xsltproc -o $(xml)+ texi.xsl $(xml) && \
 		$(DOCBOOK2X_TEXI) --encoding=UTF-8 --to-stdout $(xml)+ && \
 		rm $(xml)+ &&) true) > $@++ && \
@@ -434,13 +424,11 @@ gitman.info: gitman.texi
 	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
 
 $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
-	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
-	$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
+	$(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
 	mv $@+ $@
 
 howto-index.txt: howto-index.sh $(HOWTO_TXT)
-	$(QUIET_GEN)$(RM) $@+ $@ && \
-	'$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@+ && \
+	$(QUIET_GEN)'$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@+ && \
 	mv $@+ $@
 
 $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
@@ -450,7 +438,7 @@ WEBDOC_DEST = /pub/software/scm/git/docs
 
 howto/%.html: ASCIIDOC_EXTRA += -a git-relative-html-prefix=../
 $(patsubst %.txt,%.html,$(HOWTO_TXT)): %.html : %.txt GIT-ASCIIDOCFLAGS
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(QUIET_ASCIIDOC) \
 	sed -e '1,/^$$/d' $< | \
 	$(TXT_TO_HTML) - >$@+ && \
 	mv $@+ $@

From db10fc6c09f1f74c4d0a9294ecbb68d390f54f15 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Fri, 21 May 2021 17:29:40 -0500
Subject: [PATCH 063/110] doc: simplify Makefile using .DELETE_ON_ERROR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently GNU make already removes files when catching an interruption
signal, however, in order to deal with other kinds of errors a
workaround is in place to store target output to a temporary file, and
only move it to its right place on success.

By enabling the built-in .DELETE_ON_ERROR we let make do this task, so
we don't have to.

This way the rules can be simplified a lot.

Suggested-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/Makefile | 47 +++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 28 deletions(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 73b88c3aad0376..eaff97dcb8b1b3 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -296,8 +296,7 @@ docdep_prereqs = \
 	cmd-list.made $(cmds_txt)
 
 doc.dep : $(docdep_prereqs) $(DOC_DEP_TXT) build-docdep.perl
-	$(QUIET_GEN)$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
-	mv $@+ $@
+	$(QUIET_GEN)$(PERL_PATH) ./build-docdep.perl >$@ $(QUIET_STDERR)
 
 ifneq ($(MAKECMDGOALS),clean)
 -include doc.dep
@@ -355,12 +354,10 @@ clean:
 	$(RM) GIT-ASCIIDOCFLAGS
 
 $(MAN_HTML): %.html : %.txt $(ASCIIDOC_DEPS)
-	$(QUIET_ASCIIDOC)$(TXT_TO_HTML) -d manpage -o $@+ $< && \
-	mv $@+ $@
+	$(QUIET_ASCIIDOC)$(TXT_TO_HTML) -d manpage -o $@ $<
 
 $(OBSOLETE_HTML): %.html : %.txto $(ASCIIDOC_DEPS)
-	$(QUIET_ASCIIDOC)$(TXT_TO_HTML) -o $@+ $< && \
-	mv $@+ $@
+	$(QUIET_ASCIIDOC)$(TXT_TO_HTML) -o $@ $<
 
 manpage-base-url.xsl: manpage-base-url.xsl.in
 	$(QUIET_GEN)sed "s|@@MAN_BASE_URL@@|$(MAN_BASE_URL)|" $< > $@
@@ -369,12 +366,10 @@ manpage-base-url.xsl: manpage-base-url.xsl.in
 	$(QUIET_XMLTO)$(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
 
 %.xml : %.txt $(ASCIIDOC_DEPS)
-	$(QUIET_ASCIIDOC)$(TXT_TO_XML) -d manpage -o $@+ $< && \
-	mv $@+ $@
+	$(QUIET_ASCIIDOC)$(TXT_TO_XML) -d manpage -o $@ $<
 
 user-manual.xml: user-manual.txt user-manual.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS
-	$(QUIET_ASCIIDOC)$(TXT_TO_XML) -d book -o $@+ $< && \
-	mv $@+ $@
+	$(QUIET_ASCIIDOC)$(TXT_TO_XML) -d book -o $@ $<
 
 technical/api-index.txt: technical/api-index-skel.txt \
 	technical/api-index.sh $(patsubst %,%.txt,$(API_DOCS))
@@ -395,41 +390,35 @@ XSLTOPTS += --stringparam html.stylesheet docbook-xsl.css
 XSLTOPTS += --param generate.consistent.ids 1
 
 user-manual.html: user-manual.xml $(XSLT)
-	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \
-	mv $@+ $@
+	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
 
 git.info: user-manual.texi
 	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
 
 user-manual.texi: user-manual.xml
-	$(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
-	$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
-	rm $@++ && \
-	mv $@+ $@
+	$(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@+ && \
+	$(PERL_PATH) fix-texi.perl <$@+ >$@ && \
+	rm $@+
 
 user-manual.pdf: user-manual.xml
-	$(QUIET_DBLATEX)$(DBLATEX) -o $@+ $(DBLATEX_COMMON) $< && \
-	mv $@+ $@
+	$(QUIET_DBLATEX)$(DBLATEX) -o $@ $(DBLATEX_COMMON) $<
 
 gitman.texi: $(MAN_XML) cat-texi.perl texi.xsl
 	$(QUIET_DB2TEXI) \
 	($(foreach xml,$(sort $(MAN_XML)),xsltproc -o $(xml)+ texi.xsl $(xml) && \
 		$(DOCBOOK2X_TEXI) --encoding=UTF-8 --to-stdout $(xml)+ && \
-		rm $(xml)+ &&) true) > $@++ && \
-	$(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
-	rm $@++ && \
-	mv $@+ $@
+		rm $(xml)+ &&) true) > $@+ && \
+	$(PERL_PATH) cat-texi.perl $@ <$@+ >$@ && \
+	rm $@+
 
 gitman.info: gitman.texi
 	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
 
 $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
-	$(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
-	mv $@+ $@
+	$(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@
 
 howto-index.txt: howto-index.sh $(HOWTO_TXT)
-	$(QUIET_GEN)'$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@+ && \
-	mv $@+ $@
+	$(QUIET_GEN)'$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@
 
 $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
 	$(QUIET_ASCIIDOC)$(TXT_TO_HTML) $*.txt
@@ -440,8 +429,7 @@ howto/%.html: ASCIIDOC_EXTRA += -a git-relative-html-prefix=../
 $(patsubst %.txt,%.html,$(HOWTO_TXT)): %.html : %.txt GIT-ASCIIDOCFLAGS
 	$(QUIET_ASCIIDOC) \
 	sed -e '1,/^$$/d' $< | \
-	$(TXT_TO_HTML) - >$@+ && \
-	mv $@+ $@
+	$(TXT_TO_HTML) - >$@
 
 install-webdoc : html
 	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
@@ -482,4 +470,7 @@ doc-l10n install-l10n::
 	$(MAKE) -C po $@
 endif
 
+# Delete the target file on error
+.DELETE_ON_ERROR:
+
 .PHONY: FORCE

From 7ba30167291eb89f2e587b7cabfa4e7555de4ed5 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Fri, 21 May 2021 17:29:41 -0500
Subject: [PATCH 064/110] doc: avoid using rm directly

That's what we have $(RM) for.

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index eaff97dcb8b1b3..f5605b7767f3c7 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -398,7 +398,7 @@ git.info: user-manual.texi
 user-manual.texi: user-manual.xml
 	$(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@+ && \
 	$(PERL_PATH) fix-texi.perl <$@+ >$@ && \
-	rm $@+
+	$(RM) $@+
 
 user-manual.pdf: user-manual.xml
 	$(QUIET_DBLATEX)$(DBLATEX) -o $@ $(DBLATEX_COMMON) $<
@@ -407,9 +407,9 @@ gitman.texi: $(MAN_XML) cat-texi.perl texi.xsl
 	$(QUIET_DB2TEXI) \
 	($(foreach xml,$(sort $(MAN_XML)),xsltproc -o $(xml)+ texi.xsl $(xml) && \
 		$(DOCBOOK2X_TEXI) --encoding=UTF-8 --to-stdout $(xml)+ && \
-		rm $(xml)+ &&) true) > $@+ && \
+		$(RM) $(xml)+ &&) true) > $@+ && \
 	$(PERL_PATH) cat-texi.perl $@ <$@+ >$@ && \
-	rm $@+
+	$(RM) $@+
 
 gitman.info: gitman.texi
 	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi

From 3127ff90ea359c5be0d61c2885b3f0f74d602faf Mon Sep 17 00:00:00 2001
From: Teng Long <dyroneteng@gmail.com>
Date: Thu, 13 May 2021 15:15:47 +0800
Subject: [PATCH 065/110] packfile-uri.txt: fix blobPackfileUri description

Fix the 'uploadpack.blobPackfileUri' description in packfile-uri.txt
and the correct format also can be seen in t5702.

Signed-off-by: Teng Long <dyroneteng@gmail.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/technical/packfile-uri.txt | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt
index f7eabc6c76838d..1eb525fe760461 100644
--- a/Documentation/technical/packfile-uri.txt
+++ b/Documentation/technical/packfile-uri.txt
@@ -35,13 +35,14 @@ include some sort of non-trivial implementation in the Minimum Viable Product,
 at least so that we can test the client.
 
 This is the implementation: a feature, marked experimental, that allows the
-server to be configured by one or more `uploadpack.blobPackfileUri=<sha1>
-<uri>` entries. Whenever the list of objects to be sent is assembled, all such
-blobs are excluded, replaced with URIs. As noted in "Future work" below, the
-server can evolve in the future to support excluding other objects (or other
-implementations of servers could be made that support excluding other objects)
-without needing a protocol change, so clients should not expect that packfiles
-downloaded in this way only contain single blobs.
+server to be configured by one or more `uploadpack.blobPackfileUri=
+<object-hash> <pack-hash> <uri>` entries. Whenever the list of objects to be
+sent is assembled, all such blobs are excluded, replaced with URIs. As noted
+in "Future work" below, the server can evolve in the future to support
+excluding other objects (or other implementations of servers could be made
+that support excluding other objects) without needing a protocol change, so
+clients should not expect that packfiles downloaded in this way only contain
+single blobs.
 
 Client design
 -------------

From 211eca0895794362184da2be2a2d812d070719d3 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Thu, 10 Jun 2021 11:37:04 +0900
Subject: [PATCH 066/110] The first batch post Git 2.32

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/RelNotes/2.33.0.txt | 32 +++++++++++++++++++++++++++++++
 RelNotes                          |  2 +-
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/RelNotes/2.33.0.txt

diff --git a/Documentation/RelNotes/2.33.0.txt b/Documentation/RelNotes/2.33.0.txt
new file mode 100644
index 00000000000000..6795a2734fcb17
--- /dev/null
+++ b/Documentation/RelNotes/2.33.0.txt
@@ -0,0 +1,32 @@
+Git 2.33 Release Notes
+======================
+
+Updates since Git 2.32
+----------------------
+
+UI, Workflows & Features
+
+Performance, Internal Implementation, Development Support etc.
+
+
+Fixes since v2.31
+-----------------
+
+ * We historically rejected a very short string as an author name
+   while accepting a patch e-mail, which has been loosened.
+   (merge 72ee47ceeb ef/mailinfo-short-name later to maint).
+
+ * The parallel checkout codepath did not initialize object ID field
+   used to talk to the worker processes in a futureproof way.
+
+ * Rewrite code that triggers undefined behaiour warning.
+   (merge aafa5df0df jn/size-t-casted-to-off-t-fix later to maint).
+
+ * The description of "fast-forward" in the glossary has been updated.
+   (merge e22f2daed0 ry/clarify-fast-forward-in-glossary later to maint).
+
+ * Other code cleanup, docfix, build fix, etc.
+   (merge bfe35a6165 ah/doc-describe later to maint).
+   (merge f302c1e4aa jc/clarify-revision-range later to maint).
+   (merge 3127ff90ea tl/fix-packfile-uri-doc later to maint).
+   (merge a84216c684 jk/doc-color-pager later to maint).
diff --git a/RelNotes b/RelNotes
index aece21e8a40e44..f071367dc30cd9 120000
--- a/RelNotes
+++ b/RelNotes
@@ -1 +1 @@
-Documentation/RelNotes/2.32.0.txt
\ No newline at end of file
+Documentation/RelNotes/2.33.0.txt
\ No newline at end of file

From 670b81a890388c60b7032a4f5b879f2ece8c4558 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Mon, 14 Jun 2021 13:23:28 +0900
Subject: [PATCH 067/110] The second batch

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/RelNotes/2.33.0.txt | 50 +++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/Documentation/RelNotes/2.33.0.txt b/Documentation/RelNotes/2.33.0.txt
index 6795a2734fcb17..57443c746628fb 100644
--- a/Documentation/RelNotes/2.33.0.txt
+++ b/Documentation/RelNotes/2.33.0.txt
@@ -1,15 +1,49 @@
 Git 2.33 Release Notes
 ======================
 
+Backward compatibility notes
+----------------------------
+
+ * The "-m" option in "git log -m" that does not specify which format,
+   if any, of diff is desired did not have any visible effect; it now
+   implies some form of diff (by default "--patch") is produced.
+
+   You can disable the diff output with "git log -m --no-patch", but
+   then there probably isn't much point in passing "-m" in the first
+   place ;-).
+
+
 Updates since Git 2.32
 ----------------------
 
 UI, Workflows & Features
 
+ * "git send-email" learned the "--sendmail-cmd" command line option
+   and the "sendemail.sendmailCmd" configuration variable, which is a
+   more sensible approach than the current way of repurposing the
+   "smtp-server" that is meant to name the server to instead name the
+   command to talk to the server.
+
+ * The "-m" option in "git log -m" that does not specify which format,
+   if any, of diff is desired did not have any visible effect; it now
+   implies some form of diff (by default "--patch") is produced.
+
+
 Performance, Internal Implementation, Development Support etc.
 
+ * The code to handle the "--format" option in "for-each-ref" and
+   friends made too many string comparisons on %(atom)s used in the
+   format string, which has been corrected by converting them into
+   enum when the format string is parsed.
+
+ * Use the hashfile API in the codepath that writes the index file to
+   reduce code duplication.
+
+ * Repeated rename detections in a sequence of mergy operations have
+   been optimize out.
 
-Fixes since v2.31
+
+Fixes since v2.32
 -----------------
 
  * We historically rejected a very short string as an author name
@@ -19,14 +53,26 @@ Fixes since v2.31
  * The parallel checkout codepath did not initialize object ID field
    used to talk to the worker processes in a futureproof way.
 
- * Rewrite code that triggers undefined behaiour warning.
+ * Rewrite code that triggers undefined behaviour warning.
    (merge aafa5df0df jn/size-t-casted-to-off-t-fix later to maint).
 
  * The description of "fast-forward" in the glossary has been updated.
    (merge e22f2daed0 ry/clarify-fast-forward-in-glossary later to maint).
 
+ * Recent "git clone" left a temporary directory behind when the
+   transport layer returned an failure.
+   (merge 6aacb7d861 jk/clone-clean-upon-transport-error later to maint).
+
+ * "git fetch" over protocol v2 left its side of the socket open after
+   it finished speaking, which unnecessarily wasted the resource on
+   the other side.
+   (merge ae1a7eefff jk/fetch-pack-v2-half-close-early later to maint).
+
  * Other code cleanup, docfix, build fix, etc.
    (merge bfe35a6165 ah/doc-describe later to maint).
    (merge f302c1e4aa jc/clarify-revision-range later to maint).
    (merge 3127ff90ea tl/fix-packfile-uri-doc later to maint).
    (merge a84216c684 jk/doc-color-pager later to maint).
+   (merge 4e0a64a713 ab/trace2-squelch-gcc-warning later to maint).
+   (merge 225f7fa847 ps/rev-list-object-type-filter later to maint).
+   (merge 5317dfeaed dd/honor-users-tar-in-tests later to maint).

From fc6609d198c47bf511f3388975c7c94e0aa1af2b Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:25 +0000
Subject: [PATCH 068/110] sparse-index: skip indexes with unmerged entries

The sparse-index format is designed to be compatible with merge
conflicts, even those outside the sparse-checkout definition. The reason
is that when converting a full index to a sparse one, a cache entry with
nonzero stage will not be collapsed into a sparse directory entry.

However, this behavior was not tested, and a different behavior within
convert_to_sparse() fails in this scenario. Specifically,
cache_tree_update() will fail when unmerged entries exist.
convert_to_sparse_rec() uses the cache-tree data to recursively walk the
tree structure, but also to compute the OIDs used in the
sparse-directory entries.

Add an index scan to convert_to_sparse() that will detect if these merge
conflict entries exist and skip the conversion before trying to update
the cache-tree. This is marked as NEEDSWORK because this can be removed
with a suitable update to cache_tree_update() or a similar method that
can construct a cache-tree with invalid nodes, but still allow creating
the nodes necessary for creating sparse directory entries.

It is possible that in the future we will not need to make such an
update, since if we do not expand a sparse-index into a full one, this
conversion does not need to happen. Thus, this can be deferred until the
merge machinery is made to integrate with the sparse-index.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sparse-index.c                           | 18 ++++++++++++++++++
 t/t1092-sparse-checkout-compatibility.sh | 22 ++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/sparse-index.c b/sparse-index.c
index affc4048f279f0..2c6959302755da 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -116,6 +116,17 @@ int set_sparse_index_config(struct repository *repo, int enable)
 	return res;
 }
 
+static int index_has_unmerged_entries(struct index_state *istate)
+{
+	int i;
+	for (i = 0; i < istate->cache_nr; i++) {
+		if (ce_stage(istate->cache[i]))
+			return 1;
+	}
+
+	return 0;
+}
+
 int convert_to_sparse(struct index_state *istate)
 {
 	int test_env;
@@ -152,6 +163,13 @@ int convert_to_sparse(struct index_state *istate)
 		return -1;
 	}
 
+	/*
+	 * NEEDSWORK: If we have unmerged entries, then stay full.
+	 * Unmerged entries prevent the cache-tree extension from working.
+	 */
+	if (index_has_unmerged_entries(istate))
+		return 0;
+
 	if (cache_tree_update(istate, 0)) {
 		warning(_("unable to update cache-tree, staying full"));
 		return -1;
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index e9a815ca7aaaa8..ba2fd94adaf0f5 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -352,6 +352,28 @@ test_expect_success 'merge with outside renames' '
 	done
 '
 
+# Sparse-index fails to convert the index in the
+# final 'git cherry-pick' command.
+test_expect_success 'cherry-pick with conflicts' '
+	init_repos &&
+
+	write_script edit-conflict <<-\EOF &&
+	echo $1 >conflict
+	EOF
+
+	test_all_match git checkout -b to-cherry-pick &&
+	run_on_all ../edit-conflict ABC &&
+	test_all_match git add conflict &&
+	test_all_match git commit -m "conflict to pick" &&
+
+	test_all_match git checkout -B base HEAD~1 &&
+	run_on_all ../edit-conflict DEF &&
+	test_all_match git add conflict &&
+	test_all_match git commit -m "conflict in base" &&
+
+	test_all_match test_must_fail git cherry-pick to-cherry-pick
+'
+
 test_expect_success 'clean' '
 	init_repos &&
 

From 47410778fbd2765c9e0a4b4026ece66aa7c83aa4 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:26 +0000
Subject: [PATCH 069/110] sparse-index: include EXTENDED flag when expanding

When creating a full index from a sparse one, we create cache entries
for every blob within a given sparse directory entry. These are
correctly marked with the CE_SKIP_WORKTREE flag, but the CE_EXTENDED
flag is not included. The CE_EXTENDED flag would exist if we loaded a
full index from disk with these entries marked with CE_SKIP_WORKTREE, so
we can add the flag here to be consistent. This allows us to directly
compare the flags present in cache entries when testing the sparse-index
feature, but has no significance to its correctness in the user-facing
functionality.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sparse-index.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sparse-index.c b/sparse-index.c
index 2c6959302755da..ef53bd2198b479 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -213,7 +213,7 @@ static int add_path_to_index(const struct object_id *oid,
 	strbuf_addstr(base, path);
 
 	ce = make_cache_entry(istate, mode, oid, base->buf, 0, 0);
-	ce->ce_flags |= CE_SKIP_WORKTREE;
+	ce->ce_flags |= CE_SKIP_WORKTREE | CE_EXTENDED;
 	set_index_entry(istate, istate->cache_nr++, ce);
 
 	strbuf_setlen(base, len);

From 3d814b5dc0567ebca1c40c6fe10bd9f19ca31f58 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:27 +0000
Subject: [PATCH 070/110] t1092: replace incorrect 'echo' with 'cat'

This fixes the test data shape to be as expected, allowing rename
detection to work properly now that the 'larger-content' file actually
has meaningful lines.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index ba2fd94adaf0f5..ebbba044f77f79 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -40,7 +40,7 @@ test_expect_success 'setup' '
 		done &&
 
 		git checkout -b rename-base base &&
-		echo >folder1/larger-content <<-\EOF &&
+		cat >folder1/larger-content <<-\EOF &&
 		matching
 		lines
 		help

From e669ffb2b8f7effa4f6876f61c35ae5c4a8566b2 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:28 +0000
Subject: [PATCH 071/110] t1092: expand repository data shape

As more features integrate with the sparse-index feature, more and more
special cases arise that require different data shapes within the tree
structure of the repository in order to demonstrate those cases.

Add several interesting special cases all at once instead of sprinkling
them across several commits. The interesting cases being added here are:

* Add sparse-directory entries on both sides of directories within the
  sparse-checkout definition.

* Add directories outside the sparse-checkout definition who have only
  one entry and are the first entry of a directory with multiple
  entries.

* Add filenames adjacent to a sparse directory entry that sort before
  and after the trailing slash.

Later tests will take advantage of these shapes, but they also deepen
the tests that already exist.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 44 ++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index ebbba044f77f79..0e71a623619cf4 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -17,7 +17,7 @@ test_expect_success 'setup' '
 		echo "after folder1" >g &&
 		echo "after x" >z &&
 		mkdir folder1 folder2 deep x &&
-		mkdir deep/deeper1 deep/deeper2 &&
+		mkdir deep/deeper1 deep/deeper2 deep/before deep/later &&
 		mkdir deep/deeper1/deepest &&
 		echo "after deeper1" >deep/e &&
 		echo "after deepest" >deep/deeper1/e &&
@@ -25,10 +25,23 @@ test_expect_success 'setup' '
 		cp a folder2 &&
 		cp a x &&
 		cp a deep &&
+		cp a deep/before &&
 		cp a deep/deeper1 &&
 		cp a deep/deeper2 &&
+		cp a deep/later &&
 		cp a deep/deeper1/deepest &&
 		cp -r deep/deeper1/deepest deep/deeper2 &&
+		mkdir deep/deeper1/0 &&
+		mkdir deep/deeper1/0/0 &&
+		touch deep/deeper1/0/1 &&
+		touch deep/deeper1/0/0/0 &&
+		>folder1- &&
+		>folder1.x &&
+		>folder10 &&
+		cp -r deep/deeper1/0 folder1 &&
+		cp -r deep/deeper1/0 folder2 &&
+		echo >>folder1/0/0/0 &&
+		echo >>folder2/0/1 &&
 		git add . &&
 		git commit -m "initial commit" &&
 		git checkout -b base &&
@@ -56,11 +69,17 @@ test_expect_success 'setup' '
 		mv folder1/a folder2/b &&
 		mv folder1/larger-content folder2/edited-content &&
 		echo >>folder2/edited-content &&
+		echo >>folder2/0/1 &&
+		echo stuff >>deep/deeper1/a &&
 		git add . &&
 		git commit -m "rename folder1/... to folder2/..." &&
 
 		git checkout -b rename-out-to-in rename-base &&
 		mv folder1/a deep/deeper1/b &&
+		echo more stuff >>deep/deeper1/a &&
+		rm folder2/0/1 &&
+		mkdir folder2/0/1 &&
+		echo >>folder2/0/1/1 &&
 		mv folder1/larger-content deep/deeper1/edited-content &&
 		echo >>deep/deeper1/edited-content &&
 		git add . &&
@@ -68,6 +87,9 @@ test_expect_success 'setup' '
 
 		git checkout -b rename-in-to-out rename-base &&
 		mv deep/deeper1/a folder1/b &&
+		echo >>folder2/0/1 &&
+		rm -rf folder1/0/0 &&
+		echo >>folder1/0/0 &&
 		mv deep/deeper1/larger-content folder1/edited-content &&
 		echo >>folder1/edited-content &&
 		git add . &&
@@ -262,13 +284,29 @@ test_expect_success 'diff --staged' '
 	test_all_match git diff --staged
 '
 
-test_expect_success 'diff with renames' '
+test_expect_success 'diff with renames and conflicts' '
 	init_repos &&
 
 	for branch in rename-out-to-out rename-out-to-in rename-in-to-out
 	do
 		test_all_match git checkout rename-base &&
-		test_all_match git checkout $branch -- .&&
+		test_all_match git checkout $branch -- . &&
+		test_all_match git status --porcelain=v2 &&
+		test_all_match git diff --staged --no-renames &&
+		test_all_match git diff --staged --find-renames || return 1
+	done
+'
+
+test_expect_success 'diff with directory/file conflicts' '
+	init_repos &&
+
+	for branch in rename-out-to-out rename-out-to-in rename-in-to-out
+	do
+		git -C full-checkout reset --hard &&
+		test_sparse_match git reset --hard &&
+		test_all_match git checkout $branch &&
+		test_all_match git checkout rename-base -- . &&
+		test_all_match git status --porcelain=v2 &&
 		test_all_match git diff --staged --no-renames &&
 		test_all_match git diff --staged --find-renames || return 1
 	done

From bf26c06f126219498a10ae9f8923cfb0bcbad823 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:29 +0000
Subject: [PATCH 072/110] t1092: add tests for status/add and sparse files

Before moving to update 'git status' and 'git add' to work with sparse
indexes, add an explicit test that ensures the sparse-index works the
same as a normal sparse-checkout when the worktree contains directories
and files outside of the sparse cone.

Specifically, 'folder1/a' is a file in our test repo, but 'folder1' is
not in the sparse cone. When 'folder1/a' is modified, the file is not
shown as modified and adding it will fail. This is new behavior as of
a20f704 (add: warn when asked to update SKIP_WORKTREE entries,
2021-04-08). Before that change, these adds would be silently ignored.

Untracked files are fine: adding new files both with 'git add .' and
'git add folder1/' works just as in a full checkout. This may not be
entirely desirable, but we are not intending to change behavior at the
moment, only document it. A future change could alter the behavior to
be more sensible, and this test could be modified to satisfy the new
expected behavior.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 38 ++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 0e71a623619cf4..2269f44e0333f4 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -254,6 +254,44 @@ test_expect_success 'add, commit, checkout' '
 	test_all_match git checkout -
 '
 
+test_expect_success 'status/add: outside sparse cone' '
+	init_repos &&
+
+	# adding a "missing" file outside the cone should fail
+	test_sparse_match test_must_fail git add folder1/a &&
+
+	# folder1 is at HEAD, but outside the sparse cone
+	run_on_sparse mkdir folder1 &&
+	cp initial-repo/folder1/a sparse-checkout/folder1/a &&
+	cp initial-repo/folder1/a sparse-index/folder1/a &&
+
+	test_sparse_match git status &&
+
+	write_script edit-contents <<-\EOF &&
+	echo text >>$1
+	EOF
+	run_on_sparse ../edit-contents folder1/a &&
+	run_on_all ../edit-contents folder1/new &&
+
+	test_sparse_match git status --porcelain=v2 &&
+
+	# This "git add folder1/a" fails with a warning
+	# in the sparse repos, differing from the full
+	# repo. This is intentional.
+	test_sparse_match test_must_fail git add folder1/a &&
+	test_sparse_match test_must_fail git add --refresh folder1/a &&
+	test_all_match git status --porcelain=v2 &&
+
+	test_all_match git add . &&
+	test_all_match git status --porcelain=v2 &&
+	test_all_match git commit -m folder1/new &&
+
+	run_on_all ../edit-contents folder1/newer &&
+	test_all_match git add folder1/ &&
+	test_all_match git status --porcelain=v2 &&
+	test_all_match git commit -m folder1/newer
+'
+
 test_expect_success 'checkout and reset --hard' '
 	init_repos &&
 

From 17a1bb570bcd165a3dc1c28c441bee45a941bb12 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:30 +0000
Subject: [PATCH 073/110] unpack-trees: preserve cache_bottom

The cache_bottom member of 'struct unpack_trees_options' is used to
track the range of index entries corresponding to a node of the cache
tree. While recursing with traverse_by_cache_tree(), this value is
preserved on the call stack using a local and then restored as that
method returns.

The mark_ce_used() method normally modifies the cache_bottom member when
it refers to the marked cache entry. However, sparse directory entries
are stored as nodes in the cache-tree data structure as of 2de37c53
(cache-tree: integrate with sparse directory entries, 2021-03-30). Thus,
the cache_bottom will be modified as the cache-tree walk advances. Do
not update it as well within mark_ce_used().

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 unpack-trees.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/unpack-trees.c b/unpack-trees.c
index f88a69f8e71649..87c1ed204c83e4 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -600,6 +600,13 @@ static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o)
 {
 	ce->ce_flags |= CE_UNPACKED;
 
+	/*
+	 * If this is a sparse directory, don't advance cache_bottom.
+	 * That will be advanced later using the cache-tree data.
+	 */
+	if (S_ISSPARSEDIR(ce->ce_mode))
+		return;
+
 	if (o->cache_bottom < o->src_index->cache_nr &&
 	    o->src_index->cache[o->cache_bottom] == ce) {
 		int bottom = o->cache_bottom;

From cd807a5cdabe5720071d91801dbc76faa8a643ba Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:31 +0000
Subject: [PATCH 074/110] unpack-trees: compare sparse directories correctly

As we further integrate the sparse-index into unpack-trees, we need to
ensure that we compare sparse directory entries correctly with other
entries. This affects searching for an exact path as well as sorting
index entries.

Sparse directory entries contain the trailing directory separator. This
is important for the sorting, in particular. Thus, within
do_compare_entry() we stop using S_IFREG in all cases, since sparse
directories should use S_IFDIR to indicate that the comparison should
treat the entry name as a dirctory.

Within compare_entry(), it first calls do_compare_entry() to check the
leading portion of the name. When the input path is a directory name, we
could match exactly already. Thus, we should return 0 if we have an
exact string match on a sparse directory entry. The final check is a
length comparison between the strings.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 unpack-trees.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 87c1ed204c83e4..b113cc750f2e17 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -983,6 +983,7 @@ static int do_compare_entry(const struct cache_entry *ce,
 	int pathlen, ce_len;
 	const char *ce_name;
 	int cmp;
+	unsigned ce_mode;
 
 	/*
 	 * If we have not precomputed the traverse path, it is quicker
@@ -1005,7 +1006,8 @@ static int do_compare_entry(const struct cache_entry *ce,
 	ce_len -= pathlen;
 	ce_name = ce->name + pathlen;
 
-	return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode);
+	ce_mode = S_ISSPARSEDIR(ce->ce_mode) ? S_IFDIR : S_IFREG;
+	return df_name_compare(ce_name, ce_len, ce_mode, name, namelen, mode);
 }
 
 static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
@@ -1014,6 +1016,16 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf
 	if (cmp)
 		return cmp;
 
+	/*
+	 * At this point, we know that we have a prefix match. If ce
+	 * is a sparse directory, then allow an exact match. This only
+	 * works when the input name is a directory, since ce->name
+	 * ends in a directory separator.
+	 */
+	if (S_ISSPARSEDIR(ce->ce_mode) &&
+	    ce->ce_namelen == traverse_path_len(info, tree_entry_len(n)) + 1)
+		return 0;
+
 	/*
 	 * Even if the beginning compared identically, the ce should
 	 * compare as bigger than a directory leading up to it!

From bd6a3fd7f1ab698c7ed1222c70a7a0f6a6592bd8 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:32 +0000
Subject: [PATCH 075/110] unpack-trees: rename unpack_nondirectories()

In the next change, we will use this method to unpack a sparse directory
entry, so change the name to unpack_single_entry() so these entries
apply. The new name reflects that we will not recurse into trees in
order to resolve the conflicts.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 unpack-trees.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index b113cc750f2e17..d26386ce8b228e 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -804,7 +804,7 @@ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
 		BUG("We need cache-tree to do this optimization");
 
 	/*
-	 * Do what unpack_callback() and unpack_nondirectories() normally
+	 * Do what unpack_callback() and unpack_single_entry() normally
 	 * do. But we walk all paths in an iterative loop instead.
 	 *
 	 * D/F conflicts and higher stage entries are not a concern
@@ -1075,11 +1075,11 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
  * without actually calling it. If you change the logic here you may need to
  * check and change there as well.
  */
-static int unpack_nondirectories(int n, unsigned long mask,
-				 unsigned long dirmask,
-				 struct cache_entry **src,
-				 const struct name_entry *names,
-				 const struct traverse_info *info)
+static int unpack_single_entry(int n, unsigned long mask,
+			       unsigned long dirmask,
+			       struct cache_entry **src,
+			       const struct name_entry *names,
+			       const struct traverse_info *info)
 {
 	int i;
 	struct unpack_trees_options *o = info->data;
@@ -1322,7 +1322,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 		}
 	}
 
-	if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
+	if (unpack_single_entry(n, mask, dirmask, src, names, info) < 0)
 		return -1;
 
 	if (o->merge && src[0]) {

From 523506df51e2e13c8c354dff1eea3378bac8dec8 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:33 +0000
Subject: [PATCH 076/110] unpack-trees: unpack sparse directory entries

During unpack_callback(), index entries are compared against tree
entries. These are matched according to names and types. One goal is to
decide if we should recurse into subtrees or simply operate on one index
entry.

In the case of a sparse-directory entry, we do not want to recurse into
that subtree and instead simply compare the trees. In some cases, we
might want to perform a merge operation on the entry, such as during
'git checkout <commit>' which wants to replace a sparse tree entry with
the tree for that path at the target commit. We extend the logic within
unpack_single_entry() to create a sparse-directory entry in this case,
and then that is sent to call_unpack_fn().

There are some subtleties in this process. For instance, we need to
update find_cache_entry() to allow finding a sparse-directory entry that
exactly matches a given path. Use the new helper method
sparse_dir_matches_path() for this. We also need to ignore conflict
markers in the case that the entries correspond to directories and we
already have a sparse directory entry.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 unpack-trees.c | 107 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 99 insertions(+), 8 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index d26386ce8b228e..0a5135ab39745d 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1052,13 +1052,15 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
 	const struct name_entry *n,
 	int stage,
 	struct index_state *istate,
-	int is_transient)
+	int is_transient,
+	int is_sparse_directory)
 {
 	size_t len = traverse_path_len(info, tree_entry_len(n));
+	size_t alloc_len = is_sparse_directory ? len + 1 : len;
 	struct cache_entry *ce =
 		is_transient ?
-		make_empty_transient_cache_entry(len, NULL) :
-		make_empty_cache_entry(istate, len);
+		make_empty_transient_cache_entry(alloc_len, NULL) :
+		make_empty_cache_entry(istate, alloc_len);
 
 	ce->ce_mode = create_ce_mode(n->mode);
 	ce->ce_flags = create_ce_flags(stage);
@@ -1067,6 +1069,13 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
 	/* len+1 because the cache_entry allocates space for NUL */
 	make_traverse_path(ce->name, len + 1, info, n->path, n->pathlen);
 
+	if (is_sparse_directory) {
+		ce->name[len] = '/';
+		ce->name[len + 1] = '\0';
+		ce->ce_namelen++;
+		ce->ce_flags |= CE_SKIP_WORKTREE;
+	}
+
 	return ce;
 }
 
@@ -1085,10 +1094,17 @@ static int unpack_single_entry(int n, unsigned long mask,
 	struct unpack_trees_options *o = info->data;
 	unsigned long conflicts = info->df_conflicts | dirmask;
 
-	/* Do we have *only* directories? Nothing to do */
 	if (mask == dirmask && !src[0])
 		return 0;
 
+	/*
+	 * When we have a sparse directory entry for src[0],
+	 * then this isn't necessarily a directory-file conflict.
+	 */
+	if (mask == dirmask && src[0] &&
+	    S_ISSPARSEDIR(src[0]->ce_mode))
+		conflicts = 0;
+
 	/*
 	 * Ok, we've filled in up to any potential index entry in src[0],
 	 * now do the rest.
@@ -1118,7 +1134,9 @@ static int unpack_single_entry(int n, unsigned long mask,
 		 * not stored in the index.  otherwise construct the
 		 * cache entry from the index aware logic.
 		 */
-		src[i + o->merge] = create_ce_entry(info, names + i, stage, &o->result, o->merge);
+		src[i + o->merge] = create_ce_entry(info, names + i, stage,
+						    &o->result, o->merge,
+						    bit & dirmask);
 	}
 
 	if (o->merge) {
@@ -1222,16 +1240,71 @@ static int find_cache_pos(struct traverse_info *info,
 	return -1;
 }
 
+/*
+ * Given a sparse directory entry 'ce', compare ce->name to
+ * info->name + '/' + p->path + '/' if info->name is non-empty.
+ * Compare ce->name to p->path + '/' otherwise. Note that
+ * ce->name must end in a trailing '/' because it is a sparse
+ * directory entry.
+ */
+static int sparse_dir_matches_path(const struct cache_entry *ce,
+				   struct traverse_info *info,
+				   const struct name_entry *p)
+{
+	assert(S_ISSPARSEDIR(ce->ce_mode));
+	assert(ce->name[ce->ce_namelen - 1] == '/');
+
+	if (info->namelen)
+		return ce->ce_namelen == info->namelen + p->pathlen + 2 &&
+		       ce->name[info->namelen] == '/' &&
+		       !strncmp(ce->name, info->name, info->namelen) &&
+		       !strncmp(ce->name + info->namelen + 1, p->path, p->pathlen);
+	return ce->ce_namelen == p->pathlen + 1 &&
+	       !strncmp(ce->name, p->path, p->pathlen);
+}
+
 static struct cache_entry *find_cache_entry(struct traverse_info *info,
 					    const struct name_entry *p)
 {
+	struct cache_entry *ce;
 	int pos = find_cache_pos(info, p->path, p->pathlen);
 	struct unpack_trees_options *o = info->data;
 
 	if (0 <= pos)
 		return o->src_index->cache[pos];
-	else
+
+	/*
+	 * Check for a sparse-directory entry named "path/".
+	 * Due to the input p->path not having a trailing
+	 * slash, the negative 'pos' value overshoots the
+	 * expected position, hence "-2" instead of "-1".
+	 */
+	pos = -pos - 2;
+
+	if (pos < 0 || pos >= o->src_index->cache_nr)
 		return NULL;
+
+	/*
+	 * Due to lexicographic sorting and sparse directory
+	 * entries ending with a trailing slash, our path as a
+	 * sparse directory (e.g "subdir/") and	our path as a
+	 * file (e.g. "subdir") might be separated by other
+	 * paths (e.g. "subdir-").
+	 */
+	while (pos >= 0) {
+		ce = o->src_index->cache[pos];
+
+		if (strncmp(ce->name, p->path, p->pathlen))
+			return NULL;
+
+		if (S_ISSPARSEDIR(ce->ce_mode) &&
+		    sparse_dir_matches_path(ce, info, p))
+			return ce;
+
+		pos--;
+	}
+
+	return NULL;
 }
 
 static void debug_path(struct traverse_info *info)
@@ -1266,6 +1339,21 @@ static void debug_unpack_callback(int n,
 		debug_name_entry(i, names + i);
 }
 
+/*
+ * Returns true if and only if the given cache_entry is a
+ * sparse-directory entry that matches the given name_entry
+ * from the tree walk at the given traverse_info.
+ */
+static int is_sparse_directory_entry(struct cache_entry *ce,
+				     struct name_entry *name,
+				     struct traverse_info *info)
+{
+	if (!ce || !name || !S_ISSPARSEDIR(ce->ce_mode))
+		return 0;
+
+	return sparse_dir_matches_path(ce, info, name);
+}
+
 /*
  * Note that traverse_by_cache_tree() duplicates some logic in this function
  * without actually calling it. If you change the logic here you may need to
@@ -1352,9 +1440,12 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 			}
 		}
 
-		if (traverse_trees_recursive(n, dirmask, mask & ~dirmask,
-					     names, info) < 0)
+		if (!is_sparse_directory_entry(src[0], names, info) &&
+		    traverse_trees_recursive(n, dirmask, mask & ~dirmask,
+						    names, info) < 0) {
 			return -1;
+		}
+
 		return mask;
 	}
 

From 69bdbdb0ee25bd64bd3ad2fe241d434442e96f75 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:34 +0000
Subject: [PATCH 077/110] dir.c: accept a directory as part of cone-mode
 patterns

When we have sparse directory entries in the index, we want to compare
that directory against sparse-checkout patterns. Those pattern matching
algorithms are built expecting a file path, not a directory path. This
is especially important in the "cone mode" patterns which will match
files that exist within the "parent directories" as well as the
recursive directory matches.

If path_matches_pattern_list() is given a directory, we can add a fake
filename ("-") to the directory and get the same results as before,
assuming we are in cone mode. Since sparse index requires cone mode
patterns, this is an acceptable assumption.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/dir.c b/dir.c
index ebe5ec046e0506..0c5264b3b20a2f 100644
--- a/dir.c
+++ b/dir.c
@@ -1376,7 +1376,7 @@ enum pattern_match_result path_matches_pattern_list(
 	struct path_pattern *pattern;
 	struct strbuf parent_pathname = STRBUF_INIT;
 	int result = NOT_MATCHED;
-	const char *slash_pos;
+	size_t slash_pos;
 
 	if (!pl->use_cone_patterns) {
 		pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
@@ -1397,21 +1397,35 @@ enum pattern_match_result path_matches_pattern_list(
 	strbuf_addch(&parent_pathname, '/');
 	strbuf_add(&parent_pathname, pathname, pathlen);
 
+	/*
+	 * Directory entries are matched if and only if a file
+	 * contained immediately within them is matched. For the
+	 * case of a directory entry, modify the path to create
+	 * a fake filename within this directory, allowing us to
+	 * use the file-base matching logic in an equivalent way.
+	 */
+	if (parent_pathname.len > 0 &&
+	    parent_pathname.buf[parent_pathname.len - 1] == '/') {
+		slash_pos = parent_pathname.len - 1;
+		strbuf_add(&parent_pathname, "-", 1);
+	} else {
+		const char *slash_ptr = strrchr(parent_pathname.buf, '/');
+		slash_pos = slash_ptr ? slash_ptr - parent_pathname.buf : 0;
+	}
+
 	if (hashmap_contains_path(&pl->recursive_hashmap,
 				  &parent_pathname)) {
 		result = MATCHED_RECURSIVE;
 		goto done;
 	}
 
-	slash_pos = strrchr(parent_pathname.buf, '/');
-
-	if (slash_pos == parent_pathname.buf) {
+	if (!slash_pos) {
 		/* include every file in root */
 		result = MATCHED;
 		goto done;
 	}
 
-	strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf);
+	strbuf_setlen(&parent_pathname, slash_pos);
 
 	if (hashmap_contains_path(&pl->parent_hashmap, &parent_pathname)) {
 		result = MATCHED;

From 9eb00af5627f1a04ca083128085ca6774fce0524 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:35 +0000
Subject: [PATCH 078/110] diff-lib: handle index diffs with sparse dirs

While comparing an index to a tree, we may see a sparse directory entry.
In this case, we should compare that portion of the tree to the tree
represented by that entry. This could include a new tree which needs to
be expanded to a full list of added files. It could also include an
existing tree, in which case all of the changes inside are important to
describe, including the modifications, additions, and deletions. Note
that the case where the tree has a path and the index does not remains
identical to before: the lack of a cache entry is the same with a sparse
index.

Use diff_tree_oid() appropriately to compute the diff.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff-lib.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/diff-lib.c b/diff-lib.c
index c2ac9250fe92c2..f9eadc4fc1a64b 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -325,6 +325,11 @@ static void show_new_file(struct rev_info *revs,
 	unsigned dirty_submodule = 0;
 	struct index_state *istate = revs->diffopt.repo->index;
 
+	if (new_file && S_ISSPARSEDIR(new_file->ce_mode)) {
+		diff_tree_oid(NULL, &new_file->oid, new_file->name, &revs->diffopt);
+		return;
+	}
+
 	/*
 	 * New file in the index: it might actually be different in
 	 * the working tree.
@@ -347,6 +352,20 @@ static int show_modified(struct rev_info *revs,
 	unsigned dirty_submodule = 0;
 	struct index_state *istate = revs->diffopt.repo->index;
 
+	assert(S_ISSPARSEDIR(old_entry->ce_mode) ==
+	       S_ISSPARSEDIR(new_entry->ce_mode));
+
+	/*
+	 * If both are sparse directory entries, then expand the
+	 * modifications to the file level. If only one was a sparse
+	 * directory, then they appear as an add and delete instead of
+	 * a modification.
+	 */
+	if (S_ISSPARSEDIR(new_entry->ce_mode)) {
+		diff_tree_oid(&old_entry->oid, &new_entry->oid, new_entry->name, &revs->diffopt);
+		return 0;
+	}
+
 	if (get_stat_data(istate, new_entry, &oid, &mode, cached, match_missing,
 			  &dirty_submodule, &revs->diffopt) < 0) {
 		if (report_missing)

From bf48e5acdbf25a98fd142d9c90157c10b427119a Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:36 +0000
Subject: [PATCH 079/110] status: skip sparse-checkout percentage with
 sparse-index

'git status' began reporting a percentage of populated paths when
sparse-checkout is enabled in 051df3cf (wt-status: show sparse
checkout status as well, 2020-07-18). This percentage is incorrect when
the index has sparse directories. It would also be expensive to
calculate as we would need to parse trees to count the total number of
possible paths.

Avoid the expensive computation by simplifying the output to only report
that a sparse checkout exists, without the percentage.

This change is the reason we use 'git status --porcelain=v2' in
t1092-sparse-checkout-compatibility.sh. We don't want to ensure that
this message is equal across both modes, but instead just the important
information about staged, modified, and untracked files are compared.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1092-sparse-checkout-compatibility.sh |  8 ++++++++
 wt-status.c                              | 14 +++++++++++---
 wt-status.h                              |  1 +
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 2269f44e0333f4..375b0d35565f86 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -218,6 +218,14 @@ test_expect_success 'status with options' '
 	test_all_match git status --porcelain=v2 -uno
 '
 
+test_expect_success 'status reports sparse-checkout' '
+	init_repos &&
+	git -C sparse-checkout status >full &&
+	git -C sparse-index status >sparse &&
+	test_i18ngrep "You are in a sparse checkout with " full &&
+	test_i18ngrep "You are in a sparse checkout." sparse
+'
+
 test_expect_success 'add, commit, checkout' '
 	init_repos &&
 
diff --git a/wt-status.c b/wt-status.c
index 42b673571696bd..96db3e74962ea9 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -1493,9 +1493,12 @@ static void show_sparse_checkout_in_use(struct wt_status *s,
 	if (s->state.sparse_checkout_percentage == SPARSE_CHECKOUT_DISABLED)
 		return;
 
-	status_printf_ln(s, color,
-			 _("You are in a sparse checkout with %d%% of tracked files present."),
-			 s->state.sparse_checkout_percentage);
+	if (s->state.sparse_checkout_percentage == SPARSE_CHECKOUT_SPARSE_INDEX)
+		status_printf_ln(s, color, _("You are in a sparse checkout."));
+	else
+		status_printf_ln(s, color,
+				_("You are in a sparse checkout with %d%% of tracked files present."),
+				s->state.sparse_checkout_percentage);
 	wt_longstatus_print_trailer(s);
 }
 
@@ -1653,6 +1656,11 @@ static void wt_status_check_sparse_checkout(struct repository *r,
 		return;
 	}
 
+	if (r->index->sparse_index) {
+		state->sparse_checkout_percentage = SPARSE_CHECKOUT_SPARSE_INDEX;
+		return;
+	}
+
 	for (i = 0; i < r->index->cache_nr; i++) {
 		struct cache_entry *ce = r->index->cache[i];
 		if (ce_skip_worktree(ce))
diff --git a/wt-status.h b/wt-status.h
index 0d32799b28e19a..ab9cc9d8f032b7 100644
--- a/wt-status.h
+++ b/wt-status.h
@@ -78,6 +78,7 @@ enum wt_status_format {
 };
 
 #define SPARSE_CHECKOUT_DISABLED -1
+#define SPARSE_CHECKOUT_SPARSE_INDEX -2
 
 struct wt_status_state {
 	int merge_in_progress;

From d76723ee5313d0176e8777af4aa104d7562543c1 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:37 +0000
Subject: [PATCH 080/110] status: use sparse-index throughout

By testing 'git -c core.fsmonitor= status -uno', we can check for the
simplest index operations that can be made sparse-aware. The necessary
implementation details are already integrated with sparse-checkout, so
modify command_requires_full_index to be zero for cmd_status().

In refresh_index(), we loop through the index entries to refresh their
stat() information. However, sparse directories have no stat()
information to populate. Ignore these entries.

This allows 'git status' to no longer expand a sparse index to a full
one. This is further tested by dropping the "-uno" option and adding an
untracked file into the worktree.

The performance test p2000-sparse-checkout-operations.sh demonstrates
these improvements:

Test                                  HEAD~1           HEAD
-----------------------------------------------------------------------------
2000.2: git status (full-index-v3)    0.31(0.30+0.05)  0.31(0.29+0.06) +0.0%
2000.3: git status (full-index-v4)    0.31(0.29+0.07)  0.34(0.30+0.08) +9.7%
2000.4: git status (sparse-index-v3)  2.35(2.28+0.10)  0.04(0.04+0.05) -98.3%
2000.5: git status (sparse-index-v4)  2.35(2.24+0.15)  0.05(0.04+0.06) -97.9%

Note that since HEAD~1 was expanding the sparse index by parsing trees,
it was artificially slower than the full index case. Thus, the 98%
improvement is misleading, and instead we should celebrate the 0.34s to
0.05s improvement of 85%. This is more indicative of the peformance
gains we are expecting by using a sparse index.

Note: we are dropping the assignment of core.fsmonitor here. This is not
necessary for the test script as we are not altering the config any
other way. Correct integration with FS Monitor will be validated in
later changes.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/commit.c                         |  3 +++
 read-cache.c                             | 10 ++++++++--
 t/t1092-sparse-checkout-compatibility.sh | 13 +++++++++----
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 190d215d43b37b..12f51db158a0e0 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1510,6 +1510,9 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 	if (argc == 2 && !strcmp(argv[1], "-h"))
 		usage_with_options(builtin_status_usage, builtin_status_options);
 
+	prepare_repo_settings(the_repository);
+	the_repository->settings.command_requires_full_index = 0;
+
 	status_init_config(&s, git_status_config);
 	argc = parse_options(argc, argv, prefix,
 			     builtin_status_options,
diff --git a/read-cache.c b/read-cache.c
index 77961a38854069..f67e04949de4ff 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1585,8 +1585,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 	 */
 	preload_index(istate, pathspec, 0);
 	trace2_region_enter("index", "refresh", NULL);
-	/* TODO: audit for interaction with sparse-index. */
-	ensure_full_index(istate);
+
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct cache_entry *ce, *new_entry;
 		int cache_errno = 0;
@@ -1601,6 +1600,13 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 		if (ignore_skip_worktree && ce_skip_worktree(ce))
 			continue;
 
+		/*
+		 * If this entry is a sparse directory, then there isn't
+		 * any stat() information to update. Ignore the entry.
+		 */
+		if (S_ISSPARSEDIR(ce->ce_mode))
+			continue;
+
 		if (pathspec && !ce_path_match(istate, ce, pathspec, seen))
 			filtered = 1;
 
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 375b0d35565f86..751f397cc7f46e 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -511,12 +511,17 @@ test_expect_success 'sparse-index is expanded and converted back' '
 	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
 		git -C sparse-index -c core.fsmonitor="" reset --hard &&
 	test_region index convert_to_sparse trace2.txt &&
-	test_region index ensure_full_index trace2.txt &&
+	test_region index ensure_full_index trace2.txt
+'
 
-	rm trace2.txt &&
+test_expect_success 'sparse-index is not expanded' '
+	init_repos &&
+
+	rm -f trace2.txt &&
+	echo >>sparse-index/untracked.txt &&
 	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
-		git -C sparse-index -c core.fsmonitor="" status -uno &&
-	test_region index ensure_full_index trace2.txt
+		git -C sparse-index status &&
+	test_region ! index ensure_full_index trace2.txt
 '
 
 test_done

From fe0d576153117953189e03d6c2c09445ccad4977 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:38 +0000
Subject: [PATCH 081/110] wt-status: expand added sparse directory entries

It is difficult, but possible, to get into a state where we intend to
add a directory that is outside of the sparse-checkout definition. Add a
test to t1092-sparse-checkout-compatibility.sh that demonstrates this
using a combination of 'git reset --mixed' and 'git checkout --orphan'.

This test failed before because the output of 'git status
--porcelain=v2' would not match on the lines for folder1/:

* The sparse-checkout repo (with a full index) would output each path
  name that is intended to be added.

* The sparse-index repo would only output that "folder1/" is staged for
  addition.

The status should report the full list of files to be added, and so this
sparse-directory entry should be expanded to a full list when reaching
it inside the wt_status_collect_changes_initial() method. Use
read_tree_at() to assist.

Somehow, this loop over the cache entries was not guarded by
ensure_full_index() as intended.

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 33 +++++++++++++++
 wt-status.c                              | 51 ++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 751f397cc7f46e..2394c36d881106 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -524,4 +524,37 @@ test_expect_success 'sparse-index is not expanded' '
 	test_region ! index ensure_full_index trace2.txt
 '
 
+test_expect_success 'reset mixed and checkout orphan' '
+	init_repos &&
+
+	test_all_match git checkout rename-out-to-in &&
+
+	# Sparse checkouts do not agree with full checkouts about
+	# how to report a directory/file conflict during a reset.
+	# This command would fail with test_all_match because the
+	# full checkout reports "T folder1/0/1" while a sparse
+	# checkout reports "D folder1/0/1". This matches because
+	# the sparse checkouts skip "adding" the other side of
+	# the conflict.
+	test_sparse_match git reset --mixed HEAD~1 &&
+	test_sparse_match test-tool read-cache --table --expand &&
+	test_sparse_match git status --porcelain=v2 &&
+
+	# At this point, sparse-checkouts behave differently
+	# from the full-checkout.
+	test_sparse_match git checkout --orphan new-branch &&
+	test_sparse_match test-tool read-cache --table --expand &&
+	test_sparse_match git status --porcelain=v2
+'
+
+test_expect_success 'add everything with deep new file' '
+	init_repos &&
+
+	run_on_sparse git sparse-checkout set deep/deeper1/deepest &&
+
+	run_on_all touch deep/deeper1/x &&
+	test_all_match git add . &&
+	test_all_match git status --porcelain=v2
+'
+
 test_done
diff --git a/wt-status.c b/wt-status.c
index 96db3e74962ea9..0317baef87e8eb 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -657,6 +657,36 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	clear_pathspec(&rev.prune_data);
 }
 
+static int add_file_to_list(const struct object_id *oid,
+			    struct strbuf *base, const char *path,
+			    unsigned int mode, void *context)
+{
+	struct string_list_item *it;
+	struct wt_status_change_data *d;
+	struct wt_status *s = context;
+	struct strbuf full_name = STRBUF_INIT;
+
+	if (S_ISDIR(mode))
+		return READ_TREE_RECURSIVE;
+
+	strbuf_add(&full_name, base->buf, base->len);
+	strbuf_addstr(&full_name, path);
+	it = string_list_insert(&s->change, full_name.buf);
+	d = it->util;
+	if (!d) {
+		CALLOC_ARRAY(d, 1);
+		it->util = d;
+	}
+
+	d->index_status = DIFF_STATUS_ADDED;
+	/* Leave {mode,oid}_head zero for adds. */
+	d->mode_index = mode;
+	oidcpy(&d->oid_index, oid);
+	s->committable = 1;
+	strbuf_release(&full_name);
+	return 0;
+}
+
 static void wt_status_collect_changes_initial(struct wt_status *s)
 {
 	struct index_state *istate = s->repo->index;
@@ -671,6 +701,27 @@ static void wt_status_collect_changes_initial(struct wt_status *s)
 			continue;
 		if (ce_intent_to_add(ce))
 			continue;
+		if (S_ISSPARSEDIR(ce->ce_mode)) {
+			/*
+			 * This is a sparse directory entry, so we want to collect all
+			 * of the added files within the tree. This requires recursively
+			 * expanding the trees to find the elements that are new in this
+			 * tree and marking them with DIFF_STATUS_ADDED.
+			 */
+			struct strbuf base = STRBUF_INIT;
+			struct pathspec ps = { 0 };
+			struct tree *tree = lookup_tree(istate->repo, &ce->oid);
+
+			ps.recursive = 1;
+			ps.has_wildcard = 1;
+			ps.max_depth = -1;
+
+			strbuf_add(&base, ce->name, ce->ce_namelen);
+			read_tree_at(istate->repo, tree, &base, &ps,
+				     add_file_to_list, s);
+			continue;
+		}
+
 		it = string_list_insert(&s->change, ce->name);
 		d = it->util;
 		if (!d) {

From f8fe49e53958f19b0e62e9549a80fcaa56d2f3cf Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:39 +0000
Subject: [PATCH 082/110] fsmonitor: integrate with sparse index

If we need to expand a sparse-index into a full one, then the FS Monitor
bitmap is going to be incorrect. Ensure that we start fresh at such an
event.

While this is currently a performance drawback, the eventual hope of the
sparse-index feature is that these expansions will be rare and hence we
will be able to keep the FS Monitor data accurate across multiple Git
commands.

These tests are added to demonstrate that the behavior is the same
across a full index and a sparse index, but also that file modifications
to a tracked directory outside of the sparse cone will trigger
ensure_full_index().

Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sparse-index.c              |  7 ++++++
 t/t7519-status-fsmonitor.sh | 49 +++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/sparse-index.c b/sparse-index.c
index ef53bd2198b479..53c8f711ccc82e 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -186,6 +186,10 @@ int convert_to_sparse(struct index_state *istate)
 	cache_tree_free(&istate->cache_tree);
 	cache_tree_update(istate, 0);
 
+	istate->fsmonitor_has_run_once = 0;
+	FREE_AND_NULL(istate->fsmonitor_dirty);
+	FREE_AND_NULL(istate->fsmonitor_last_update);
+
 	istate->sparse_index = 1;
 	trace2_region_leave("index", "convert_to_sparse", istate->repo);
 	return 0;
@@ -282,6 +286,9 @@ void ensure_full_index(struct index_state *istate)
 	istate->cache = full->cache;
 	istate->cache_nr = full->cache_nr;
 	istate->cache_alloc = full->cache_alloc;
+	istate->fsmonitor_has_run_once = 0;
+	FREE_AND_NULL(istate->fsmonitor_dirty);
+	FREE_AND_NULL(istate->fsmonitor_last_update);
 
 	strbuf_release(&base);
 	free(full);
diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index 637391c6ce4608..deea88d4431d23 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -73,6 +73,7 @@ test_expect_success 'setup' '
 	expect*
 	actual*
 	marker*
+	trace2*
 	EOF
 '
 
@@ -383,4 +384,52 @@ test_expect_success 'status succeeds after staging/unstaging' '
 	)
 '
 
+# Usage:
+# check_sparse_index_behavior [!]
+# If "!" is supplied, then we verify that we do not call ensure_full_index
+# during a call to 'git status'. Otherwise, we verify that we _do_ call it.
+check_sparse_index_behavior () {
+	git status --porcelain=v2 >expect &&
+	git sparse-checkout init --cone --sparse-index &&
+	git sparse-checkout set dir1 dir2 &&
+	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
+		git status --porcelain=v2 >actual &&
+	test_region $1 index ensure_full_index trace2.txt &&
+	test_region fsm_hook query trace2.txt &&
+	test_cmp expect actual &&
+	rm trace2.txt &&
+	git sparse-checkout disable
+}
+
+test_expect_success 'status succeeds with sparse index' '
+	git reset --hard &&
+
+	test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" &&
+	check_sparse_index_behavior ! &&
+
+	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+		printf "last_update_token\0"
+	EOF
+	git config core.fsmonitor .git/hooks/fsmonitor-test &&
+	check_sparse_index_behavior ! &&
+
+	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+		printf "last_update_token\0"
+		printf "dir1/modified\0"
+	EOF
+	check_sparse_index_behavior ! &&
+
+	cp -r dir1 dir1a &&
+	git add dir1a &&
+	git commit -m "add dir1a" &&
+
+	# This one modifies outside the sparse-checkout definition
+	# and hence we expect to expand the sparse-index.
+	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+		printf "last_update_token\0"
+		printf "dir1a/modified\0"
+	EOF
+	check_sparse_index_behavior
+'
+
 test_done

From e5ca291076a8a936283bb2c57433c4393d3f80c2 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 14 Jul 2021 13:12:40 +0000
Subject: [PATCH 083/110] t1092: document bad sparse-checkout behavior

There are several situations where a repository with sparse-checkout
enabled will act differently than a normal repository, and in ways that
are not intentional. The test t1092-sparse-checkout-compatibility.sh
documents some of these deviations, but a casual reader might think
these are intentional behavior changes.

Add comments on these tests that make it clear that these behaviors
should be updated. Using 'NEEDSWORK' helps contributors find that these
are potential areas for improvement.

Helped-by: Elijah Newren <newren@gmail.com>
Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 2394c36d881106..cabbd42e339041 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -392,8 +392,8 @@ test_expect_failure 'blame with pathspec outside sparse definition' '
 	test_all_match git blame deep/deeper2/deepest/a
 '
 
-# TODO: reset currently does not behave as expected when in a
-# sparse-checkout.
+# NEEDSWORK: a sparse-checkout behaves differently from a full checkout
+# in this scenario, but it shouldn't.
 test_expect_failure 'checkout and reset (mixed)' '
 	init_repos &&
 
@@ -403,8 +403,8 @@ test_expect_failure 'checkout and reset (mixed)' '
 	test_all_match git reset update-folder2
 '
 
-# Ensure that sparse-index behaves identically to
-# sparse-checkout with a full index.
+# NEEDSWORK: a sparse-checkout behaves differently from a full checkout
+# in this scenario, but it shouldn't.
 test_expect_success 'checkout and reset (mixed) [sparse]' '
 	init_repos &&
 
@@ -524,6 +524,8 @@ test_expect_success 'sparse-index is not expanded' '
 	test_region ! index ensure_full_index trace2.txt
 '
 
+# NEEDSWORK: a sparse-checkout behaves differently from a full checkout
+# in this scenario, but it shouldn't.
 test_expect_success 'reset mixed and checkout orphan' '
 	init_repos &&
 

From 6e74958f5900855cc0c3b6484b2b2a7732293e6c Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 1 Jun 2021 13:41:33 -0400
Subject: [PATCH 084/110] p2000: add 'git checkout -' test and decrease depth

As we increase our list of commands to test in
p2000-sparse-operations.sh, we will want to have a slightly smaller test
repository. Reduce the size by a factor of four by reducing the depth of
the step that creates a big index around a moderately-sized repository.

Also add a step to run 'git checkout -' on repeat. This requires having
a previous location in the reflog, so add that to the initialization
steps.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/perf/p2000-sparse-operations.sh | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh
index 94513c97748943..f7f8c0121030a2 100755
--- a/t/perf/p2000-sparse-operations.sh
+++ b/t/perf/p2000-sparse-operations.sh
@@ -6,7 +6,7 @@ test_description="test performance of Git operations using the index"
 
 test_perf_default_repo
 
-SPARSE_CONE=f2/f4/f1
+SPARSE_CONE=f2/f4
 
 test_expect_success 'setup repo and indexes' '
 	git reset --hard HEAD &&
@@ -27,7 +27,7 @@ test_expect_success 'setup repo and indexes' '
 	OLD_COMMIT=$(git rev-parse HEAD) &&
 	OLD_TREE=$(git rev-parse HEAD^{tree}) &&
 
-	for i in $(test_seq 1 4)
+	for i in $(test_seq 1 3)
 	do
 		cat >in <<-EOF &&
 			100755 blob $BLOB	a
@@ -43,14 +43,23 @@ test_expect_success 'setup repo and indexes' '
 	done &&
 
 	git sparse-checkout init --cone &&
-	git branch -f wide $OLD_COMMIT &&
+	git sparse-checkout set $SPARSE_CONE &&
+	git checkout -b wide $OLD_COMMIT &&
+
+	for l2 in f1 f2 f3 f4
+	do
+		echo more bogus >>$SPARSE_CONE/$l2/a &&
+		git commit -a -m "edit $SPARSE_CONE/$l2/a" || return 1
+	done &&
+
 	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v3 &&
 	(
 		cd full-index-v3 &&
 		git sparse-checkout init --cone &&
 		git sparse-checkout set $SPARSE_CONE &&
 		git config index.version 3 &&
-		git update-index --index-version=3
+		git update-index --index-version=3 &&
+		git checkout HEAD~4
 	) &&
 	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v4 &&
 	(
@@ -58,7 +67,8 @@ test_expect_success 'setup repo and indexes' '
 		git sparse-checkout init --cone &&
 		git sparse-checkout set $SPARSE_CONE &&
 		git config index.version 4 &&
-		git update-index --index-version=4
+		git update-index --index-version=4 &&
+		git checkout HEAD~4
 	) &&
 	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v3 &&
 	(
@@ -66,7 +76,8 @@ test_expect_success 'setup repo and indexes' '
 		git sparse-checkout init --cone --sparse-index &&
 		git sparse-checkout set $SPARSE_CONE &&
 		git config index.version 3 &&
-		git update-index --index-version=3
+		git update-index --index-version=3 &&
+		git checkout HEAD~4
 	) &&
 	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v4 &&
 	(
@@ -74,7 +85,8 @@ test_expect_success 'setup repo and indexes' '
 		git sparse-checkout init --cone --sparse-index &&
 		git sparse-checkout set $SPARSE_CONE &&
 		git config index.version 4 &&
-		git update-index --index-version=4
+		git update-index --index-version=4 &&
+		git checkout HEAD~4
 	)
 '
 
@@ -97,5 +109,6 @@ test_perf_on_all git status
 test_perf_on_all git add -A
 test_perf_on_all git add .
 test_perf_on_all git commit -a -m A
+test_perf_on_all git checkout -f -
 
 test_done

From 3e1d03c41be9701c2cd518afc6ebc2797bca4398 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 7 Jun 2021 07:26:00 -0400
Subject: [PATCH 085/110] p2000: compress repo names

By using shorter names for the test repos, we will get a slightly more
compressed performance summary without comprimising clarity.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/perf/p2000-sparse-operations.sh | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh
index f7f8c0121030a2..597626276fbeae 100755
--- a/t/perf/p2000-sparse-operations.sh
+++ b/t/perf/p2000-sparse-operations.sh
@@ -52,36 +52,36 @@ test_expect_success 'setup repo and indexes' '
 		git commit -a -m "edit $SPARSE_CONE/$l2/a" || return 1
 	done &&
 
-	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v3 &&
+	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v3 &&
 	(
-		cd full-index-v3 &&
+		cd full-v3 &&
 		git sparse-checkout init --cone &&
 		git sparse-checkout set $SPARSE_CONE &&
 		git config index.version 3 &&
 		git update-index --index-version=3 &&
 		git checkout HEAD~4
 	) &&
-	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v4 &&
+	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v4 &&
 	(
-		cd full-index-v4 &&
+		cd full-v4 &&
 		git sparse-checkout init --cone &&
 		git sparse-checkout set $SPARSE_CONE &&
 		git config index.version 4 &&
 		git update-index --index-version=4 &&
 		git checkout HEAD~4
 	) &&
-	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v3 &&
+	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-v3 &&
 	(
-		cd sparse-index-v3 &&
+		cd sparse-v3 &&
 		git sparse-checkout init --cone --sparse-index &&
 		git sparse-checkout set $SPARSE_CONE &&
 		git config index.version 3 &&
 		git update-index --index-version=3 &&
 		git checkout HEAD~4
 	) &&
-	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v4 &&
+	git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-v4 &&
 	(
-		cd sparse-index-v4 &&
+		cd sparse-v4 &&
 		git sparse-checkout init --cone --sparse-index &&
 		git sparse-checkout set $SPARSE_CONE &&
 		git config index.version 4 &&
@@ -92,8 +92,8 @@ test_expect_success 'setup repo and indexes' '
 
 test_perf_on_all () {
 	command="$@"
-	for repo in full-index-v3 full-index-v4 \
-		    sparse-index-v3 sparse-index-v4
+	for repo in full-v3 full-v4 \
+		    sparse-v3 sparse-v4
 	do
 		test_perf "$command ($repo)" "
 			(

From cd94f820052c948e522f947a05797ebaf1907121 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 17 May 2021 16:13:20 -0400
Subject: [PATCH 086/110] commit: integrate with sparse-index

Update 'git commit' to allow using the sparse-index in memory without
expanding to a full one. The only place that had an ensure_full_index()
call was in cache_tree_update(). The recursive algorithm for
update_one() was already updated in 2de37c536 (cache-tree: integrate
with sparse directory entries, 2021-03-03) to handle sparse directory
entries in the index.

Most of this change involves testing different command-line options that
allow specifying which on-disk changes should be included in the commit.
This includes no options (only take currently-staged changes), -a (take
all tracked changes), and --include (take a list of specific changes).
To simplify testing that these options do not expand the index, update
the test that previously verified that 'git status' does not expand the
index with a helper method, ensure_not_expanded().

This allows 'git commit' to operate much faster when the sparse-checkout
cone is much smaller than the full list of files at HEAD.

Here are the relevant lines from p2000-sparse-operations.sh:

Test                                      HEAD~1           HEAD
----------------------------------------------------------------------------------
2000.14: git commit -a -m A (full-v3)     0.35(0.26+0.06)  0.36(0.28+0.07) +2.9%
2000.15: git commit -a -m A (full-v4)     0.32(0.26+0.05)  0.34(0.28+0.06) +6.3%
2000.16: git commit -a -m A (sparse-v3)   0.63(0.59+0.06)  0.04(0.05+0.05) -93.7%
2000.17: git commit -a -m A (sparse-v4)   0.64(0.59+0.08)  0.04(0.04+0.04) -93.8%

It is important to compare the full-index case to the sparse-index case,
so the improvement for index version v4 is actually an 88% improvement in
this synthetic example.

In a real repository with over two million files at HEAD and 60,000
files in the sparse-checkout definition, the time for 'git commit -a'
went from 2.61 seconds to 134ms. I compared this to the result if the
index only contained the paths in the sparse-checkout definition and
found the theoretical optimum to be 120ms, so the out-of-cone paths only
add a 12% overhead.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/commit.c                         |  3 ++
 cache-tree.c                             |  2 -
 t/t1092-sparse-checkout-compatibility.sh | 47 ++++++++++++++++++++++--
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 12f51db158a0e0..0bc64892505f7c 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1682,6 +1682,9 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
 	if (argc == 2 && !strcmp(argv[1], "-h"))
 		usage_with_options(builtin_commit_usage, builtin_commit_options);
 
+	prepare_repo_settings(the_repository);
+	the_repository->settings.command_requires_full_index = 0;
+
 	status_init_config(&s, git_commit_config);
 	s.commit_template = 1;
 	status_format = STATUS_FORMAT_NONE; /* Ignore status.short */
diff --git a/cache-tree.c b/cache-tree.c
index 45e58666afcb49..577b18d8811c02 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -461,8 +461,6 @@ int cache_tree_update(struct index_state *istate, int flags)
 	if (i)
 		return i;
 
-	ensure_full_index(istate);
-
 	if (!istate->cache_tree)
 		istate->cache_tree = cache_tree();
 
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index cabbd42e339041..d3e34d0acaccc9 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -262,6 +262,34 @@ test_expect_success 'add, commit, checkout' '
 	test_all_match git checkout -
 '
 
+test_expect_success 'commit including unstaged changes' '
+	init_repos &&
+
+	write_script edit-file <<-\EOF &&
+	echo $1 >$2
+	EOF
+
+	run_on_all ../edit-file 1 a &&
+	run_on_all ../edit-file 1 deep/a &&
+
+	test_all_match git commit -m "-a" -a &&
+	test_all_match git status --porcelain=v2 &&
+
+	run_on_all ../edit-file 2 a &&
+	run_on_all ../edit-file 2 deep/a &&
+
+	test_all_match git commit -m "--include" --include deep/a &&
+	test_all_match git status --porcelain=v2 &&
+	test_all_match git commit -m "--include" --include a &&
+	test_all_match git status --porcelain=v2 &&
+
+	run_on_all ../edit-file 3 a &&
+	run_on_all ../edit-file 3 deep/a &&
+
+	test_all_match git commit -m "--amend" -a --amend &&
+	test_all_match git status --porcelain=v2
+'
+
 test_expect_success 'status/add: outside sparse cone' '
 	init_repos &&
 
@@ -514,14 +542,25 @@ test_expect_success 'sparse-index is expanded and converted back' '
 	test_region index ensure_full_index trace2.txt
 '
 
-test_expect_success 'sparse-index is not expanded' '
-	init_repos &&
-
+ensure_not_expanded () {
 	rm -f trace2.txt &&
 	echo >>sparse-index/untracked.txt &&
 	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
-		git -C sparse-index status &&
+		git -C sparse-index "$@" &&
 	test_region ! index ensure_full_index trace2.txt
+}
+
+test_expect_success 'sparse-index is not expanded' '
+	init_repos &&
+
+	ensure_not_expanded status &&
+	ensure_not_expanded commit --allow-empty -m empty &&
+	echo >>sparse-index/a &&
+	ensure_not_expanded commit -a -m a &&
+	echo >>sparse-index/a &&
+	ensure_not_expanded commit --include a -m a &&
+	echo >>sparse-index/deep/deeper1/a &&
+	ensure_not_expanded commit --include deep/deeper1/a -m deeper
 '
 
 # NEEDSWORK: a sparse-checkout behaves differently from a full checkout

From 65e79b8037ca0268363db9ff967c1342a76485c5 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 22 Jun 2021 15:55:09 -0400
Subject: [PATCH 087/110] sparse-index: recompute cache-tree

When some commands run with command_requires_full_index=1, then the
index can get in a state where the in-memory cache tree is actually
equal to the sparse index's cache tree instead of the full one.

This results in incorrect entry_count values. By clearing the cache
tree before converting to sparse, we avoid this issue.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 sparse-index.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sparse-index.c b/sparse-index.c
index 53c8f711ccc82e..c6b4feec413a8f 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -170,6 +170,8 @@ int convert_to_sparse(struct index_state *istate)
 	if (index_has_unmerged_entries(istate))
 		return 0;
 
+	/* Clear and recompute the cache-tree */
+	cache_tree_free(&istate->cache_tree);
 	if (cache_tree_update(istate, 0)) {
 		warning(_("unable to update cache-tree, staying full"));
 		return -1;

From e9a9981477e9d6ba3aef96e39db5736b334d7189 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 25 May 2021 06:59:05 -0400
Subject: [PATCH 088/110] checkout: stop expanding sparse indexes

Previous changes did the necessary improvements to unpack-trees.c and
diff-lib.c in order to modify a sparse index based on its comparision
with a tree. The only remaining work is to remove some
ensure_full_index() calls and add tests that verify that the index is
not expanded in our interesting cases. Include 'switch' and 'restore' in
these tests, as they share a base implementation with 'checkout'.

Here are the relevant performance results from
p2000-sparse-operations.sh:

Test                                     HEAD~1           HEAD
--------------------------------------------------------------------------------
2000.18: git checkout -f - (full-v3)     0.49(0.43+0.03)  0.47(0.39+0.05) -4.1%
2000.19: git checkout -f - (full-v4)     0.45(0.37+0.06)  0.42(0.37+0.05) -6.7%
2000.20: git checkout -f - (sparse-v3)   0.76(0.71+0.07)  0.04(0.03+0.04) -94.7%
2000.21: git checkout -f - (sparse-v4)   0.75(0.72+0.04)  0.05(0.06+0.04) -93.3%

It is important to compare the full index case to the sparse index case,
as the previous results for the sparse index were inflated by the index
expansion. For index v4, this is an 88% improvement.

On an internal repository with over two million paths at HEAD and a
sparse-checkout definition containing ~60,000 of those paths, 'git
checkout' went from 3.5s to 297ms with this change. The theoretical
optimum where only those ~60,000 paths exist was 275ms, so the extra
sparse directory entries contribute a 22ms overhead.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/checkout.c                       |  8 +++-----
 t/t1092-sparse-checkout-compatibility.sh | 10 +++++++++-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index f4cd7747d35dd1..b5d477919a7438 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -378,9 +378,6 @@ static int checkout_worktree(const struct checkout_opts *opts,
 	if (pc_workers > 1)
 		init_parallel_checkout();
 
-	/* TODO: audit for interaction with sparse-index. */
-	ensure_full_index(&the_index);
-
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
 		if (ce->ce_flags & CE_MATCHED) {
@@ -530,8 +527,6 @@ static int checkout_paths(const struct checkout_opts *opts,
 	 * Make sure all pathspecs participated in locating the paths
 	 * to be checked out.
 	 */
-	/* TODO: audit for interaction with sparse-index. */
-	ensure_full_index(&the_index);
 	for (pos = 0; pos < active_nr; pos++)
 		if (opts->overlay_mode)
 			mark_ce_for_checkout_overlay(active_cache[pos],
@@ -1593,6 +1588,9 @@ static int checkout_main(int argc, const char **argv, const char *prefix,
 
 	git_config(git_checkout_config, opts);
 
+	prepare_repo_settings(the_repository);
+	the_repository->settings.command_requires_full_index = 0;
+
 	opts->track = BRANCH_TRACK_UNSPECIFIED;
 
 	if (!opts->accept_pathspec && !opts->accept_ref)
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index d3e34d0acaccc9..fde3b41aba8b86 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -560,7 +560,15 @@ test_expect_success 'sparse-index is not expanded' '
 	echo >>sparse-index/a &&
 	ensure_not_expanded commit --include a -m a &&
 	echo >>sparse-index/deep/deeper1/a &&
-	ensure_not_expanded commit --include deep/deeper1/a -m deeper
+	ensure_not_expanded commit --include deep/deeper1/a -m deeper &&
+	ensure_not_expanded checkout rename-out-to-out &&
+	ensure_not_expanded checkout - &&
+	ensure_not_expanded switch rename-out-to-out &&
+	ensure_not_expanded switch - &&
+	git -C sparse-index reset --hard &&
+	ensure_not_expanded checkout rename-out-to-out -- deep/deeper1 &&
+	git -C sparse-index reset --hard &&
+	ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1
 '
 
 # NEEDSWORK: a sparse-checkout behaves differently from a full checkout

From 4b801c854fb7891a6530121281cff3f67451b283 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 19 Jul 2021 11:01:13 -0400
Subject: [PATCH 089/110] t1092: document bad 'git checkout' behavior

Add new branches to the test repo that demonstrate directory/file
conflicts in different ways. Since the directory 'folder1/' has
adjacent files 'folder1-', 'folder1.txt', and 'folder10' it causes
searches for 'folder1/' to land in a different place in the index than a
search for 'folder1'. This causes a change in behavior when working with
the df-conflict-1 and df-conflict-2 branches, whose only difference is
that the first uses 'folder1' as the conflict and the other uses
'folder2' which does not have these adjacent files.

We can extend two tests that compare the behavior across different 'git
checkout' commands, and we see already that the behavior will be
different in some cases and not in others. The difference between the
two test loops is that one uses 'git reset --hard' between iterations.

Further, we isolate the behavior of creating a staged change within a
directory and then checking out a branch where that directory is
replaced with a file. A full checkout behaves differently across these
two cases, while a sparse-checkout cone behaves consistently. In both
cases, the behavior is wrong. In one case, the staged change is dropped
entirely. The other case the staged change is kept, replacing the file
at that location, but none of the other files in the directory are kept.

Likely, the correct behavior in this case is to reject the checkout and
report the conflict, leaving HEAD in its previous location. None of the
cases behave this way currently. Use comments to demonstrate that the
tested behavior is only a documentation of the current, incorrect
behavior to ensure we do not _accidentally_ change it. Instead, we would
prefer to change it on purpose with a future change.

At this point, the sparse-index does not handle these 'git checkout'
commands correctly. Or rather, it _does_ reject the 'git checkout' when
we have the staged change, but for the wrong reason. It also rejects the
'git checkout' commands when there is no staged change and we want to
replace a directory with a file. A fix for that unstaged case will
follow in the next change, but that will make the sparse-index agree
with the full checkout case in these documented incorrect behaviors.

Helped-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 142 ++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 2 deletions(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index fde3b41aba8b86..79b4a8ce1999c6 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -95,6 +95,25 @@ test_expect_success 'setup' '
 		git add . &&
 		git commit -m "rename deep/deeper1/... to folder1/..." &&
 
+		git checkout -b df-conflict-1 base &&
+		rm -rf folder1 &&
+		echo content >folder1 &&
+		git add . &&
+		git commit -m "dir to file" &&
+
+		git checkout -b df-conflict-2 base &&
+		rm -rf folder2 &&
+		echo content >folder2 &&
+		git add . &&
+		git commit -m "dir to file" &&
+
+		git checkout -b fd-conflict base &&
+		rm a &&
+		mkdir a &&
+		echo content >a/a &&
+		git add . &&
+		git commit -m "file to dir" &&
+
 		git checkout -b deepest base &&
 		echo "updated deepest" >deep/deeper1/deepest/a &&
 		git commit -a -m "update deepest" &&
@@ -358,10 +377,16 @@ test_expect_success 'diff --staged' '
 	test_all_match git diff --staged
 '
 
+# NEEDSWORK: sparse-checkout behaves differently from full-checkout when
+# running this test with 'df-conflict-2' after 'df-conflict-1'.
 test_expect_success 'diff with renames and conflicts' '
 	init_repos &&
 
-	for branch in rename-out-to-out rename-out-to-in rename-in-to-out
+	for branch in rename-out-to-out \
+		      rename-out-to-in \
+		      rename-in-to-out \
+		      df-conflict-1 \
+		      fd-conflict
 	do
 		test_all_match git checkout rename-base &&
 		test_all_match git checkout $branch -- . &&
@@ -371,10 +396,15 @@ test_expect_success 'diff with renames and conflicts' '
 	done
 '
 
+# NEEDSWORK: the sparse-index fails to move HEAD across a directory/file
+# conflict such as when checking out df-conflict-1 and df-conflict2.
 test_expect_success 'diff with directory/file conflicts' '
 	init_repos &&
 
-	for branch in rename-out-to-out rename-out-to-in rename-in-to-out
+	for branch in rename-out-to-out \
+		      rename-out-to-in \
+		      rename-in-to-out \
+		      fd-conflict
 	do
 		git -C full-checkout reset --hard &&
 		test_sparse_match git reset --hard &&
@@ -606,4 +636,112 @@ test_expect_success 'add everything with deep new file' '
 	test_all_match git status --porcelain=v2
 '
 
+# NEEDSWORK: 'git checkout' behaves incorrectly in the case of
+# directory/file conflicts, even without sparse-checkout. Use this
+# test only as a documentation of the incorrect behavior, not a
+# measure of how it _should_ behave.
+test_expect_success 'checkout behaves oddly with df-conflict-1' '
+	init_repos &&
+
+	test_sparse_match git sparse-checkout disable &&
+
+	write_script edit-content <<-\EOF &&
+	echo content >>folder1/larger-content
+	git add folder1
+	EOF
+
+	run_on_all ../edit-content &&
+	test_all_match git status --porcelain=v2 &&
+
+	git -C sparse-checkout sparse-checkout init --cone &&
+	git -C sparse-index sparse-checkout init --cone --sparse-index &&
+
+	test_all_match git status --porcelain=v2 &&
+
+	# This checkout command should fail, because we have a staged
+	# change to folder1/larger-content, but the destination changes
+	# folder1 to a file.
+	git -C full-checkout checkout df-conflict-1 \
+		1>full-checkout-out \
+		2>full-checkout-err &&
+	git -C sparse-checkout checkout df-conflict-1 \
+		1>sparse-checkout-out \
+		2>sparse-checkout-err &&
+
+	# NEEDSWORK: the sparse-index case refuses to change HEAD here,
+	# but for the wrong reason.
+	test_must_fail git -C sparse-index checkout df-conflict-1 \
+		1>sparse-index-out \
+		2>sparse-index-err &&
+
+	# Instead, the checkout deletes the folder1 file and adds the
+	# folder1/larger-content file, leaving all other paths that were
+	# in folder1/ as deleted (without any warning).
+	cat >expect <<-EOF &&
+	D	folder1
+	A	folder1/larger-content
+	EOF
+	test_cmp expect full-checkout-out &&
+	test_cmp expect sparse-checkout-out &&
+
+	# stderr: Switched to branch df-conflict-1
+	test_cmp full-checkout-err sparse-checkout-err
+'
+
+# NEEDSWORK: 'git checkout' behaves incorrectly in the case of
+# directory/file conflicts, even without sparse-checkout. Use this
+# test only as a documentation of the incorrect behavior, not a
+# measure of how it _should_ behave.
+test_expect_success 'checkout behaves oddly with df-conflict-2' '
+	init_repos &&
+
+	test_sparse_match git sparse-checkout disable &&
+
+	write_script edit-content <<-\EOF &&
+	echo content >>folder2/larger-content
+	git add folder2
+	EOF
+
+	run_on_all ../edit-content &&
+	test_all_match git status --porcelain=v2 &&
+
+	git -C sparse-checkout sparse-checkout init --cone &&
+	git -C sparse-index sparse-checkout init --cone --sparse-index &&
+
+	test_all_match git status --porcelain=v2 &&
+
+	# This checkout command should fail, because we have a staged
+	# change to folder1/larger-content, but the destination changes
+	# folder1 to a file.
+	git -C full-checkout checkout df-conflict-2 \
+		1>full-checkout-out \
+		2>full-checkout-err &&
+	git -C sparse-checkout checkout df-conflict-2 \
+		1>sparse-checkout-out \
+		2>sparse-checkout-err &&
+
+	# NEEDSWORK: the sparse-index case refuses to change HEAD
+	# here, but for the wrong reason.
+	test_must_fail git -C sparse-index checkout df-conflict-2 \
+		1>sparse-index-out \
+		2>sparse-index-err &&
+
+	# The full checkout deviates from the df-conflict-1 case here!
+	# It drops the change to folder1/larger-content and leaves the
+	# folder1 path as-is on disk.
+	test_must_be_empty full-checkout-out &&
+
+	# In the sparse-checkout case, the checkout deletes the folder1
+	# file and adds the folder1/larger-content file, leaving all other
+	# paths that were in folder1/ as deleted (without any warning).
+	cat >expect <<-EOF &&
+	D	folder2
+	A	folder2/larger-content
+	EOF
+	test_cmp expect sparse-checkout-out &&
+
+	# Switched to branch df-conflict-1
+	test_cmp full-checkout-err sparse-checkout-err
+'
+
 test_done

From 71e301501c88399711a1bf8515d1747e92cfbb9b Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 19 Jul 2021 11:01:33 -0400
Subject: [PATCH 090/110] unpack-trees: resolve sparse-directory/file conflicts

When running unpack_trees() with a sparse index, we attempt to operate
on the index without expanding the sparse directory entries. Thus, we
operate by manipulating entire directories and passing them to the
unpack function. In the case of the 'git checkout' command, this is the
twoway_merge() function.

There are several cases in twoway_merge() that handle different
situations. One new one to add is the case of a directory/file conflict
where the directory is sparse. Before the sparse index, such a conflict
would appear as a list of file additions and deletions. Now,
twoway_merge() initializes 'current', 'oldtree', and 'newtree' from
src[0], src[1], and src[2], then sets 'oldtree' to NULL because it is
equal to the df_conflict_entry. The way to determine that we have a
directory/file conflict is to test that 'current' and 'newtree' disagree
on being sparse directory entries.

When we are in this case, we want to resolve the situation by calling
merged_entry(). This allows replacing the 'current' entry with the
'newtree' entry. This is important for cases where we want to run 'git
checkout' across the conflict and have the new HEAD represent the new
file type at that path. The first NEEDSWORK comment dropped in t1092
demonstrates this necessary behavior.

However, we still are in a confusing state when 'current' corresponds to
a staged change within a sparse directory that is not present at HEAD.
This should be atypical, because it requires adding a change outside of
the sparse-checkout cone, but it is possible. Since we are unable to
determine that this is a staged change within twoway_merge(), we cannot
add a case to reject the merge at this point. I believe this is due to
the use of df_conflict_entry in the place of 'oldtree' instead of using
the valud at HEAD, which would provide some perspective to this
decision. Any change that would allow this differentiation for staged
entries would need to involve information further up in unpack_trees().

That work should be done, sometime, because we are further confusing the
behavior of a directory/file conflict when staging a change in the
directory. The two cases 'checkout behaves oddly with df-conflict-?' in
t1092 demonstrate that even without a sparse-checkout, Git is not
consistent in its behavior. Neither of the two options seems correct,
either. This change makes the sparse-index behave differently than the
typcial sparse-checkout case, but it does match the full checkout
behavior in the df-conflict-2 case.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 24 ++++++++++++------------
 unpack-trees.c                           | 11 +++++++++++
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 79b4a8ce1999c6..91e30d6ec2248a 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -396,14 +396,14 @@ test_expect_success 'diff with renames and conflicts' '
 	done
 '
 
-# NEEDSWORK: the sparse-index fails to move HEAD across a directory/file
-# conflict such as when checking out df-conflict-1 and df-conflict2.
 test_expect_success 'diff with directory/file conflicts' '
 	init_repos &&
 
 	for branch in rename-out-to-out \
 		      rename-out-to-in \
 		      rename-in-to-out \
+		      df-conflict-1 \
+		      df-conflict-2 \
 		      fd-conflict
 	do
 		git -C full-checkout reset --hard &&
@@ -667,10 +667,7 @@ test_expect_success 'checkout behaves oddly with df-conflict-1' '
 	git -C sparse-checkout checkout df-conflict-1 \
 		1>sparse-checkout-out \
 		2>sparse-checkout-err &&
-
-	# NEEDSWORK: the sparse-index case refuses to change HEAD here,
-	# but for the wrong reason.
-	test_must_fail git -C sparse-index checkout df-conflict-1 \
+	git -C sparse-index checkout df-conflict-1 \
 		1>sparse-index-out \
 		2>sparse-index-err &&
 
@@ -684,7 +681,11 @@ test_expect_success 'checkout behaves oddly with df-conflict-1' '
 	test_cmp expect full-checkout-out &&
 	test_cmp expect sparse-checkout-out &&
 
+	# The sparse-index reports no output
+	test_must_be_empty sparse-index-out &&
+
 	# stderr: Switched to branch df-conflict-1
+	test_cmp full-checkout-err sparse-checkout-err &&
 	test_cmp full-checkout-err sparse-checkout-err
 '
 
@@ -719,17 +720,15 @@ test_expect_success 'checkout behaves oddly with df-conflict-2' '
 	git -C sparse-checkout checkout df-conflict-2 \
 		1>sparse-checkout-out \
 		2>sparse-checkout-err &&
-
-	# NEEDSWORK: the sparse-index case refuses to change HEAD
-	# here, but for the wrong reason.
-	test_must_fail git -C sparse-index checkout df-conflict-2 \
+	git -C sparse-index checkout df-conflict-2 \
 		1>sparse-index-out \
 		2>sparse-index-err &&
 
 	# The full checkout deviates from the df-conflict-1 case here!
 	# It drops the change to folder1/larger-content and leaves the
-	# folder1 path as-is on disk.
+	# folder1 path as-is on disk. The sparse-index behaves the same.
 	test_must_be_empty full-checkout-out &&
+	test_must_be_empty sparse-index-out &&
 
 	# In the sparse-checkout case, the checkout deletes the folder1
 	# file and adds the folder1/larger-content file, leaving all other
@@ -741,7 +740,8 @@ test_expect_success 'checkout behaves oddly with df-conflict-2' '
 	test_cmp expect sparse-checkout-out &&
 
 	# Switched to branch df-conflict-1
-	test_cmp full-checkout-err sparse-checkout-err
+	test_cmp full-checkout-err sparse-checkout-err &&
+	test_cmp full-checkout-err sparse-index-err
 '
 
 test_done
diff --git a/unpack-trees.c b/unpack-trees.c
index 0a5135ab39745d..309c1352f5d280 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -2619,6 +2619,17 @@ int twoway_merge(const struct cache_entry * const *src,
 			 same(current, oldtree) && !same(current, newtree)) {
 			/* 20 or 21 */
 			return merged_entry(newtree, current, o);
+		} else if (current && !oldtree && newtree &&
+			   S_ISSPARSEDIR(current->ce_mode) != S_ISSPARSEDIR(newtree->ce_mode) &&
+			   ce_stage(current) == 0) {
+			/*
+			 * This case is a directory/file conflict across the sparse-index
+			 * boundary. When we are changing from one path to another via
+			 * 'git checkout', then we want to replace one entry with another
+			 * via merged_entry(). If there are staged changes, then we should
+			 * reject the merge instead.
+			 */
+			return merged_entry(newtree, current, o);
 		} else
 			return reject_merge(current, o);
 	}

From 8f2fd9370fe078923256381ddd6ff4534d736ec7 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Wed, 21 Jul 2021 14:23:11 -0400
Subject: [PATCH 091/110] t1092: test merge conflicts outside cone

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 38 ++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 91e30d6ec2248a..47f8e5e54e3f52 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -114,6 +114,16 @@ test_expect_success 'setup' '
 		git add . &&
 		git commit -m "file to dir" &&
 
+		for side in left right
+		do
+			git checkout -b merge-$side base &&
+			echo $side >>deep/deeper2/a &&
+			echo $side >>folder1/a &&
+			echo $side >>folder2/a &&
+			git add . &&
+			git commit -m "$side" || return 1
+		done &&
+
 		git checkout -b deepest base &&
 		echo "updated deepest" >deep/deeper1/deepest/a &&
 		git commit -a -m "update deepest" &&
@@ -482,6 +492,34 @@ test_expect_success 'merge' '
 	test_all_match git rev-parse HEAD^{tree}
 '
 
+test_expect_success 'merge with conflict outside cone' '
+	init_repos &&
+
+	test_all_match git checkout -b merge-tip merge-left &&
+	test_all_match git status --porcelain=v2 &&
+	test_all_match test_must_fail git merge -m merge merge-right &&
+	test_all_match git status --porcelain=v2 &&
+
+	# Resolve the conflict in different ways:
+	# 1. Revert to the base
+	test_all_match git checkout base -- deep/deeper2/a &&
+	test_all_match git status --porcelain=v2 &&
+
+	# 2. Add the file with conflict markers
+	test_all_match git add folder1/a &&
+	test_all_match git status --porcelain=v2 &&
+
+	# 3. Rename the file to another sparse filename and
+	#    accept conflict markers as resolved content.
+	run_on_all mv folder2/a folder2/z &&
+	test_all_match git add folder2 &&
+	test_all_match git status --porcelain=v2 &&
+
+	test_all_match git merge --continue &&
+	test_all_match git status --porcelain=v2 &&
+	test_all_match git rev-parse HEAD^{tree}
+'
+
 test_expect_success 'merge with outside renames' '
 	init_repos &&
 

From 6e43f118fa0b7a6ba2e841f75a90406d847d9f09 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 11 Jan 2021 21:26:41 -0500
Subject: [PATCH 092/110] add: allow operating on a sparse-only index

Disable command_requires_full_index for 'git add'. This does not require
any additional removals of ensure_full_index(). The main reason is that
'git add' discovers changes based on the pathspec and the worktree
itself. These are then inserted into the index directly, and calls to
index_name_pos() or index_file_exists() already call expand_to_path() at
the appropriate time to support a sparse-index.

Add a test to check that 'git add -A' and 'git add <file>' does not
expand the index at all, as long as <file> is not within a sparse
directory. This does not help the global 'git add .' case.

We can measure the improvement using p2000-sparse-operations.sh with
these results:

Test                                  HEAD~1           HEAD
------------------------------------------------------------------------------
2000.6: git add -A (full-index-v3)    0.35(0.30+0.05)  0.37(0.29+0.06) +5.7%
2000.7: git add -A (full-index-v4)    0.31(0.26+0.06)  0.33(0.27+0.06) +6.5%
2000.8: git add -A (sparse-index-v3)  0.57(0.53+0.07)  0.05(0.04+0.08) -91.2%
2000.9: git add -A (sparse-index-v4)  0.58(0.55+0.06)  0.05(0.05+0.06) -91.4%

While the 91% improvement seems impressive, it's important to recognize
that previously we had significant overhead for expanding the
sparse-index. Comparing to the full index case, 'git add -A' goes from
0.37s to 0.05s, which is "only" an 86% improvement.

This modification to 'git add' creates some behavior change depending on
the use of a sparse index. We modify a test in t1092 to demonstrate
these changes which will be remedied in future changes.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/add.c                            |  3 +++
 t/t1092-sparse-checkout-compatibility.sh | 25 +++++++++++++++++-------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index b773b5a4993e3e..c76e6ddd359e41 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -528,6 +528,9 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 	add_new_files = !take_worktree_changes && !refresh_only && !add_renormalize;
 	require_pathspec = !(take_worktree_changes || (0 < addremove_explicit));
 
+	prepare_repo_settings(the_repository);
+	the_repository->settings.command_requires_full_index = 0;
+
 	hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR);
 
 	/*
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 47f8e5e54e3f52..19d38f18ed63b0 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -340,21 +340,27 @@ test_expect_success 'status/add: outside sparse cone' '
 
 	test_sparse_match git status --porcelain=v2 &&
 
-	# This "git add folder1/a" fails with a warning
-	# in the sparse repos, differing from the full
-	# repo. This is intentional.
+	# Adding the path outside of the sparse-checkout cone should fail.
 	test_sparse_match test_must_fail git add folder1/a &&
-	test_sparse_match test_must_fail git add --refresh folder1/a &&
-	test_all_match git status --porcelain=v2 &&
+
+	test_must_fail git -C sparse-checkout add --refresh folder1/a 2>sparse-checkout-err &&
+	test_must_fail git -C sparse-index add --refresh folder1/a 2>sparse-index-err &&
+	# NEEDSWORK: A sparse index changes the error message.
+	! test_cmp sparse-checkout-err sparse-index-err &&
+
+	# NEEDSWORK: Adding a newly-tracked file outside the cone succeeds
+	test_sparse_match git add folder1/new &&
 
 	test_all_match git add . &&
 	test_all_match git status --porcelain=v2 &&
 	test_all_match git commit -m folder1/new &&
+	test_all_match git rev-parse HEAD^{tree} &&
 
 	run_on_all ../edit-contents folder1/newer &&
 	test_all_match git add folder1/ &&
 	test_all_match git status --porcelain=v2 &&
-	test_all_match git commit -m folder1/newer
+	test_all_match git commit -m folder1/newer &&
+	test_all_match git rev-parse HEAD^{tree}
 '
 
 test_expect_success 'checkout and reset --hard' '
@@ -636,7 +642,12 @@ test_expect_success 'sparse-index is not expanded' '
 	git -C sparse-index reset --hard &&
 	ensure_not_expanded checkout rename-out-to-out -- deep/deeper1 &&
 	git -C sparse-index reset --hard &&
-	ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1
+	ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 &&
+
+	echo >>sparse-index/README.md &&
+	ensure_not_expanded add -A &&
+	echo >>sparse-index/extra.txt &&
+	ensure_not_expanded add extra.txt
 '
 
 # NEEDSWORK: a sparse-checkout behaves differently from a full checkout

From 2ae91e0af29bcc5ba118cba4ba051deefd3b9764 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 12 Jan 2021 11:19:24 -0500
Subject: [PATCH 093/110] pathspec: stop calling ensure_full_index

The add_pathspec_matches_against_index() focuses on matching a pathspec
to file entries in the index. This already works correctly for its only
use: checking if untracked files exist in the index.

The compatibility checks in t1092 already test that 'git add <dir>'
works for a directory outside of the sparse cone. That provides coverage
for removing this guard.

This finalizes our ability to run 'git add .' without expanding a sparse
index to a full one. This is evidenced by an update to t1092 and by
these performance numbers for p2000-sparse-operations.sh:

Test                                    HEAD~1            HEAD
--------------------------------------------------------------------------------
2000.10: git add . (full-index-v3)      0.37(0.28+0.07)   0.36(0.27+0.06) -2.7%
2000.11: git add . (full-index-v4)      0.33(0.26+0.06)   0.32(0.28+0.05) -3.0%
2000.12: git add . (sparse-index-v3)    0.57(0.53+0.07)   0.06(0.06+0.07) -89.5%
2000.13: git add . (sparse-index-v4)    0.57(0.53+0.07)   0.05(0.03+0.09) -91.2%

While the ~90% improvement is shown by the test results, it is worth
noting that expanding the sparse index was adding overhead in previous
commits. Comparing to the full index case, we see the performance go
from 0.33s to 0.05s, an 85% improvement.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 pathspec.c                               | 2 --
 t/t1092-sparse-checkout-compatibility.sh | 7 +++----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 08f8d3eedc39aa..44306fdaca2ee8 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -37,8 +37,6 @@ void add_pathspec_matches_against_index(const struct pathspec *pathspec,
 			num_unmatched++;
 	if (!num_unmatched)
 		return;
-	/* TODO: audit for interaction with sparse-index. */
-	ensure_full_index(istate);
 	for (i = 0; i < istate->cache_nr; i++) {
 		const struct cache_entry *ce = istate->cache[i];
 		if (sw_action == PS_IGNORE_SKIP_WORKTREE && ce_skip_worktree(ce))
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 19d38f18ed63b0..50dce0e0f99e4a 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -322,9 +322,6 @@ test_expect_success 'commit including unstaged changes' '
 test_expect_success 'status/add: outside sparse cone' '
 	init_repos &&
 
-	# adding a "missing" file outside the cone should fail
-	test_sparse_match test_must_fail git add folder1/a &&
-
 	# folder1 is at HEAD, but outside the sparse cone
 	run_on_sparse mkdir folder1 &&
 	cp initial-repo/folder1/a sparse-checkout/folder1/a &&
@@ -647,7 +644,9 @@ test_expect_success 'sparse-index is not expanded' '
 	echo >>sparse-index/README.md &&
 	ensure_not_expanded add -A &&
 	echo >>sparse-index/extra.txt &&
-	ensure_not_expanded add extra.txt
+	ensure_not_expanded add extra.txt &&
+	echo >>sparse-index/untracked.txt &&
+	ensure_not_expanded add .
 '
 
 # NEEDSWORK: a sparse-checkout behaves differently from a full checkout

From a79728d4c648533468716ea1dcc16b3d4e0673b9 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 29 Jun 2021 17:02:36 -0400
Subject: [PATCH 094/110] add: ignore outside the sparse-checkout in refresh()

Since b243012 (refresh_index(): add flag to ignore SKIP_WORKTREE
entries, 2021-04-08), 'git add --refresh <path>' will output a warning
message when the path is outside the sparse-checkout definition. The
implementation of this warning happened in parallel with the
sparse-index work to add ensure_full_index() calls throughout the
codebase.

Update this loop to have the proper logic that checks to see if the
pathspec is outside the sparse-checkout definition. This avoids the need
to expand the sparse directory entry and determine if the path is
tracked, untracked, or ignored. We simply avoid updating the stat()
information because there isn't even an entry that matches the path!

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/add.c                            | 10 +++++++++-
 t/t1092-sparse-checkout-compatibility.sh |  6 +-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index c76e6ddd359e41..d512ece655bce1 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -192,13 +192,21 @@ static int refresh(int verbose, const struct pathspec *pathspec)
 	struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP;
 	int flags = REFRESH_IGNORE_SKIP_WORKTREE |
 		    (verbose ? REFRESH_IN_PORCELAIN : REFRESH_QUIET);
+	struct pattern_list pl = { 0 };
+	int sparse_checkout_enabled = !get_sparse_checkout_patterns(&pl);
 
 	seen = xcalloc(pathspec->nr, 1);
 	refresh_index(&the_index, flags, pathspec, seen,
 		      _("Unstaged changes after refreshing the index:"));
 	for (i = 0; i < pathspec->nr; i++) {
 		if (!seen[i]) {
-			if (matches_skip_worktree(pathspec, i, &skip_worktree_seen)) {
+			const char *path = pathspec->items[i].original;
+			int dtype = DT_REG;
+
+			if (matches_skip_worktree(pathspec, i, &skip_worktree_seen) ||
+			    (sparse_checkout_enabled &&
+			     !path_matches_pattern_list(path, strlen(path), NULL,
+							&dtype, &pl, &the_index))) {
 				string_list_append(&only_match_skip_worktree,
 						   pathspec->items[i].original);
 			} else {
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 50dce0e0f99e4a..3e201546577ad6 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -339,11 +339,7 @@ test_expect_success 'status/add: outside sparse cone' '
 
 	# Adding the path outside of the sparse-checkout cone should fail.
 	test_sparse_match test_must_fail git add folder1/a &&
-
-	test_must_fail git -C sparse-checkout add --refresh folder1/a 2>sparse-checkout-err &&
-	test_must_fail git -C sparse-index add --refresh folder1/a 2>sparse-index-err &&
-	# NEEDSWORK: A sparse index changes the error message.
-	! test_cmp sparse-checkout-err sparse-index-err &&
+	test_sparse_match test_must_fail git add --refresh folder1/a &&
 
 	# NEEDSWORK: Adding a newly-tracked file outside the cone succeeds
 	test_sparse_match git add folder1/new &&

From 1543550a4e8cc4dda495c08c924034291001386a Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 10:06:09 -0400
Subject: [PATCH 095/110] add: remove ensure_full_index() with --renormalize

The --renormalize option updates the EOL conversions for the tracked
files. However, the loop already ignores files marked with the
SKIP_WORKTREE bit, so it will continue to do so with a sparse index
because the sparse directory entries also have this bit set.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/add.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index d512ece655bce1..c49e179abc36fa 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -144,8 +144,6 @@ static int renormalize_tracked_files(const struct pathspec *pathspec, int flags)
 {
 	int i, retval = 0;
 
-	/* TODO: audit for interaction with sparse-index. */
-	ensure_full_index(&the_index);
 	for (i = 0; i < active_nr; i++) {
 		struct cache_entry *ce = active_cache[i];
 

From 90dacd693c063cacf617b9a615407b27641156fb Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 21 Jun 2021 13:09:06 -0400
Subject: [PATCH 096/110] sparse-index: silently return when not using
 cone-mode patterns

While the sparse-index is only enabled when core.sparseCheckoutCone is
also enabled, it is possible for the user to modify the sparse-checkout
file manually in a way that does not match cone-mode patterns. In this
case, we should refuse to convert an index into a sparse index, since
the sparse_checkout_patterns will not be initialized with recursive and
parent path hashsets.

Also silently return if there are no cache entries, which is a simple
case: there are no paths to make sparse!

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 sparse-index.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sparse-index.c b/sparse-index.c
index c6b4feec413a8f..bc5900eae35b45 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -130,7 +130,8 @@ static int index_has_unmerged_entries(struct index_state *istate)
 int convert_to_sparse(struct index_state *istate)
 {
 	int test_env;
-	if (istate->split_index || istate->sparse_index ||
+
+	if (istate->split_index || istate->sparse_index || !istate->cache_nr ||
 	    !core_apply_sparse_checkout || !core_sparse_checkout_cone)
 		return 0;
 
@@ -158,10 +159,16 @@ int convert_to_sparse(struct index_state *istate)
 			return 0;
 	}
 
-	if (!istate->sparse_checkout_patterns->use_cone_patterns) {
-		warning(_("attempting to use sparse-index without cone mode"));
-		return -1;
-	}
+	/*
+	 * We need cone-mode patterns to use sparse-index. If a user edits
+	 * their sparse-checkout file manually, then we can detect during
+	 * parsing that they are not actually using cone-mode patterns and
+	 * hence we need to abort this conversion _without error_. Warnings
+	 * already exist in the pattern parsing to inform the user of their
+	 * bad patterns.
+	 */
+	if (!istate->sparse_checkout_patterns->use_cone_patterns)
+		return 0;
 
 	/*
 	 * NEEDSWORK: If we have unmerged entries, then stay full.

From 002a44a5975568374cd5d75b84f1187ed244d827 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 22 Jun 2021 21:46:27 -0400
Subject: [PATCH 097/110] sparse-index: silently return when cache tree fails

If cache_tree_update() returns a non-zero value, then it could not
create the cache tree. This is likely due to a path having a merge
conflict. Since we are already returning early, let's return silently to
avoid making it seem like we failed to write the index at all.

If we remove our dependence on the cache tree within
convert_to_sparse(), then we could still recover from this scenario and
have a sparse index.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 sparse-index.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sparse-index.c b/sparse-index.c
index bc5900eae35b45..0e0f731335019d 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -179,10 +179,12 @@ int convert_to_sparse(struct index_state *istate)
 
 	/* Clear and recompute the cache-tree */
 	cache_tree_free(&istate->cache_tree);
-	if (cache_tree_update(istate, 0)) {
-		warning(_("unable to update cache-tree, staying full"));
-		return -1;
-	}
+	/*
+	 * Silently return if there is a problem with the cache tree update,
+	 * which might just be due to a conflict state in some entry.
+	 */
+	if (cache_tree_update(istate, 0))
+		return 0;
 
 	remove_fsmonitor(istate);
 

From ec61bc5c86d427e7951c597026c10f272666d4d9 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Thu, 24 Jun 2021 20:52:24 -0400
Subject: [PATCH 098/110] sparse-index: use WRITE_TREE_MISSING_OK when updating
 cache-tree

When constructing the cache-tree extension in convert_to_sparse(), it is
possible that we construct a tree object that is new to the object
database. Without the WRITE_TREE_MISSING_OK flag, this results in an
error that halts our conversion to a sparse index. Add this flag to
remove this limitation.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 sparse-index.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sparse-index.c b/sparse-index.c
index 0e0f731335019d..b6e90417556afc 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -182,8 +182,11 @@ int convert_to_sparse(struct index_state *istate)
 	/*
 	 * Silently return if there is a problem with the cache tree update,
 	 * which might just be due to a conflict state in some entry.
+	 *
+	 * This might create new tree objects, so be sure to use
+	 * WRITE_TREE_MISSING_OK.
 	 */
-	if (cache_tree_update(istate, 0))
+	if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
 		return 0;
 
 	remove_fsmonitor(istate);

From a3415bcd295e3d25e8751ac992ab5ca41b806767 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 5 Jul 2021 12:43:20 -0400
Subject: [PATCH 099/110] unpack-trees: fix nested sparse-dir search

The iterated search in find_cache_entry() was recently modified to
include a loop that searches backwards for a sparse directory entry that
matches the given traverse_info and name_entry. However, the string
comparison failed to actually concatenate those two strings, so this
failed to find a sparse directory when it was not a top-level directory.

This caused some errors in rare cases where a 'git checkout' spanned a
diff that modified files within the sparse directory entry, but we could
not correctly find the entry.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 unpack-trees.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index 309c1352f5d280..ae3da41261009f 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1266,9 +1266,10 @@ static int sparse_dir_matches_path(const struct cache_entry *ce,
 static struct cache_entry *find_cache_entry(struct traverse_info *info,
 					    const struct name_entry *p)
 {
-	struct cache_entry *ce;
+	struct cache_entry *ce = NULL;
 	int pos = find_cache_pos(info, p->path, p->pathlen);
 	struct unpack_trees_options *o = info->data;
+	struct strbuf full_path = STRBUF_INIT;
 
 	if (0 <= pos)
 		return o->src_index->cache[pos];
@@ -1284,6 +1285,10 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info,
 	if (pos < 0 || pos >= o->src_index->cache_nr)
 		return NULL;
 
+	strbuf_addstr(&full_path, info->traverse_path);
+	strbuf_add(&full_path, p->path, p->pathlen);
+	strbuf_addch(&full_path, '/');
+
 	/*
 	 * Due to lexicographic sorting and sparse directory
 	 * entries ending with a trailing slash, our path as a
@@ -1294,17 +1299,20 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info,
 	while (pos >= 0) {
 		ce = o->src_index->cache[pos];
 
-		if (strncmp(ce->name, p->path, p->pathlen))
-			return NULL;
+		if (strncmp(ce->name, full_path.buf, full_path.len)) {
+			ce = NULL;
+			break;
+		}
 
 		if (S_ISSPARSEDIR(ce->ce_mode) &&
 		    sparse_dir_matches_path(ce, info, p))
-			return ce;
+			break;
 
 		pos--;
 	}
 
-	return NULL;
+	strbuf_release(&full_path);
+	return ce;
 }
 
 static void debug_path(struct traverse_info *info)

From 6bfa9201dc8a1fa623b9dbdccfce2c2b8d782252 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 5 Jul 2021 11:57:06 -0400
Subject: [PATCH 100/110] t7519: rewrite sparse index test

The sparse index is tested with the FS Monitor hook and extension since
f8fe49e (fsmonitor: integrate with sparse index, 2021-07-14). This test
was very fragile because it shared an index across sparse and non-sparse
behavior. Since that expansion and contraction could cause the index to
lose its FS Monitor bitmap and token, behavior is fragile to changes in
'git sparse-checkout set'.

Rewrite the test to use two clones of the original repo: full and
sparse. This allows us to also keep the test files (actual, expect,
trace2.txt) out of the repos we are testing with 'git status'.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/t7519-status-fsmonitor.sh | 38 ++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index deea88d4431d23..6f2cf306f66588 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -389,43 +389,47 @@ test_expect_success 'status succeeds after staging/unstaging' '
 # If "!" is supplied, then we verify that we do not call ensure_full_index
 # during a call to 'git status'. Otherwise, we verify that we _do_ call it.
 check_sparse_index_behavior () {
-	git status --porcelain=v2 >expect &&
-	git sparse-checkout init --cone --sparse-index &&
-	git sparse-checkout set dir1 dir2 &&
+	git -C full status --porcelain=v2 >expect &&
 	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
-		git status --porcelain=v2 >actual &&
+		git -C sparse status --porcelain=v2 >actual &&
 	test_region $1 index ensure_full_index trace2.txt &&
 	test_region fsm_hook query trace2.txt &&
 	test_cmp expect actual &&
-	rm trace2.txt &&
-	git sparse-checkout disable
+	rm trace2.txt
 }
 
 test_expect_success 'status succeeds with sparse index' '
-	git reset --hard &&
+	git clone . full &&
+	git clone . sparse &&
+	git -C sparse sparse-checkout init --cone --sparse-index &&
+	git -C sparse sparse-checkout set dir1 dir2 &&
 
-	test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" &&
-	check_sparse_index_behavior ! &&
-
-	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	write_script .git/hooks/fsmonitor-test <<-\EOF &&
 		printf "last_update_token\0"
 	EOF
-	git config core.fsmonitor .git/hooks/fsmonitor-test &&
+	git -C full config core.fsmonitor ../.git/hooks/fsmonitor-test &&
+	git -C sparse config core.fsmonitor ../.git/hooks/fsmonitor-test &&
 	check_sparse_index_behavior ! &&
 
-	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	write_script .git/hooks/fsmonitor-test <<-\EOF &&
 		printf "last_update_token\0"
 		printf "dir1/modified\0"
 	EOF
 	check_sparse_index_behavior ! &&
 
-	cp -r dir1 dir1a &&
-	git add dir1a &&
-	git commit -m "add dir1a" &&
+	git -C sparse sparse-checkout add dir1a &&
+
+	for repo in full sparse
+	do
+		cp -r $repo/dir1 $repo/dir1a &&
+		git -C $repo add dir1a &&
+		git -C $repo commit -m "add dir1a" || return 1
+	done &&
+	git -C sparse sparse-checkout set dir1 dir2 &&
 
 	# This one modifies outside the sparse-checkout definition
 	# and hence we expect to expand the sparse-index.
-	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	write_script .git/hooks/fsmonitor-test <<-\EOF &&
 		printf "last_update_token\0"
 		printf "dir1a/modified\0"
 	EOF

From 2d294edb97b6452e9797b78aa08a698ee00f43fb Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Sun, 4 Jul 2021 17:06:12 -0400
Subject: [PATCH 101/110] sparse-checkout: clear tracked sparse dirs

When changing the scope of a sparse-checkout using cone mode, we might
have some tracked directories go out of scope. The current logic removes
the tracked files from within those directories, but leaves the ignored
files within those directories. This is a bit unexpected to users who
have given input to Git saying they don't need those directories
anymore.

This is something that is new to the cone mode pattern type: the user
has explicitly said "I want these directories and _not_ those
directories." The typical sparse-checkout patterns more generally apply
to "I want files with with these patterns" so it is natural to leave
ignored files as they are. This focus on directories in cone mode
provides us an opportunity to change the behavior.

Leaving these ignored files in the sparse directories makes it
impossible to gain performance benefits in the sparse index. When we
track into these directories, we need to know if the files are ignored
or not, which might depend on the _tracked_ .gitignore file(s) within
the sparse directory. This depends on the indexed version of the file,
so the sparse directory must be expanded.

By deleting the sparse directories when changing scope (or running 'git
sparse-checkout reapply') we regain these performance benefits as if the
repository was in a clean state.

Since these ignored files are frequently build output or helper files
from IDEs, the users should not need the files now that the tracked
files are removed. If the tracked files reappear, then they will have
newer timestamps than the build artifacts, so the artifacts will need to
be regenerated anyway.

If users depend on ignored files within the sparse directories, then
they have created a bad shape in their repository. Regardless, such
shapes would create risk that changing the behavior for all cone mode
users might be too risky to take on at the moment. Since this data shape
makes it impossible to get performance benefits using the sparse index, we limit the change
to only be enabled when the sparse index is enabled. Users can opt out
of this behavior by disabline the sparse index.

Depending on user feedback or real-world use, we might want to consider
expanding the behavior change to all of cone mode. Since we are
currently restricting to the sparse index case, we can use the existence
of sparse directory entries in the index as indicators of which
directories should be removed. The contained files are deleted by a
'git clean -dfx' subcommand while the directories themselves are deleted
once they are empty.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/sparse-checkout.c          | 78 ++++++++++++++++++++++++++++++
 t/t1091-sparse-checkout-builtin.sh | 42 ++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index a4bdd7c4940260..4448daba27e0cb 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -15,6 +15,7 @@
 #include "wt-status.h"
 #include "quote.h"
 #include "sparse-index.h"
+#include "run-command.h"
 
 static const char *empty_base = "";
 
@@ -100,6 +101,76 @@ static int sparse_checkout_list(int argc, const char **argv)
 	return 0;
 }
 
+static void clean_tracked_sparse_directories(struct repository *r)
+{
+	int i;
+	struct strvec args = STRVEC_INIT;
+
+	/*
+	 * If we are not using cone mode patterns, then we cannot
+	 * delete directories outside of the sparse cone.
+	 */
+	if (!r || !r->index || !r->index->sparse_checkout_patterns ||
+	    !r->index->sparse_checkout_patterns->use_cone_patterns)
+		return;
+	/*
+	 * NEEDSWORK: For now, only use this behavior when index.sparse
+	 * is enabled. We may want this behavior enabled whenever using
+	 * cone mode patterns.
+	 */
+	prepare_repo_settings(r);
+	if (!r->settings.sparse_index)
+		return;
+
+	strvec_pushl(&args, "clean", "-dfx", "--", NULL);
+
+	/*
+	 * Since we now depend on the sparse index to enable this
+	 * behavior, use it to our advantage. This process is more
+	 * complicated without it.
+	 */
+	convert_to_sparse(r->index);
+
+	for (i = 0; i < r->index->cache_nr; i++) {
+		struct cache_entry *ce = r->index->cache[i];
+
+		/*
+		 * Is this a sparse directory? If so, then definitely
+		 * include it. All contained content is outside of the
+		 * patterns.
+		 */
+		if (S_ISSPARSEDIR(ce->ce_mode) &&
+		    repo_file_exists(r, ce->name)) {
+			strvec_push(&args, ce->name);
+			continue;
+		}
+	}
+
+	/*
+	 * Only run if we found an existing sparse directory, otherwise
+	 * the clean will be across the entire worktree!
+	 */
+	if (args.nr > 3)
+		run_command_v_opt(args.v, RUN_GIT_CMD);
+
+	/*
+	 * The 'git clean -dfx -- <path> ...' command empties the
+	 * tracked directories outside of the sparse cone, but does not
+	 * delete the directories themselves. Remove them now.
+	 */
+	for (i = 3; i < args.nr; i++)
+		rmdir_or_warn(args.v[i]);
+
+	strvec_clear(&args);
+
+	/*
+	 * This is temporary: the sparse-checkout builtin is not
+	 * integrated with the sparse-index yet, so we need to keep
+	 * it full during the process.
+	 */
+	ensure_full_index(r->index);
+}
+
 static int update_working_directory(struct pattern_list *pl)
 {
 	enum update_sparsity_result result;
@@ -141,6 +212,8 @@ static int update_working_directory(struct pattern_list *pl)
 	else
 		rollback_lock_file(&lock_file);
 
+	clean_tracked_sparse_directories(r);
+
 	r->index->sparse_checkout_patterns = NULL;
 	return result;
 }
@@ -540,8 +613,11 @@ static int modify_pattern_list(int argc, const char **argv, enum modify_type m)
 {
 	int result;
 	int changed_config = 0;
+	struct pattern_list *old_pl = xcalloc(1, sizeof(*old_pl));
 	struct pattern_list *pl = xcalloc(1, sizeof(*pl));
 
+	get_sparse_checkout_patterns(old_pl);
+
 	switch (m) {
 	case ADD:
 		if (core_sparse_checkout_cone)
@@ -567,7 +643,9 @@ static int modify_pattern_list(int argc, const char **argv, enum modify_type m)
 		set_config(MODE_NO_PATTERNS);
 
 	clear_pattern_list(pl);
+	clear_pattern_list(old_pl);
 	free(pl);
+	free(old_pl);
 	return result;
 }
 
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 38fc8340f5c9b7..43eb314c94a5cb 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -642,4 +642,46 @@ test_expect_success MINGW 'cone mode replaces backslashes with slashes' '
 	check_files repo/deep a deeper1
 '
 
+test_expect_success 'cone mode clears ignored subdirectories' '
+	rm repo/.git/info/sparse-checkout &&
+
+	# NEEDSWORK: --sparse-index is required for now
+	git -C repo sparse-checkout init --cone --sparse-index &&
+	git -C repo sparse-checkout set deep/deeper1 &&
+
+	cat >repo/.gitignore <<-\EOF &&
+	obj/
+	*.o
+	EOF
+
+	git -C repo add .gitignore &&
+	git -C repo commit -m ".gitignore" &&
+
+	mkdir -p repo/obj repo/folder1/obj repo/deep/deeper2/obj &&
+	for file in folder1/obj/a obj/a folder1/file.o folder1.o \
+		    deep/deeper2/obj/a deep/deeper2/file.o file.o
+	do
+		echo ignored >repo/$file || return 1
+	done &&
+
+	git -C repo status --porcelain=v2 >out &&
+	test_must_be_empty out &&
+
+	git -C repo sparse-checkout reapply &&
+	test_path_is_missing repo/folder1 &&
+	test_path_is_missing repo/deep/deeper2 &&
+	test_path_is_dir repo/obj &&
+	test_path_is_file repo/file.o &&
+
+	git -C repo status --porcelain=v2 >out &&
+	test_must_be_empty out &&
+
+	git -C repo sparse-checkout set deep/deeper2 &&
+	test_path_is_missing repo/deep/deeper1 &&
+	test_path_is_dir repo/deep/deeper2 &&
+
+	git -C repo status --porcelain=v2 >out &&
+	test_must_be_empty out
+'
+
 test_done

From 35e7978fcd071d3c342d8a563a53033f3050ed01 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 13:22:55 -0400
Subject: [PATCH 102/110] t1092: use ORT merge strategy

The sparse index will be compatible with the ORT merge strategy, so
let's use it explicitly in our tests.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 3e201546577ad6..3425a9c649f2cb 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -7,6 +7,10 @@ GIT_TEST_SPARSE_INDEX=
 
 . ./test-lib.sh
 
+# Force the use of the ORT merge algorithm until testing with the
+# recursive strategy. We expect ORT to be used with sparse-index.
+export GIT_TEST_MERGE_ALGORITHM=ort
+
 test_expect_success 'setup' '
 	git init initial-repo &&
 	(
@@ -496,7 +500,7 @@ test_expect_success 'merge with conflict outside cone' '
 
 	test_all_match git checkout -b merge-tip merge-left &&
 	test_all_match git status --porcelain=v2 &&
-	test_all_match test_must_fail git merge -m merge merge-right &&
+	test_all_match test_must_fail git merge -sort -m merge merge-right &&
 	test_all_match git status --porcelain=v2 &&
 
 	# Resolve the conflict in different ways:
@@ -526,7 +530,7 @@ test_expect_success 'merge with outside renames' '
 	do
 		test_all_match git reset --hard &&
 		test_all_match git checkout -f -b merge-$type update-deep &&
-		test_all_match git merge -m "$type" rename-$type &&
+		test_all_match git merge -sort -m "$type" rename-$type &&
 		test_all_match git rev-parse HEAD^{tree} || return 1
 	done
 '

From 0acbbd626e572e4cc82f57fae9e5c1b7d5bbb19b Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 15:44:05 -0400
Subject: [PATCH 103/110] sparse-checkout: create helper methods

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 dir.c          | 29 +++++++++++++++++++++++++++++
 dir.h          |  6 ++++++
 sparse-index.c |  7 ++-----
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/dir.c b/dir.c
index 0c5264b3b20a2f..9032d90af34574 100644
--- a/dir.c
+++ b/dir.c
@@ -1442,6 +1442,35 @@ enum pattern_match_result path_matches_pattern_list(
 	return result;
 }
 
+int init_sparse_checkout_patterns(struct index_state *istate)
+{
+	if (istate->sparse_checkout_patterns)
+		return 0;
+
+	CALLOC_ARRAY(istate->sparse_checkout_patterns, 1);
+
+	if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0) {
+		FREE_AND_NULL(istate->sparse_checkout_patterns);
+		return -1;
+	}
+
+	return 0;
+}
+
+enum pattern_match_result path_in_sparse_checkout(const char *path,
+						  struct index_state *istate)
+{
+	int dtype = DT_REG;
+	init_sparse_checkout_patterns(istate);
+
+	if (!istate->sparse_checkout_patterns)
+		return MATCHED;
+
+	return path_matches_pattern_list(path, strlen(path), NULL, &dtype,
+					 istate->sparse_checkout_patterns,
+					 istate);
+}
+
 static struct path_pattern *last_matching_pattern_from_lists(
 		struct dir_struct *dir, struct index_state *istate,
 		const char *pathname, int pathlen,
diff --git a/dir.h b/dir.h
index e3db9b9ec658ee..6599a0173268a3 100644
--- a/dir.h
+++ b/dir.h
@@ -394,6 +394,12 @@ enum pattern_match_result path_matches_pattern_list(const char *pathname,
 				const char *basename, int *dtype,
 				struct pattern_list *pl,
 				struct index_state *istate);
+
+int init_sparse_checkout_patterns(struct index_state *state);
+
+enum pattern_match_result path_in_sparse_checkout(const char *path,
+						  struct index_state *istate);
+
 struct dir_entry *dir_add_ignored(struct dir_struct *dir,
 				  struct index_state *istate,
 				  const char *pathname, int len);
diff --git a/sparse-index.c b/sparse-index.c
index b6e90417556afc..8edf9e39645ecd 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -153,11 +153,8 @@ int convert_to_sparse(struct index_state *istate)
 	if (!istate->repo->settings.sparse_index)
 		return 0;
 
-	if (!istate->sparse_checkout_patterns) {
-		istate->sparse_checkout_patterns = xcalloc(1, sizeof(struct pattern_list));
-		if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0)
-			return 0;
-	}
+	if (init_sparse_checkout_patterns(istate) < 0)
+		return 0;
 
 	/*
 	 * We need cone-mode patterns to use sparse-index. If a user edits

From 3b245806ee7764f6340aa0d16fccbcc10e8ab8b4 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 15:15:44 -0400
Subject: [PATCH 104/110] attr: be careful about sparse directories

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 attr.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/attr.c b/attr.c
index 9e897e43f53196..ef4e8dae102931 100644
--- a/attr.c
+++ b/attr.c
@@ -14,6 +14,7 @@
 #include "utf8.h"
 #include "quote.h"
 #include "thread-utils.h"
+#include "dir.h"
 
 const char git_attr__true[] = "(builtin)true";
 const char git_attr__false[] = "\0(builtin)false";
@@ -744,6 +745,17 @@ static struct attr_stack *read_attr_from_index(struct index_state *istate,
 	if (!istate)
 		return NULL;
 
+	/*
+	 * In the case of cone-mode sparse-checkout, getting the
+	 * .gitattributes file from a directory is meaningless: all
+	 * contained paths will be sparse if the .gitattributes is also
+	 * sparse. In the case of a sparse index, it is critical that we
+	 * don't go looking for one as it will expand the index.
+	 */
+	if (core_sparse_checkout_cone &&
+	    path_in_sparse_checkout(path, istate) == NOT_MATCHED)
+		return NULL;
+
 	buf = read_blob_data_from_index(istate, path, NULL);
 	if (!buf)
 		return NULL;

From 15d9ae1bf97e07ce550073f9adecc0a100af2748 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 15:43:05 -0400
Subject: [PATCH 105/110] diff: ignore sparse paths in diffstat

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 diff.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/diff.c b/diff.c
index 52c791574b71c8..8da187b815ff00 100644
--- a/diff.c
+++ b/diff.c
@@ -26,6 +26,7 @@
 #include "parse-options.h"
 #include "help.h"
 #include "promisor-remote.h"
+#include "dir.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -3896,6 +3897,16 @@ static int reuse_worktree_file(struct index_state *istate,
 	if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
 		return 0;
 
+	/*
+	 * If this path does not match our sparse-checkout definition,
+	 * then the file will not be in the working directory.
+	 */
+	if (!init_sparse_checkout_patterns(istate) &&
+	    istate->sparse_checkout_patterns &&
+	    istate->sparse_checkout_patterns->use_cone_patterns &&
+	    path_in_sparse_checkout(name, istate) == NOT_MATCHED)
+		return 0;
+
 	/*
 	 * Similarly, if we'd have to convert the file contents anyway, that
 	 * makes the optimization not worthwhile.

From fda370974bc1f143c3af7564e3c86c937454f74c Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 14:48:54 -0400
Subject: [PATCH 106/110] merge: make sparse-aware with ORT

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/merge.c                          |  3 ++
 merge-ort.c                              |  8 ++++++
 merge-recursive.c                        |  3 ++
 t/t1092-sparse-checkout-compatibility.sh | 35 ++++++++++++++++++++++--
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index a8a843b1f54113..82436f87913806 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1275,6 +1275,9 @@ int cmd_merge(int argc, const char **argv, const char *prefix)
 	if (argc == 2 && !strcmp(argv[1], "-h"))
 		usage_with_options(builtin_merge_usage, builtin_merge_options);
 
+	prepare_repo_settings(the_repository);
+	the_repository->settings.command_requires_full_index = 0;
+
 	/*
 	 * Check if we are _not_ on a detached HEAD, i.e. if there is a
 	 * current branch.
diff --git a/merge-ort.c b/merge-ort.c
index b954f7184a5f56..1149d02941aa4a 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -3606,6 +3606,14 @@ static int record_conflicted_index_entries(struct merge_options *opt)
 	if (strmap_empty(&opt->priv->conflicted))
 		return 0;
 
+	/*
+	 * We are in a conflicted state. These conflicts might be inside
+	 * sparse-directory entries, so expand the index preemtively.
+	 * Also, we set original_cache_nr below, but that might change if
+	 * index_name_pos() calls ask for paths within sparse directories.
+	 */
+	ensure_full_index(index);
+
 	/* If any entries have skip_worktree set, we'll have to check 'em out */
 	state.force = 1;
 	state.quiet = 1;
diff --git a/merge-recursive.c b/merge-recursive.c
index d146bb116f73cd..693846de5d6d19 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -3747,6 +3747,9 @@ int merge_recursive(struct merge_options *opt,
 	assert(opt->ancestor == NULL ||
 	       !strcmp(opt->ancestor, "constructed merge base"));
 
+	prepare_repo_settings(opt->repo);
+	opt->repo->settings.command_requires_full_index = 1;
+
 	if (merge_start(opt, repo_get_commit_tree(opt->repo, h1)))
 		return -1;
 	clean = merge_recursive_internal(opt, h1, h2, merge_bases, result);
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 3425a9c649f2cb..41a8e33458b9e8 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -51,7 +51,7 @@ test_expect_success 'setup' '
 		git checkout -b base &&
 		for dir in folder1 folder2 deep
 		do
-			git checkout -b update-$dir &&
+			git checkout -b update-$dir base &&
 			echo "updated $dir" >$dir/a &&
 			git commit -a -m "update $dir" || return 1
 		done &&
@@ -604,6 +604,31 @@ test_expect_success 'submodule handling' '
 	grep "160000 commit $(git -C initial-repo rev-parse HEAD)	modules/sub" cache
 '
 
+ensure_expanded () {
+	rm -f trace2.txt &&
+	echo >>sparse-index/untracked.txt &&
+	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
+		git -C sparse-index "$@" &&
+	test_region index ensure_full_index trace2.txt &&
+	test_region index convert_to_sparse trace2.txt
+}
+
+# Check that the given command fails, and only check for ensure_full_index
+# because convert_to_sparse only happens on an index write, especially
+# only on an index write that _can_ be converted (i.e. no merge conflicts).
+ensure_expanded_fail () {
+	rm -f trace2.txt &&
+	echo >>sparse-index/untracked.txt &&
+	(
+		GIT_TRACE2_EVENT="$(pwd)/trace2.txt" &&
+		GIT_TRACE2_EVENT_NESTING=10 &&
+		export GIT_TRACE2_EVENT &&
+		export GIT_TRACE2_EVENT_NESTING &&
+		test_must_fail git -C sparse-index "$@" &&
+		test_region index ensure_full_index trace2.txt
+	)
+}
+
 test_expect_success 'sparse-index is expanded and converted back' '
 	init_repos &&
 
@@ -646,7 +671,13 @@ test_expect_success 'sparse-index is not expanded' '
 	echo >>sparse-index/extra.txt &&
 	ensure_not_expanded add extra.txt &&
 	echo >>sparse-index/untracked.txt &&
-	ensure_not_expanded add .
+	ensure_not_expanded add . &&
+
+	ensure_not_expanded checkout -f update-deep &&
+	GIT_TEST_MERGE_ALGORITHM=ort \
+		ensure_not_expanded merge -m merge update-folder1 &&
+	GIT_TEST_MERGE_ALGORITHM=ort \
+		ensure_not_expanded merge -m merge update-folder2
 '
 
 # NEEDSWORK: a sparse-checkout behaves differently from a full checkout

From 6d090743522fd7e994e95af0d7eddf3d94919a10 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 16:13:16 -0400
Subject: [PATCH 107/110] t1092: add cherry-pick, rebase tests

Add tests to check that cherry-pick and rebase behave the same in the
sparse-index case as in the full index cases.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 41a8e33458b9e8..0ae1f7af09008b 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -485,14 +485,17 @@ test_expect_success 'checkout and reset (mixed) [sparse]' '
 	test_sparse_match git reset update-folder2
 '
 
-test_expect_success 'merge' '
+test_expect_success 'merge, cherry-pick, and rebase' '
 	init_repos &&
 
-	test_all_match git checkout -b merge update-deep &&
-	test_all_match git merge -m "folder1" update-folder1 &&
-	test_all_match git rev-parse HEAD^{tree} &&
-	test_all_match git merge -m "folder2" update-folder2 &&
-	test_all_match git rev-parse HEAD^{tree}
+	for OPERATION in "merge -s ort -m merge" cherry-pick rebase
+	do
+		test_all_match git checkout -B temp update-deep &&
+		test_all_match git $OPERATION update-folder1 &&
+		test_all_match git rev-parse HEAD^{tree} &&
+		test_all_match git $OPERATION update-folder2 &&
+		test_all_match git rev-parse HEAD^{tree} || return 1
+	done
 '
 
 test_expect_success 'merge with conflict outside cone' '

From efc0bf7ce50f2fd6f457448973e7115783d08f79 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 16:28:52 -0400
Subject: [PATCH 108/110] sparse-index: integrate with cherry-pick and rebase

The hard work was already done with 'git merge' and the ORT strategy.
Just add extra tests to see that we get the expected results in the
non-conflict cases.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/rebase.c                         |  6 ++++
 builtin/revert.c                         |  3 ++
 t/t1092-sparse-checkout-compatibility.sh | 43 +++++++++++++++++++++---
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/builtin/rebase.c b/builtin/rebase.c
index 12f093121d9ed3..25573444b9396a 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -559,6 +559,9 @@ int cmd_rebase__interactive(int argc, const char **argv, const char *prefix)
 	argc = parse_options(argc, argv, prefix, options,
 			builtin_rebase_interactive_usage, PARSE_OPT_KEEP_ARGV0);
 
+	prepare_repo_settings(the_repository);
+	the_repository->settings.command_requires_full_index = 0;
+
 	if (!is_null_oid(&squash_onto))
 		opts.squash_onto = &squash_onto;
 
@@ -1430,6 +1433,9 @@ int cmd_rebase(int argc, const char **argv, const char *prefix)
 		usage_with_options(builtin_rebase_usage,
 				   builtin_rebase_options);
 
+	prepare_repo_settings(the_repository);
+	the_repository->settings.command_requires_full_index = 0;
+
 	options.allow_empty_message = 1;
 	git_config(rebase_config, &options);
 	/* options.gpg_sign_opt will be either "-S" or NULL */
diff --git a/builtin/revert.c b/builtin/revert.c
index 237f2f18d4c035..6c4c22691bdaf0 100644
--- a/builtin/revert.c
+++ b/builtin/revert.c
@@ -136,6 +136,9 @@ static int run_sequencer(int argc, const char **argv, struct replay_opts *opts)
 			PARSE_OPT_KEEP_ARGV0 |
 			PARSE_OPT_KEEP_UNKNOWN);
 
+	prepare_repo_settings(the_repository);
+	the_repository->settings.command_requires_full_index = 0;
+
 	/* implies allow_empty */
 	if (opts->keep_redundant_commits)
 		opts->allow_empty = 1;
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 0ae1f7af09008b..1320f120942b9e 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -526,6 +526,38 @@ test_expect_success 'merge with conflict outside cone' '
 	test_all_match git rev-parse HEAD^{tree}
 '
 
+test_expect_success 'cherry-pick/rebase with conflict outside cone' '
+	init_repos &&
+
+	for OPERATION in cherry-pick rebase
+	do
+		test_all_match git checkout -B tip &&
+		test_all_match git reset --hard merge-left &&
+		test_all_match git status --porcelain=v2 &&
+		test_all_match test_must_fail git $OPERATION merge-right &&
+		test_all_match git status --porcelain=v2 &&
+
+		# Resolve the conflict in different ways:
+		# 1. Revert to the base
+		test_all_match git checkout base -- deep/deeper2/a &&
+		test_all_match git status --porcelain=v2 &&
+
+		# 2. Add the file with conflict markers
+		test_all_match git add folder1/a &&
+		test_all_match git status --porcelain=v2 &&
+
+		# 3. Rename the file to another sparse filename and
+		#    accept conflict markers as resolved content.
+		run_on_all mv folder2/a folder2/z &&
+		test_all_match git add folder2 &&
+		test_all_match git status --porcelain=v2 &&
+
+		test_all_match git $OPERATION --continue &&
+		test_all_match git status --porcelain=v2 &&
+		test_all_match git rev-parse HEAD^{tree} || return 1
+	done
+'
+
 test_expect_success 'merge with outside renames' '
 	init_repos &&
 
@@ -676,11 +708,12 @@ test_expect_success 'sparse-index is not expanded' '
 	echo >>sparse-index/untracked.txt &&
 	ensure_not_expanded add . &&
 
-	ensure_not_expanded checkout -f update-deep &&
-	GIT_TEST_MERGE_ALGORITHM=ort \
-		ensure_not_expanded merge -m merge update-folder1 &&
-	GIT_TEST_MERGE_ALGORITHM=ort \
-		ensure_not_expanded merge -m merge update-folder2
+	for OPERATION in "merge -s ort -m merge" cherry-pick rebase
+	do
+		ensure_not_expanded checkout -f -B temp update-deep &&
+		ensure_not_expanded $OPERATION update-folder1 &&
+		ensure_not_expanded $OPERATION update-folder2 || return 1
+	done
 '
 
 # NEEDSWORK: a sparse-checkout behaves differently from a full checkout

From f36b3ff2f6c372adb84e02efc830ce4dd5162b0b Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 26 Jul 2021 21:13:11 -0400
Subject: [PATCH 109/110] fixup! t1092: use ORT merge strategy

---
 t/t1092-sparse-checkout-compatibility.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index c3dca0d0203cf1..7937b543e3e9e4 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -9,7 +9,8 @@ GIT_TEST_SPARSE_INDEX=
 
 # Force the use of the ORT merge algorithm until testing with the
 # recursive strategy. We expect ORT to be used with sparse-index.
-export GIT_TEST_MERGE_ALGORITHM=ort
+GIT_TEST_MERGE_ALGORITHM=ort
+export GIT_TEST_MERGE_ALGORITHM
 
 test_expect_success 'setup' '
 	git init initial-repo &&

From 4f481965c0e7904e0e95a8f12b3437927ff46ee8 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Tue, 27 Jul 2021 21:32:59 -0400
Subject: [PATCH 110/110] merge-ort: expand only for out-of-cone conflicts

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 merge-ort.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/merge-ort.c b/merge-ort.c
index 1149d02941aa4a..b9f56854dd53a7 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -35,6 +35,7 @@
 #include "tree.h"
 #include "unpack-trees.h"
 #include "xdiff-interface.h"
+#include "dir.h"
 
 /*
  * We have many arrays of size 3.  Whenever we have such an array, the
@@ -3608,11 +3609,18 @@ static int record_conflicted_index_entries(struct merge_options *opt)
 
 	/*
 	 * We are in a conflicted state. These conflicts might be inside
-	 * sparse-directory entries, so expand the index preemtively.
-	 * Also, we set original_cache_nr below, but that might change if
+	 * sparse-directory entries, so check if any entries are outside
+	 * of the sparse-checkout cone preemptively.
+	 *
+	 * We set original_cache_nr below, but that might change if
 	 * index_name_pos() calls ask for paths within sparse directories.
 	 */
-	ensure_full_index(index);
+	strmap_for_each_entry(&opt->priv->conflicted, &iter, e) {
+		if (!path_in_sparse_checkout(e->key, index)) {
+			ensure_full_index(index);
+			break;
+		}
+	}
 
 	/* If any entries have skip_worktree set, we'll have to check 'em out */
 	state.force = 1;