diff --git a/ghstack/submit.py b/ghstack/submit.py index 644ce48..d95c66a 100644 --- a/ghstack/submit.py +++ b/ghstack/submit.py @@ -196,6 +196,9 @@ class DiffMeta: # happened to this pull request what: str + # The name of the branch that should be targeted + base: str + @property def pr_url(self) -> str: return self.elab_diff.pull_request_resolved.url() @@ -898,7 +901,7 @@ def process_commit( self._sanity_check_ghnum(username, ghnum) # Create base/head commits if needed - push_branches = self._create_non_orig_branches( + push_branches, base_branch = self._create_non_orig_branches( base, base_diff_meta, diff, elab_diff, username, ghnum, submit ) @@ -940,6 +943,7 @@ def process_commit( commit_msg=commit_msg, push_branches=push_branches, what=what, + base=base_branch, ) def _raise_poisoned(self) -> None: @@ -1087,7 +1091,7 @@ def _create_non_orig_branches( username: str, ghnum: GhNumber, submit: bool, - ) -> GhBranches: + ) -> Tuple[GhBranches, str]: # How exactly do we submit a commit to GitHub? # # Here is the relevant state: @@ -1158,6 +1162,7 @@ def _create_non_orig_branches( # Create base commit if necessary updated_base = False if not self.direct: + base_branch = branch_base(username, ghnum) if ( push_branches.base.commit is None or push_branches.base.commit.tree != base.tree @@ -1181,6 +1186,96 @@ def _create_non_orig_branches( head_args.extend(("-p", new_base)) push_branches.base.update(GhCommit(new_base, base.tree)) else: + # So, there is some complication here. We're computing what base + # to use based on the local situation on the user diff stack, but + # the remote merge structure may disagree with our local + # situation. For example, suppose I have a commit stack A - B, + # and then I insert a new commit A - M - B between them; + # previously B would have been based on A, but now it is based + # on M. What should happen here? + # + # Here are the high level correctness conditions: + # - No force pushes (history must be preserved) + # - GitHub displays a diff which is equivalent to the original + # user diff + # + # It turns out the logic here is fine, and the only thing it + # chokes on is rebasing back in time on master branch (you can't + # go back in time on PR branches, so this is a moot point there.) + # The problem is suppose you have: + # + # A - B - C + # \ \ + # M2 M1 # M1 was cherry-picked onto A, becoming M2 + # + # In branch form, this becomes: + # + # A - B - C + # \ \ + # \ M1 - M2 + # \ / + # \-----/ + # + # However, the merge base for C and M2 will always be computed to + # be B, because B is an ancestor of both C and M2, and it always + # beets out A (which is an ancestor of B). This means that you + # will diff M2 against B, which will typically result in "remove + # changes from B" spuriously showing up on the PR. + # + # When heads are always monotonically moving forward in time, + # there is not any problem with progressively more complicated + # merge histories, because we always specify the "correct" base + # branch. For example, consider: + # + # A - B + # \ + # \- X - Y1 + # \ + # \- Y2 + # + # Where Y1 is cherry-picked off of X onto B directly. In branch + # form, this becomes: + # + # A - B + # \ + # \- X - Y1 - Y2 + # + # But we update the base branch to be B, so we correctly diff Y2 + # against B (where here, the tree for Y2 no longer incorporates + # the changes for X). + # + # What does NOT work in this situation is if you manually (outside + # of ghstack) retarget Y2 back at X; we will spuriously report + # that the diff X and Y2 removes the changes from X. If you use + # ghstack, however, we will do this: + # + # A - B + # \ + # \- X - Y1 - Y2 - Y3 + # + # Where here Y3 has restored the changes from X, so the diff from + # X to Y3 checks out. + # + # It turns out there are a subset of manipulations, for which it + # is always safe to change the target base commit from GitHub UI + # without pushing a new commit. Intuitively, the idea is that + # once you add a commit as a merge base, you can't take it back: + # we always consider that branch to have been "merged in". So + # you can effectively only ever insert new commits between + # pre-existing commits, but once a commit depends on another + # commit, that dependency must always exist. I'm still + # considering whether or not we should force push by default in + # this sort of situation. + # + # By the way, what happens if you reorder commits? You get this + # funny looking graph: + # + # A - B + # \ + # X - Y - Y2 + # \ \ + # \------- X2 + # We never have to create a base commit, we read it out from # the base if base_diff_meta is not None: @@ -1189,12 +1284,30 @@ def _create_non_orig_branches( # # We can always use next, because if head is OK, head will have # been advanced to next anyway + # + # TODO: I do not feel this can be None if base_diff_meta.head is not None: + # TODO: This assert is sus, next may be ahed of head assert base_diff_meta.next == base_diff_meta.head new_base = base_diff_meta.next + + if base_diff_meta.next == base_diff_meta.head: + # use head + base_branch = branch_head( + base_diff_meta.username, base_diff_meta.ghnum + ) + else: + # use next + base_branch = branch_next( + base_diff_meta.username, base_diff_meta.ghnum + ) else: + # TODO: test that there isn't a more recent ancestor + # such that this doesn't actually work new_base = base.commit_id + base_branch = GitCommitHash(self.base) + # Check if the base is already an ancestor, don't need to add it # if so if push_branches.next.commit is not None and self.sh.git( @@ -1235,7 +1348,7 @@ def _create_non_orig_branches( else: push_branches.head.update(GhCommit(new_head, diff.tree)) - return push_branches + return push_branches, base_branch def _create_pull_request( self, @@ -1314,6 +1427,11 @@ def push_updates( ) ) # TODO: don't update this if it doesn't need updating + base_kwargs = {} + if self.direct: + base_kwargs["base"] = s.base + else: + assert s.base == s.elab_diff.base_ref self.github.patch( "repos/{owner}/{repo}/pulls/{number}".format( owner=self.repo_owner, repo=self.repo_name, number=s.number @@ -1323,6 +1441,7 @@ def push_updates( s.body, ), title=s.title, + **base_kwargs, ) # It is VERY important that we do base updates BEFORE real @@ -1483,8 +1602,10 @@ def assert_eq(a: Any, b: Any) -> None: (base_commit,) = ghstack.git.split_header( self.sh.git("rev-list", "--header", "-1", f"{self.remote_name}/{base_ref}") ) - # TODO: tree equality may not hold for self.direct - assert_eq(base_commit.tree, user_parent_commit.tree) + # TODO: tree equality may not hold for self.direct, figure out a + # related invariant + if not self.direct: + assert_eq(base_commit.tree, user_parent_commit.tree) # 6. Orig commit was correctly pushed assert_eq( @@ -1507,10 +1628,12 @@ def assert_eq(a: Any, b: Any) -> None: head_commit.commit_id, head_commit.parents[0], ] - assert pre_branch_state.base_commit_id in [ - base_commit.commit_id, - *([base_commit.parents[0]] if base_commit.parents else []), - ] + # The base branch can change if we changed base in direct mode + if not self.direct: + assert pre_branch_state.base_commit_id in [ + base_commit.commit_id, + *([base_commit.parents[0]] if base_commit.parents else []), + ] else: # Direct commit parent typically have base, as it will be the # main branch diff --git a/test_ghstack.py b/test_ghstack.py index fa8b4d4..22b2755 100644 --- a/test_ghstack.py +++ b/test_ghstack.py @@ -1365,103 +1365,159 @@ def test_cherry_pick(self) -> None: """, ) - # ------------------------------------------------------------------------- # + @use_direct() + def test_direct_cherry_pick(self) -> None: + self.sh.git("checkout", "-b", "feature") - def test_reorder(self) -> None: - self.writeFileAndAdd("file1.txt", "A") - self.sh.git("commit", "-m", "Commit 1\n\nA commit with an A") - self.sh.test_tick() + self.commit("A") + self.commit("B") + A, B = self.gh("Initial 2") - self.writeFileAndAdd("file2.txt", "B") - self.sh.git("commit", "-m", "Commit 2\n\nA commit with an B") - self.sh.test_tick() + self.sh.git("checkout", "master") + self.commit("M") + self.sh.git("push", "origin", "master") - self.gh("Initial") - self.sh.test_tick() - self.substituteRev("origin/gh/ezyang/1/head", "rMRG1") - self.substituteRev("origin/gh/ezyang/2/head", "rMRG2") + self.cherry_pick(B) + self.gh("Cherry pick") self.assertExpectedInline( self.dump_github(), """\ -[O] #500 Commit 1 (gh/ezyang/1/head -> gh/ezyang/1/base) +[O] #500 Commit A (gh/ezyang/1/head -> master) Stack: * #501 * __->__ #500 - A commit with an A - * d35565d (gh/ezyang/1/head) - | Initial on "Commit 1" - * b4fea1f (gh/ezyang/1/base) - Update base for Initial on "Commit 1" -[O] #501 Commit 2 (gh/ezyang/2/head -> gh/ezyang/2/base) + * 2949b6b (gh/ezyang/1/next, gh/ezyang/1/head) + | Initial 2 on "Commit A" + * dc8bfe4 + Initial commit + +[O] #501 Commit B (gh/ezyang/2/head -> master) Stack: * __->__ #501 + + + + * fd891f3 (gh/ezyang/2/next, gh/ezyang/2/head) + |\\ Cherry pick on "Commit B" + | * 686e5ea (HEAD -> master) + | | Commit M + * | d8884f2 + | | Initial 2 on "Commit B" + * | 2949b6b (gh/ezyang/1/next, gh/ezyang/1/head) + |/ Initial 2 on "Commit A" + * dc8bfe4 + Initial commit + +""", + ) + + # ------------------------------------------------------------------------- # + + def test_reorder(self) -> None: + self.commit("A") + self.commit("B") + A, B = self.gh("Initial") + + self.checkout(GitCommitHash("HEAD~~")) + self.cherry_pick(B) + self.cherry_pick(A) + B2, A2 = self.gh("Reorder") + + self.assertExpectedInline( + self.dump_github(), + """\ +[O] #500 Commit A (gh/ezyang/1/head -> gh/ezyang/1/base) + + Stack: + * __->__ #500 + * #501 + + + + * 5a11d6e (gh/ezyang/1/head) + |\\ Reorder on "Commit A" + | * 48df0b3 (gh/ezyang/1/base) + | | Update base for Reorder on "Commit A" + * | 30f6c01 + |/ Initial on "Commit A" + * 7e61353 + Update base for Initial on "Commit A" + +[O] #501 Commit B (gh/ezyang/2/head -> gh/ezyang/2/base) + + Stack: * #500 + * __->__ #501 - A commit with an B - * eae49cd (gh/ezyang/2/head) - | Initial on "Commit 2" - * 30abec4 (gh/ezyang/2/base) - Update base for Initial on "Commit 2" + + * 28e7ae2 (gh/ezyang/2/head) + |\\ Reorder on "Commit B" + | * 7be762b (gh/ezyang/2/base) + | | Update base for Reorder on "Commit B" + * | 4d6d2a4 + |/ Initial on "Commit B" + * c9e5b0d + Update base for Initial on "Commit B" """, ) - # https://stackoverflow.com/a/16205257 - self.sh.git("rebase", "--onto", "HEAD~2", "HEAD~", "HEAD") - self.sh.test_tick() - self.sh.git("cherry-pick", "master~") - self.sh.test_tick() + @use_direct() + def test_direct_reorder(self) -> None: + self.commit("A") + self.commit("B") + A, B = self.gh("Initial") - self.gh("Reorder") - self.sh.test_tick() - self.substituteRev("origin/gh/ezyang/1/base", "rINI1A") - self.substituteRev("origin/gh/ezyang/1/head", "rMRG1A") - self.substituteRev("origin/gh/ezyang/2/base", "rINI2A") - self.substituteRev("origin/gh/ezyang/2/head", "rMRG2A") + self.checkout(GitCommitHash("HEAD~~")) + self.cherry_pick(B) + self.cherry_pick(A) + B2, A2 = self.gh("Reorder") self.assertExpectedInline( self.dump_github(), """\ -[O] #500 Commit 1 (gh/ezyang/1/head -> gh/ezyang/1/base) +[O] #500 Commit A (gh/ezyang/1/head -> gh/ezyang/2/head) Stack: * __->__ #500 * #501 - A commit with an A - * 58a1844 (gh/ezyang/1/head) - |\\ Reorder on "Commit 1" - | * f8862a8 (gh/ezyang/1/base) - | | Update base for Reorder on "Commit 1" - * | d35565d - |/ Initial on "Commit 1" - * b4fea1f - Update base for Initial on "Commit 1" -[O] #501 Commit 2 (gh/ezyang/2/head -> gh/ezyang/2/base) + * 3a17667 (gh/ezyang/1/next, gh/ezyang/1/head) + |\\ Reorder on "Commit A" + | * 5f812b3 (gh/ezyang/2/next, gh/ezyang/2/head) + | | Reorder on "Commit B" + | * 60b80d9 + |/ Initial on "Commit B" + * 8bf3ca1 + | Initial on "Commit A" + * dc8bfe4 (HEAD -> master) + Initial commit + +[O] #501 Commit B (gh/ezyang/2/head -> master) Stack: * #500 * __->__ #501 - A commit with an B - * bd9356f (gh/ezyang/2/head) - |\\ Reorder on "Commit 2" - | * 39a5bae (gh/ezyang/2/base) - | | Update base for Reorder on "Commit 2" - * | eae49cd - |/ Initial on "Commit 2" - * 30abec4 - Update base for Initial on "Commit 2" + + * 5f812b3 (gh/ezyang/2/next, gh/ezyang/2/head) + | Reorder on "Commit B" + * 60b80d9 + | Initial on "Commit B" + * 8bf3ca1 + | Initial on "Commit A" + * dc8bfe4 (HEAD -> master) + Initial commit """, )