diff --git a/crate-status.md b/crate-status.md index 44fb8d7c4d1..64cae232207 100644 --- a/crate-status.md +++ b/crate-status.md @@ -371,6 +371,7 @@ Check out the [performance discussion][gix-diff-performance] as well. - [ ] shallow-history support - [ ] rename tracking (track different paths through history) - [ ] commits to ignore +- [ ] pass all blame-cornercases (from Git) * **Performance-Improvements** - [ ] use commit-graph bloom filter for performance - [ ] traverse input-commits in correct order without `compute_indegrees_to_depth()` diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 5b3ccd7cc05..0a18ee60e96 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -91,51 +91,48 @@ where let mut out = Vec::new(); let mut diff_state = gix_diff::tree::State::default(); - 'outer: for item in traverse { - let item = item.map_err(|err| Error::Traverse(err.into()))?; - let suspect = item.id; + 'outer: while let Some(item) = traverse.next() { + let commit = item.map_err(|err| Error::Traverse(err.into()))?; + let suspect = commit.id; stats.commits_traversed += 1; - let mut parent_ids = item.parent_ids; + let parent_ids = commit.parent_ids; if parent_ids.is_empty() { - // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of - // the last `item` that was yielded by `traverse`, so it makes sense to assign the - // remaining lines to it, even though we don’t explicitly check whether that is true - // here. We could perhaps use `needed_to_obtain` to compare `suspect` against an empty - // tree to validate this assumption. - out.extend( - hunks_to_blame - .iter() - .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), - ); - - hunks_to_blame.clear(); - break; + if traverse.peek().is_none() { + // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of + // the last `item` that was yielded by `traverse`, so it makes sense to assign the + // remaining lines to it, even though we don’t explicitly check whether that is true + // here. We could perhaps use diff-tree-to-tree to compare `suspect` + // against an empty tree to validate this assumption. + unblamed_to_out(&mut hunks_to_blame, &mut out, suspect); + break; + } else { + // There is more, keep looking. + continue; + } } let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? else { continue; }; - if parent_ids.len() == 1 { - let parent_id = parent_ids.pop().expect("just validated there is exactly one"); + for parent_id in &parent_ids { if let Some(parent_entry) = - find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2, &mut stats)? + find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)? { if entry.oid == parent_entry.oid { - // The blobs storing the blamed file in `entry` and `parent_entry` are identical - // which is why we can pass blame to the parent without further checks. - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - continue; + pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame); + continue 'outer; } } + } + let more_than_one_parent = parent_ids.len() > 1; + for parent_id in parent_ids { let changes_for_file_path = tree_diff_at_file_path( &odb, file_path, - item.id, + commit.id, parent_id, &mut stats, &mut diff_state, @@ -144,92 +141,42 @@ where &mut buf3, )?; let Some(modification) = changes_for_file_path else { - // None of the changes affected the file we’re currently blaming. Pass blame to parent. - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - continue; - }; - - match modification { - gix_diff::tree::recorder::Change::Addition { .. } => { - // Every line that has not been blamed yet on a commit, is expected to have been - // added when the file was added to the repository. - out.extend( - hunks_to_blame - .iter() - .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), - ); - - hunks_to_blame.clear(); - break; - } - gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), - gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; - hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - } - } - } else { - for parent_id in &parent_ids { - if let Some(parent_entry) = - find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)? - { - if entry.oid == parent_entry.oid { - // The blobs storing the blamed file in `entry` and `parent_entry` are - // identical which is why we can pass blame to the parent without further - // checks. - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, *parent_id); - } - continue 'outer; - } - } - } - - for parent_id in parent_ids { - let changes_for_file_path = tree_diff_at_file_path( - &odb, - file_path, - item.id, - parent_id, - &mut stats, - &mut diff_state, - &mut buf, - &mut buf2, - &mut buf3, - )?; - let Some(modification) = changes_for_file_path else { + if more_than_one_parent { // None of the changes affected the file we’re currently blaming. Pass blame // to parent. for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.clone_blame(suspect, parent_id); } + } else { + pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame); + } + continue; + }; - continue; - }; - - match modification { - gix_diff::tree::recorder::Change::Addition { .. } => { + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + if more_than_one_parent { // Do nothing under the assumption that this always (or almost always) // implies that the file comes from a different parent, compared to which // it was modified, not added. // // TODO: I still have to figure out whether this is correct in all cases. + } else { + unblamed_to_out(&mut hunks_to_blame, &mut out, suspect); + break; } - gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), - gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; - hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - } + } + gix_diff::tree::recorder::Change::Deletion { .. } => { + unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted") + } + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; + hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); + pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame); } } + } + if more_than_one_parent { for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.remove_blame(suspect); } @@ -252,6 +199,24 @@ where }) } +/// The blobs storing the blamed file in `entry` and `parent_entry` are identical which is why +/// we can pass blame to the parent without further checks. +fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec) { + for unblamed_hunk in hunks_to_blame { + unblamed_hunk.pass_blame(from, to); + } +} + +fn unblamed_to_out(hunks_to_blame: &mut Vec, out: &mut Vec, suspect: ObjectId) { + // Every line that has not been blamed yet on a commit, is expected to have been + // added when the file was added to the repository. + out.extend( + hunks_to_blame + .drain(..) + .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), + ); +} + /// This function merges adjacent blame entries. It merges entries that are adjacent both in the /// blamed file and in the original file that introduced them. This follows `git`’s /// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 7132af04048..eed47d66439 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -90,13 +90,12 @@ fn process_change( }; let range_in_suspect = range_in_suspect.clone(); - - match ( - range_in_suspect.contains(&added.start), - // Since `added` is a range that is not inclusive at the end, `added.end` is - // not part of `added`. The first line that is `added.end - 1`. - (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end, - ) { + let range_contains_added_start = range_in_suspect.contains(&added.start); + // Since `added` is a range that is not inclusive at the end, `added.end` is + // not part of `added`. The first line that is `added.end - 1`. + let range_contains_added_end = + (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end; + match (range_contains_added_start, range_contains_added_end) { (true, true) => { // <----------> (hunk) // <---> (added) @@ -442,15 +441,15 @@ impl BlameEntry { } /// Create an offset from a portion of the *Original File*. - fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self { + fn from_unblamed_hunk(mut unblamed_hunk: UnblamedHunk, commit_id: ObjectId) -> Self { let range_in_original_file = unblamed_hunk .suspects - .get(&commit_id) + .remove(&commit_id) .expect("Private and only called when we now `commit_id` is in the suspect list"); Self { - range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(), - range_in_original_file: range_in_original_file.clone(), + range_in_blamed_file: unblamed_hunk.range_in_blamed_file, + range_in_original_file, commit_id, } }