From 894bedba1ad68206710453fec75177b4bfa30d14 Mon Sep 17 00:00:00 2001 From: Christian Schilling Date: Mon, 31 Oct 2022 12:51:20 +0000 Subject: [PATCH] Implement squash filter for list of commits (#948) --- josh-proxy/src/lib.rs | 1 + src/bin/josh-filter.rs | 124 +++++++++++++++++++++++++++-------------- src/cache.rs | 6 +- src/filter/mod.rs | 70 +++++++++++++++++++++-- src/filter/parse.rs | 3 +- src/history.rs | 47 +++++++++++++--- tests/filter/file.t | 2 +- tests/filter/squash.t | 96 +++++++++++++++++++++++++++++++ 8 files changed, 290 insertions(+), 59 deletions(-) create mode 100644 tests/filter/squash.t diff --git a/josh-proxy/src/lib.rs b/josh-proxy/src/lib.rs index afbca30a1..9d00fcbff 100644 --- a/josh-proxy/src/lib.rs +++ b/josh-proxy/src/lib.rs @@ -393,6 +393,7 @@ fn split_changes( &repo.find_commit(changes[i].1)?, &vec![&parent], &new_tree, + None, )?; changes[i].1 = new_commit; new_bases.push(new_commit); diff --git a/src/bin/josh-filter.rs b/src/bin/josh-filter.rs index 35963b0dc..c94be3fdb 100644 --- a/src/bin/josh-filter.rs +++ b/src/bin/josh-filter.rs @@ -37,9 +37,24 @@ fn make_app() -> clap::Command { ) .arg( clap::Arg::new("squash") + .help("Produce a history that contains only commits pointed to by references matching the given pattern") + .long("squash") + ) + .arg( + clap::Arg::new("author") + .help("Author to use for commits with rewritten message") + .long("author") + ) + .arg( + clap::Arg::new("email") + .help("Author email to use for commits with rewritten message") + .long("email") + ) + .arg( + clap::Arg::new("single") .action(clap::ArgAction::SetTrue) - .help("Only output one commit, without history") - .long("squash"), + .help("Produce a history that contains only one single commit") + .long("single"), ) .arg( clap::Arg::new("discover") @@ -138,10 +153,6 @@ fn run_filter(args: Vec) -> josh::JoshResult { let mut filterobj = josh::filter::parse(&specstr)?; - if args.get_flag("squash") { - filterobj = josh::filter::chain(josh::filter::parse(":SQUASH")?, filterobj); - } - if args.get_flag("print-filter") { let filterobj = if args.get_flag("reverse") { josh::filter::invert(filterobj)? @@ -162,6 +173,38 @@ fn run_filter(args: Vec) -> josh::JoshResult { let transaction = josh::cache::Transaction::new(repo, None); let repo = transaction.repo(); + let input_ref = args.get_one::("input").unwrap(); + + let mut refs = vec![]; + let mut ids = vec![]; + + let reference = repo.resolve_reference_from_short_name(input_ref).unwrap(); + let input_ref = reference.name().unwrap().to_string(); + refs.push((input_ref.clone(), reference.target().unwrap())); + + if args.get_flag("single") { + filterobj = josh::filter::chain(josh::filter::squash(None), filterobj); + } + + if let Some(pattern) = args.get_one::("squash") { + let pattern = pattern.to_string(); + for reference in repo.references_glob(&pattern).unwrap() { + let reference = reference?; + if let Some(target) = reference.target() { + ids.push((target, reference.name().unwrap().to_string())); + refs.push((reference.name().unwrap().to_string(), target)); + } + } + filterobj = josh::filter::chain( + josh::filter::squash(Some(( + args.get_one::("author").unwrap(), + args.get_one::("email").unwrap(), + &ids, + ))), + filterobj, + ); + }; + let odb = repo.odb()?; let mp = if args.get_flag("pack") { let mempack = odb.add_new_mempack_backend(1000)?; @@ -188,10 +231,8 @@ fn run_filter(args: Vec) -> josh::JoshResult { } }); - let input_ref = args.get_one::("input").unwrap(); - if args.get_flag("discover") { - let r = repo.revparse_single(input_ref)?; + let r = repo.revparse_single(&input_ref)?; let hs = josh::housekeeping::find_all_workspaces_and_subdirectories(&r.peel_to_tree()?)?; for i in hs { if i.contains(":workspace=") { @@ -210,23 +251,10 @@ fn run_filter(args: Vec) -> josh::JoshResult { let update_target = args.get_one::("update").unwrap(); - let src = input_ref; let target = update_target; let reverse = args.get_flag("reverse"); - let t = if reverse { - "refs/JOSH_TMP".to_owned() - } else { - target.to_string() - }; - let src_r = repo - .revparse_ext(src)? - .1 - .ok_or(josh::josh_error("reference not found"))?; - - let src = src_r.name().unwrap().to_string(); - let check_permissions = args.get_flag("check-permission"); let mut permissions_filter = josh::filter::empty(); if check_permissions { @@ -264,28 +292,31 @@ fn run_filter(args: Vec) -> josh::JoshResult { permissions_filter = josh::filter::empty(); } - let old_oid = if let Ok(id) = transaction.repo().refname_to_id(&t) { + let old_oid = if let Ok(id) = transaction.repo().refname_to_id(&target) { id } else { git2::Oid::zero() }; - let mut updated_refs = josh::filter_refs( - &transaction, - filterobj, - &[(src.clone(), src_r.target().unwrap())], - permissions_filter, - )?; - updated_refs[0].0 = t; - josh::update_refs(&transaction, &mut updated_refs, ""); - if args.get_one::("update").map(|v| v.as_str()) != Some("FILTERED_HEAD") - && updated_refs.len() == 1 - && updated_refs[0].1 == old_oid - { - println!( - "Warning: reference {} wasn't updated", - args.get_one::("update").unwrap() - ); + + let mut updated_refs = josh::filter_refs(&transaction, filterobj, &refs, permissions_filter)?; + for i in 0..updated_refs.len() { + if updated_refs[i].0 == input_ref { + if reverse { + updated_refs[i].0 = "refs/JOSH_TMP".to_string(); + } else { + updated_refs[i].0 = target.to_string(); + } + } else { + updated_refs[i].0 = + updated_refs[i] + .0 + .replacen("refs/heads/", "refs/heads/filtered/", 1); + updated_refs[i].0 = updated_refs[i] + .0 + .replacen("refs/tags/", "refs/tags/filtered/", 1); + } } + josh::update_refs(&transaction, &mut updated_refs, ""); #[cfg(feature = "search")] if let Some(searchstring) = args.get_one::("search") { @@ -324,7 +355,7 @@ fn run_filter(args: Vec) -> josh::JoshResult { if reverse { let new = repo.revparse_single(target).unwrap().id(); let old = repo.revparse_single("JOSH_TMP").unwrap().id(); - let unfiltered_old = repo.revparse_single(input_ref).unwrap().id(); + let unfiltered_old = repo.revparse_single(&input_ref).unwrap().id(); match josh::history::unapply_filter( &transaction, @@ -337,7 +368,7 @@ fn run_filter(args: Vec) -> josh::JoshResult { &mut None, ) { Ok(rewritten) => { - repo.reference(&src, rewritten, true, "unapply_filter")?; + repo.reference(&input_ref, rewritten, true, "unapply_filter")?; } Err(JoshError(msg)) => { println!("{}", msg); @@ -346,6 +377,17 @@ fn run_filter(args: Vec) -> josh::JoshResult { } } + if !reverse + && args.get_one::("update") != Some(&"FILTERED_HEAD".to_string()) + && updated_refs.len() == 1 + && updated_refs[0].1 == old_oid + { + println!( + "Warning: reference {} wasn't updated", + args.get_one::("update").unwrap() + ); + } + if let Some(gql_query) = args.get_one::("graphql") { let context = josh::graphql::context(transaction.try_clone()?, transaction.try_clone()?); *context.allow_refs.lock()? = true; diff --git a/src/cache.rs b/src/cache.rs index 7f3c7822c..9eccc860f 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -33,10 +33,10 @@ pub fn print_stats() { let name = String::from_utf8(name.to_vec()).unwrap(); let t = db.open_tree(&name).unwrap(); if !t.is_empty() { - let name = if name.contains("SUBTRACT") || name.starts_with('_') { - name.clone() + let name = if let Ok(filter) = filter::parse(&name) { + filter::pretty(filter, 4) } else { - filter::pretty(filter::parse(&name).unwrap(), 4) + name.clone() }; v.push((t.len(), name)); } diff --git a/src/filter/mod.rs b/src/filter/mod.rs index 022b48626..54e41ca3d 100644 --- a/src/filter/mod.rs +++ b/src/filter/mod.rs @@ -61,6 +61,18 @@ pub fn empty() -> Filter { to_filter(Op::Empty) } +pub fn squash(ids: Option<(&str, &str, &[(git2::Oid, String)])>) -> Filter { + if let Some((author, email, ids)) = ids { + to_filter(Op::Squash(Some( + ids.iter() + .map(|(x, y)| (*x, (y.clone(), author.to_string(), email.to_string()))) + .collect(), + ))) + } else { + to_filter(Op::Squash(None)) + } +} + fn to_filter(op: Op) -> Filter { let s = format!("{:?}", op); let f = Filter( @@ -85,7 +97,7 @@ enum Op { Empty, Fold, Paths, - Squash, + Squash(Option>), Linear, RegexReplace(regex::Regex, String), @@ -236,7 +248,18 @@ fn spec2(op: &Op) -> String { #[cfg(feature = "search")] Op::Index => ":INDEX".to_string(), Op::Fold => ":FOLD".to_string(), - Op::Squash => ":SQUASH".to_string(), + Op::Squash(None) => ":SQUASH".to_string(), + Op::Squash(Some(hs)) => { + let mut v = hs + .iter() + .map(|(x, y)| format!("{}:{}:{}:{}", x, y.0, y.1, y.2)) + .collect::>(); + v.sort(); + let s = v.join(","); + let s = git2::Oid::hash_object(git2::ObjectType::Blob, s.as_bytes()) + .expect("hash_object filter"); + format!(":SQUASH={}", s) + } Op::Linear => ":linear".to_string(), Op::Subdir(path) => format!(":/{}", parse::quote(&path.to_string_lossy())), Op::File(path) => format!("::{}", parse::quote(&path.to_string_lossy())), @@ -341,8 +364,15 @@ fn apply_to_commit2( Ok(Some(git2::Oid::zero())) }; } - Op::Squash => { - return Some(history::rewrite_commit(repo, commit, &[], &commit.tree()?)).transpose() + Op::Squash(None) => { + return Some(history::rewrite_commit( + repo, + commit, + &[], + &commit.tree()?, + None, + )) + .transpose() } _ => { if let Some(oid) = transaction.get(filter, commit.id()) { @@ -354,6 +384,27 @@ fn apply_to_commit2( rs_tracing::trace_scoped!("apply_to_commit", "spec": spec(filter), "commit": commit.id().to_string()); let filtered_tree = match &to_op(filter) { + Op::Squash(Some(ids)) => { + if let Some(_) = ids.get(&commit.id()) { + commit.tree()? + } else { + for parent in commit.parents() { + return Ok( + if let Some(fparent) = transaction.get(filter, parent.id()) { + Some(history::drop_commit( + commit, + vec![fparent], + transaction, + filter, + )?) + } else { + None + }, + ); + } + tree::empty(repo) + } + } Op::Linear => { let p: Vec<_> = commit.parent_ids().collect(); if p.is_empty() { @@ -370,6 +421,7 @@ fn apply_to_commit2( commit.tree()?, transaction, filter, + None, )) .transpose(); } @@ -452,6 +504,7 @@ fn apply_to_commit2( filtered_tree, transaction, filter, + None, )) .transpose(); } @@ -528,12 +581,18 @@ fn apply_to_commit2( let filtered_parent_ids = some_or!(filtered_parent_ids, { return Ok(None) }); + let message = match to_op(filter) { + Op::Squash(Some(ids)) => ids.get(&commit.id()).map(|x| x.clone()), + _ => None, + }; + Some(history::create_filtered_commit( commit, filtered_parent_ids, filtered_tree, transaction, filter, + message, )) .transpose() } @@ -557,7 +616,8 @@ fn apply2<'a>( Op::Nop => Ok(tree), Op::Empty => return Ok(tree::empty(repo)), Op::Fold => Ok(tree), - Op::Squash => Ok(tree), + Op::Squash(None) => Ok(tree), + Op::Squash(Some(_)) => Err(josh_error("not applicable to tree")), Op::Linear => Ok(tree), Op::RegexReplace(regex, replacement) => { diff --git a/src/filter/parse.rs b/src/filter/parse.rs index c7506a99c..e087e1918 100644 --- a/src/filter/parse.rs +++ b/src/filter/parse.rs @@ -33,7 +33,8 @@ fn make_op(args: &[&str]) -> JoshResult { Where `path` is path to the directory where workspace.josh file is located "# ))), - ["SQUASH"] => Ok(Op::Squash), + ["SQUASH"] => Ok(Op::Squash(None)), + ["SQUASH", _ids @ ..] => Err(josh_error("SQUASH with ids can't be parsed")), ["linear"] => Ok(Op::Linear), ["PATHS"] => Ok(Op::Paths), #[cfg(feature = "search")] diff --git a/src/history.rs b/src/history.rs index 94249b92d..299d05bb3 100644 --- a/src/history.rs +++ b/src/history.rs @@ -181,20 +181,29 @@ pub fn rewrite_commit( base: &git2::Commit, parents: &[&git2::Commit], tree: &git2::Tree, + message: Option<(String, String, String)>, ) -> JoshResult { - if base.tree()?.id() == tree.id() && all_equal(base.parents(), parents) { + if message == None && base.tree()?.id() == tree.id() && all_equal(base.parents(), parents) { // Looks like an optimization, but in fact serves to not change the commit in case // it was signed. return Ok(base.id()); } - let b = repo.commit_create_buffer( - &base.author(), - &base.committer(), - base.message_raw().unwrap_or("no message"), - tree, - parents, - )?; + let b = if let Some((message, author, email)) = message { + let a = base.author(); + let new_a = git2::Signature::new(&author, &email, &a.when())?; + let c = base.committer(); + let new_c = git2::Signature::new(&author, &email, &c.when())?; + repo.commit_create_buffer(&new_a, &new_c, &message, tree, parents)? + } else { + repo.commit_create_buffer( + &base.author(), + &base.committer(), + &base.message_raw().unwrap_or("no message"), + tree, + parents, + )? + }; if let Ok((sig, _)) = repo.extract_signature(&base.id(), None) { // Re-create the object with the original signature (which of course does not match any @@ -493,6 +502,7 @@ pub fn unapply_filter( &module_commit, &original_parents_refs, &new_tree, + None, )?; if let Some(ref mut change_ids) = change_ids { @@ -526,18 +536,37 @@ fn select_parent_commits<'a>( } } +pub fn drop_commit<'a>( + original_commit: &'a git2::Commit, + filtered_parent_ids: Vec, + transaction: &cache::Transaction, + filter: filter::Filter, +) -> JoshResult { + let r = if let Some(id) = filtered_parent_ids.iter().next() { + *id + } else { + git2::Oid::zero() + }; + + transaction.insert(filter, original_commit.id(), r, false); + + Ok(r) +} + pub fn create_filtered_commit<'a>( original_commit: &'a git2::Commit, filtered_parent_ids: Vec, filtered_tree: git2::Tree<'a>, transaction: &cache::Transaction, filter: filter::Filter, + message: Option<(String, String, String)>, ) -> JoshResult { let (r, is_new) = create_filtered_commit2( transaction.repo(), original_commit, filtered_parent_ids, filtered_tree, + message, )?; let store = is_new || original_commit.parent_ids().len() != 1; @@ -552,6 +581,7 @@ fn create_filtered_commit2<'a>( original_commit: &'a git2::Commit, filtered_parent_ids: Vec, filtered_tree: git2::Tree<'a>, + message: Option<(String, String, String)>, ) -> JoshResult<(git2::Oid, bool)> { let filtered_parent_commits: Result, _> = filtered_parent_ids .iter() @@ -596,6 +626,7 @@ fn create_filtered_commit2<'a>( original_commit, &selected_filtered_parent_commits, &filtered_tree, + message, )?, true, )) diff --git a/tests/filter/file.t b/tests/filter/file.t index 2bc448ca9..d45f9381b 100644 --- a/tests/filter/file.t +++ b/tests/filter/file.t @@ -43,7 +43,7 @@ * add file2 * add file1 - $ josh-filter -s --squash --file file.josh + $ josh-filter -s --single --file file.josh [2] :prefix=a [2] :prefix=b [3] :/sub1 diff --git a/tests/filter/squash.t b/tests/filter/squash.t new file mode 100644 index 000000000..3a1416c0f --- /dev/null +++ b/tests/filter/squash.t @@ -0,0 +1,96 @@ + $ export RUST_BACKTRACE=1 + $ git init -q 1> /dev/null + + $ echo contents1 > file1 + $ git add . + $ git commit -m "add file1" 1> /dev/null + + $ git log --graph --pretty=%s + * add file1 + + $ git checkout -b branch2 + Switched to a new branch 'branch2' + + $ echo contents2 > file1 + $ git add . + $ git commit -m "mod file1" 1> /dev/null + + $ echo contents3 > file3 + $ git add . + $ git commit -m "mod file3" 1> /dev/null + + $ git checkout master + Switched to branch 'master' + + $ echo contents3 > file2 + $ git add . + $ git commit -m "add file2" 1> /dev/null + + $ git merge -q branch2 --no-ff + + $ josh-filter -s --squash "refs/tags/*" --author "New Author" --email "new@e.mail" --update refs/heads/filtered + Warning: reference refs/heads/filtered wasn't updated + [1] :SQUASH=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 + + $ git log --graph --decorate --pretty=oneline refs/heads/filtered + fatal: ambiguous argument 'refs/heads/filtered': unknown revision or path not in the working tree. + Use '--' to separate paths from revisions, like this: + 'git [...] -- [...]' + [128] + $ git tag tag_a 1d69b7d + $ josh-filter -s --squash "refs/tags/*" --author "New Author" --email "new@e.mail" --update refs/heads/filtered + [1] :SQUASH=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 + [2] :SQUASH=e8e83b9c5d2f779f0cea83a6cad68b710a399c96 + + $ git log --graph --decorate --pretty=oneline refs/heads/filtered + * d8aa5a9937f4f0bd645dbc0b591bae5cd6b6d91b (tag: filtered/tag_a, filtered) refs/tags/tag_a + $ git tag tag_b 0b4cf6c + + + $ git log --graph --decorate --pretty=oneline + * 1d69b7d2651f744be3416f2ad526aeccefb99310 (HEAD -> master, tag: tag_a) Merge branch 'branch2' + |\ + | * 86871b8775ad3baca86484337d1072aa1d386f7e (branch2) mod file3 + | * 975d4c4975912729482cc864d321c5196a969271 mod file1 + * | e707f76bb6a1390f28b2162da5b5eb6933009070 add file2 + |/ + * 0b4cf6c9efbbda1eada39fa9c1d21d2525b027bb (tag: tag_b) add file1 + + $ josh-filter -s --squash "refs/tags/*" --author "New Author" --email "new@e.mail" --update refs/heads/filtered + [1] :SQUASH=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 + [2] :SQUASH=e8e83b9c5d2f779f0cea83a6cad68b710a399c96 + [3] :SQUASH=3953063f3dc58661e9db16f9014aab1e8ec50bf8 + + $ git log --graph --decorate --pretty=oneline refs/heads/filtered + * 5b1a753860ca124024f6dfb4fd018fe7df8beae4 (tag: filtered/tag_a, filtered) refs/tags/tag_a + |\ + * 96a731a4d64a8928e6af7abb2d425df3812b4197 (tag: filtered/tag_b) refs/tags/tag_b + + $ git log --graph --pretty=%an:%ae refs/heads/master + * Josh:josh@example.com + |\ + | * Josh:josh@example.com + | * Josh:josh@example.com + * | Josh:josh@example.com + |/ + * Josh:josh@example.com + $ git log --graph --pretty=%an:%ae refs/heads/filtered + * New Author:new@e.mail + |\ + * New Author:new@e.mail + + $ git tag tag_c 975d4c4 + + $ josh-filter -s --squash "refs/tags/*" --author "New Author" --email "new@e.mail" --update refs/heads/filtered + [1] :SQUASH=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 + [2] :SQUASH=e8e83b9c5d2f779f0cea83a6cad68b710a399c96 + [3] :SQUASH=3953063f3dc58661e9db16f9014aab1e8ec50bf8 + [6] :SQUASH=6a132477d438779dbaeb0d68b9aab55786e28dd9 + + $ git log --graph --decorate --pretty=oneline refs/heads/filtered + * 9fe45cb2bead844630852ab338ecd8e073f8ba50 (tag: filtered/tag_a, filtered) refs/tags/tag_a + |\ + | * d6b88d4c1cc566b7f4d9b51353ec6f3204a93b81 (tag: filtered/tag_c) refs/tags/tag_c + |/ + * 96a731a4d64a8928e6af7abb2d425df3812b4197 (tag: filtered/tag_b) refs/tags/tag_b +