-
Notifications
You must be signed in to change notification settings - Fork 760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Filter and flatten markers #4639
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
#![allow(clippy::enum_glob_use)] | ||
#![allow(clippy::enum_glob_use, clippy::single_match_else)] | ||
|
||
use std::collections::HashMap; | ||
use std::ops::Bound::{self, *}; | ||
|
@@ -136,16 +136,24 @@ pub(crate) fn requires_python_marker(tree: &MarkerTree) -> Option<RequiresPython | |
/// Normalizes this marker tree. | ||
/// | ||
/// This function does a number of operations to normalize a marker tree recursively: | ||
/// - Sort all nested expressions. | ||
/// - Sort and flatten all nested expressions. | ||
/// - Simplify expressions. This includes combining overlapping version ranges and removing duplicate | ||
/// expressions at the same level of precedence. For example, `(a == 'a' and a == 'a') or b == 'b'` can | ||
/// be reduced, but `a == 'a' and (a == 'a' or b == 'b')` cannot. | ||
/// expressions. | ||
/// - Normalize the order of version expressions to the form `<version key> <version op> <version>` | ||
/// (i.e. not the reverse). | ||
/// | ||
/// This is useful in cases where creating conjunctions or disjunctions might occur in a non-deterministic | ||
/// order. This routine will attempt to erase the distinction created by such a construction. | ||
pub(crate) fn normalize(tree: MarkerTree) -> Option<MarkerTree> { | ||
pub(crate) fn normalize(mut tree: MarkerTree) -> Option<MarkerTree> { | ||
// Filter out redundant expressions that show up before and after normalization. | ||
filter_all(&mut tree); | ||
let mut tree = normalize_all(tree)?; | ||
filter_all(&mut tree); | ||
Some(tree) | ||
} | ||
|
||
/// Normalize the marker tree recursively. | ||
pub(crate) fn normalize_all(tree: MarkerTree) -> Option<MarkerTree> { | ||
match tree { | ||
MarkerTree::And(trees) => { | ||
let mut reduced = Vec::new(); | ||
|
@@ -155,20 +163,27 @@ pub(crate) fn normalize(tree: MarkerTree) -> Option<MarkerTree> { | |
// Simplify nested expressions as much as possible first. | ||
// | ||
// If the expression gets normalized out (e.g., `version < '3.8' and version >= '3.8'`), omit it. | ||
let Some(subtree) = normalize(subtree) else { | ||
let Some(subtree) = normalize_all(subtree) else { | ||
continue; | ||
}; | ||
|
||
// Extract expressions we may be able to simplify more. | ||
if let MarkerTree::Expression(ref expr) = subtree { | ||
if let Some((key, range)) = keyed_range(expr) { | ||
versions.entry(key.clone()).or_default().push(range); | ||
continue; | ||
match subtree { | ||
MarkerTree::Or(_) => reduced.push(subtree), | ||
// Flatten nested `And` expressions. | ||
MarkerTree::And(subtrees) => reduced.extend(subtrees), | ||
// Extract expressions we may be able to simplify more. | ||
MarkerTree::Expression(ref expr) => { | ||
if let Some((key, range)) = keyed_range(expr) { | ||
versions.entry(key.clone()).or_default().push(range); | ||
continue; | ||
} | ||
|
||
reduced.push(subtree); | ||
} | ||
} | ||
reduced.push(subtree); | ||
} | ||
|
||
// Combine version ranges. | ||
simplify_ranges(&mut reduced, versions, |ranges| { | ||
ranges | ||
.iter() | ||
|
@@ -193,19 +208,25 @@ pub(crate) fn normalize(tree: MarkerTree) -> Option<MarkerTree> { | |
// Simplify nested expressions as much as possible first. | ||
// | ||
// If the expression gets normalized out (e.g., `version < '3.8' and version >= '3.8'`), return `true`. | ||
let subtree = normalize(subtree)?; | ||
|
||
// Extract expressions we may be able to simplify more. | ||
if let MarkerTree::Expression(ref expr) = subtree { | ||
if let Some((key, range)) = keyed_range(expr) { | ||
versions.entry(key.clone()).or_default().push(range); | ||
continue; | ||
let subtree = normalize_all(subtree)?; | ||
|
||
match subtree { | ||
MarkerTree::And(_) => reduced.push(subtree), | ||
// Flatten nested `Or` expressions. | ||
MarkerTree::Or(subtrees) => reduced.extend(subtrees), | ||
// Extract expressions we may be able to simplify more. | ||
MarkerTree::Expression(ref expr) => { | ||
if let Some((key, range)) = keyed_range(expr) { | ||
versions.entry(key.clone()).or_default().push(range); | ||
continue; | ||
} | ||
|
||
reduced.push(subtree); | ||
} | ||
} | ||
|
||
reduced.push(subtree); | ||
} | ||
|
||
// Combine version ranges. | ||
simplify_ranges(&mut reduced, versions, |ranges| { | ||
ranges | ||
.iter() | ||
|
@@ -226,6 +247,177 @@ pub(crate) fn normalize(tree: MarkerTree) -> Option<MarkerTree> { | |
} | ||
} | ||
|
||
/// Removes redundant expressions from the tree recursively. | ||
/// | ||
/// This function does not attempt to flatten or clean the tree and may leave it in a denormalized state. | ||
pub(crate) fn filter_all(tree: &mut MarkerTree) { | ||
match tree { | ||
MarkerTree::And(trees) => { | ||
for subtree in &mut *trees { | ||
filter_all(subtree); | ||
} | ||
|
||
for conjunct in collect_expressions(trees) { | ||
// Filter out redundant disjunctions (by the Absorption Law). | ||
trees.retain_mut(|tree| !filter_disjunctions(tree, &conjunct)); | ||
|
||
// Filter out redundant expressions in this conjunction. | ||
for tree in &mut *trees { | ||
filter_conjunct_exprs(tree, &conjunct); | ||
} | ||
} | ||
} | ||
|
||
MarkerTree::Or(trees) => { | ||
for subtree in &mut *trees { | ||
filter_all(subtree); | ||
} | ||
|
||
for disjunct in collect_expressions(trees) { | ||
// Filter out redundant conjunctions (by the Absorption Law). | ||
trees.retain_mut(|tree| !filter_conjunctions(tree, &disjunct)); | ||
|
||
// Filter out redundant expressions in this disjunction. | ||
for tree in &mut *trees { | ||
filter_disjunct_exprs(tree, &disjunct); | ||
} | ||
} | ||
} | ||
|
||
MarkerTree::Expression(_) => {} | ||
} | ||
} | ||
|
||
// Collect all expressions from a tree. | ||
fn collect_expressions(trees: &[MarkerTree]) -> Vec<MarkerExpression> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Scroll-by Nit: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might add a little more rustdoc for cases like these. As it is, I think the comment is effectively strictly redundant with the name and type signature. I think I'd write something like this: /// For a given slice of marker trees, this collects
/// all direct leaf expressions from each marker tree,
/// and skips any marker trees that are themselves
/// not leaves. I might also add some context as to why this routine is useful. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added some more documentation. |
||
trees | ||
.iter() | ||
.filter_map(|tree| match tree { | ||
MarkerTree::Expression(expr) => Some(expr.clone()), | ||
_ => None, | ||
}) | ||
.collect() | ||
} | ||
|
||
// Filters out matching expressions from any nested disjunctions. | ||
fn filter_disjunct_exprs(tree: &mut MarkerTree, disjunct: &MarkerExpression) { | ||
match tree { | ||
MarkerTree::Or(trees) => { | ||
trees.retain_mut(|tree| match tree { | ||
MarkerTree::Expression(expr) => expr != disjunct, | ||
_ => { | ||
filter_disjunct_exprs(tree, disjunct); | ||
true | ||
} | ||
}); | ||
} | ||
MarkerTree::And(trees) => { | ||
for tree in trees { | ||
filter_disjunct_exprs(tree, disjunct); | ||
} | ||
} | ||
|
||
MarkerTree::Expression(_) => {} | ||
} | ||
} | ||
|
||
// Filters out matching expressions from any nested conjunctions. | ||
fn filter_conjunct_exprs(tree: &mut MarkerTree, conjunct: &MarkerExpression) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason why this doesn't also filter them out for a direct leaf expression? I guess it would need to rewrite the marker expression as Another perspective that I find helpful for routines like this is phrasing like, "returns a marker tree in which the given expression is assumed to be true in all conjunctions." I think that captures better the essence of the contract here if I'm understanding correctly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, that's covered by the |
||
match tree { | ||
MarkerTree::And(trees) => { | ||
trees.retain_mut(|tree| match tree { | ||
MarkerTree::Expression(expr) => expr != conjunct, | ||
_ => { | ||
filter_conjunct_exprs(tree, conjunct); | ||
true | ||
} | ||
}); | ||
} | ||
MarkerTree::Or(trees) => { | ||
for tree in trees { | ||
filter_conjunct_exprs(tree, conjunct); | ||
} | ||
} | ||
MarkerTree::Expression(_) => {} | ||
} | ||
} | ||
|
||
// Filters out disjunctions that contain the given expression. | ||
// | ||
// Returns `true` if the outer tree should be removed. | ||
fn filter_disjunctions(tree: &mut MarkerTree, conjunct: &MarkerExpression) -> bool { | ||
let disjunction = match tree { | ||
MarkerTree::Or(trees) => trees, | ||
// Recurse because the tree might not have been flattened. | ||
MarkerTree::And(trees) => { | ||
trees.retain_mut(|tree| !filter_disjunctions(tree, conjunct)); | ||
return trees.is_empty(); | ||
} | ||
MarkerTree::Expression(_) => return false, | ||
}; | ||
|
||
let mut filter = Vec::new(); | ||
for (i, tree) in disjunction.iter_mut().enumerate() { | ||
match tree { | ||
// Found a matching expression, filter out this entire tree. | ||
MarkerTree::Expression(expr) if expr == conjunct => { | ||
return true; | ||
} | ||
// Filter subtrees. | ||
MarkerTree::Or(_) => { | ||
if filter_disjunctions(tree, conjunct) { | ||
filter.push(i); | ||
} | ||
} | ||
_ => {} | ||
} | ||
} | ||
|
||
for i in filter.into_iter().rev() { | ||
disjunction.remove(i); | ||
} | ||
|
||
false | ||
} | ||
|
||
// Filters out conjunctions that contain the given expression. | ||
// | ||
// Returns `true` if the outer tree should be removed. | ||
fn filter_conjunctions(tree: &mut MarkerTree, disjunct: &MarkerExpression) -> bool { | ||
let conjunction = match tree { | ||
MarkerTree::And(trees) => trees, | ||
// Recurse because the tree might not have been flattened. | ||
MarkerTree::Or(trees) => { | ||
trees.retain_mut(|tree| !filter_conjunctions(tree, disjunct)); | ||
return trees.is_empty(); | ||
} | ||
MarkerTree::Expression(_) => return false, | ||
}; | ||
|
||
let mut filter = Vec::new(); | ||
for (i, tree) in conjunction.iter_mut().enumerate() { | ||
match tree { | ||
// Found a matching expression, filter out this entire tree. | ||
MarkerTree::Expression(expr) if expr == disjunct => { | ||
return true; | ||
} | ||
// Filter subtrees. | ||
MarkerTree::And(_) => { | ||
if filter_conjunctions(tree, disjunct) { | ||
filter.push(i); | ||
} | ||
} | ||
_ => {} | ||
} | ||
} | ||
|
||
for i in filter.into_iter().rev() { | ||
conjunction.remove(i); | ||
} | ||
|
||
false | ||
} | ||
|
||
// Simplify version expressions. | ||
fn simplify_ranges( | ||
reduced: &mut Vec<MarkerTree>, | ||
|
@@ -487,12 +679,55 @@ mod tests { | |
"python_version == '3.18' and python_version < '3.17'", | ||
); | ||
|
||
// cannot simplify nested complex expressions | ||
// flatten nested expressions | ||
assert_marker_equal( | ||
"((extra == 'a' and extra == 'b') and extra == 'c') and extra == 'b'", | ||
"extra == 'a' and extra == 'b' and extra == 'c'", | ||
); | ||
|
||
assert_marker_equal( | ||
"((extra == 'a' or extra == 'b') or extra == 'c') or extra == 'b'", | ||
"extra == 'a' or extra == 'b' or extra == 'c'", | ||
); | ||
|
||
// complex expressions | ||
assert_marker_equal( | ||
"extra == 'a' or (extra == 'a' and extra == 'b')", | ||
"extra == 'a'", | ||
); | ||
|
||
assert_marker_equal( | ||
"extra == 'a' and (extra == 'a' or extra == 'b')", | ||
"extra == 'a' and (extra == 'a' or extra == 'b')", | ||
"extra == 'a'", | ||
); | ||
|
||
assert_marker_equal( | ||
"(extra == 'a' and (extra == 'a' or extra == 'b')) or extra == 'd'", | ||
"extra == 'a' or extra == 'd'", | ||
); | ||
|
||
assert_marker_equal( | ||
"((extra == 'a' and extra == 'b') or extra == 'c') or extra == 'b'", | ||
"extra == 'b' or extra == 'c'", | ||
); | ||
|
||
assert_marker_equal( | ||
"((extra == 'a' or extra == 'b') and extra == 'c') and extra == 'b'", | ||
"extra == 'b' and extra == 'c'", | ||
); | ||
|
||
assert_marker_equal( | ||
"((extra == 'a' or extra == 'b') and extra == 'c') or extra == 'b'", | ||
"extra == 'b' or (extra == 'a' and extra == 'c')", | ||
); | ||
|
||
// post-normalization filtering | ||
assert_marker_equal( | ||
"(python_version < '3.1' or python_version < '3.2') and (python_version < '3.2' or python_version == '3.3')", | ||
"python_version < '3.2'", | ||
); | ||
|
||
// normalize out redundant ranges | ||
assert_normalizes_out("python_version < '3.12.0rc1' or python_version >= '3.12.0rc1'"); | ||
|
||
assert_normalizes_out( | ||
|
@@ -668,7 +903,12 @@ mod tests { | |
let tree1 = MarkerTree::parse_reporter(one.as_ref(), &mut TracingReporter).unwrap(); | ||
let tree1 = normalize(tree1).unwrap(); | ||
let tree2 = MarkerTree::parse_reporter(two.as_ref(), &mut TracingReporter).unwrap(); | ||
assert_eq!(tree1.to_string(), tree2.to_string()); | ||
assert_eq!( | ||
tree1.to_string(), | ||
tree2.to_string(), | ||
"failed to normalize {}", | ||
one.as_ref() | ||
); | ||
} | ||
|
||
fn assert_normalizes_to(before: impl AsRef<str>, after: impl AsRef<str>) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh Clippy. (I agree with your stylistic choice here.)