Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter and flatten markers #4639

Merged
merged 5 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
288 changes: 264 additions & 24 deletions crates/uv-resolver/src/marker.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#![allow(clippy::enum_glob_use)]
#![allow(clippy::enum_glob_use, clippy::single_match_else)]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh Clippy. (I agree with your stylistic choice here.)

use std::collections::HashMap;
use std::ops::Bound::{self, *};
Expand Down Expand Up @@ -136,16 +136,24 @@ pub(crate) fn requires_python_marker(tree: &MarkerTree) -> Option<RequiresPython
/// Normalizes this marker tree.
///
/// This function does a number of operations to normalize a marker tree recursively:
/// - Sort all nested expressions.
/// - Sort and flatten all nested expressions.
/// - Simplify expressions. This includes combining overlapping version ranges and removing duplicate
/// expressions at the same level of precedence. For example, `(a == 'a' and a == 'a') or b == 'b'` can
/// be reduced, but `a == 'a' and (a == 'a' or b == 'b')` cannot.
/// expressions.
/// - Normalize the order of version expressions to the form `<version key> <version op> <version>`
/// (i.e. not the reverse).
///
/// This is useful in cases where creating conjunctions or disjunctions might occur in a non-deterministic
/// order. This routine will attempt to erase the distinction created by such a construction.
pub(crate) fn normalize(tree: MarkerTree) -> Option<MarkerTree> {
pub(crate) fn normalize(mut tree: MarkerTree) -> Option<MarkerTree> {
// Filter out redundant expressions that show up before and after normalization.
filter_all(&mut tree);
let mut tree = normalize_all(tree)?;
filter_all(&mut tree);
Some(tree)
}

/// Normalize the marker tree recursively.
pub(crate) fn normalize_all(tree: MarkerTree) -> Option<MarkerTree> {
match tree {
MarkerTree::And(trees) => {
let mut reduced = Vec::new();
Expand All @@ -155,20 +163,27 @@ pub(crate) fn normalize(tree: MarkerTree) -> Option<MarkerTree> {
// Simplify nested expressions as much as possible first.
//
// If the expression gets normalized out (e.g., `version < '3.8' and version >= '3.8'`), omit it.
let Some(subtree) = normalize(subtree) else {
let Some(subtree) = normalize_all(subtree) else {
continue;
};

// Extract expressions we may be able to simplify more.
if let MarkerTree::Expression(ref expr) = subtree {
if let Some((key, range)) = keyed_range(expr) {
versions.entry(key.clone()).or_default().push(range);
continue;
match subtree {
MarkerTree::Or(_) => reduced.push(subtree),
// Flatten nested `And` expressions.
MarkerTree::And(subtrees) => reduced.extend(subtrees),
// Extract expressions we may be able to simplify more.
MarkerTree::Expression(ref expr) => {
if let Some((key, range)) = keyed_range(expr) {
versions.entry(key.clone()).or_default().push(range);
continue;
}

reduced.push(subtree);
}
}
reduced.push(subtree);
}

// Combine version ranges.
simplify_ranges(&mut reduced, versions, |ranges| {
ranges
.iter()
Expand All @@ -193,19 +208,25 @@ pub(crate) fn normalize(tree: MarkerTree) -> Option<MarkerTree> {
// Simplify nested expressions as much as possible first.
//
// If the expression gets normalized out (e.g., `version < '3.8' and version >= '3.8'`), return `true`.
let subtree = normalize(subtree)?;

// Extract expressions we may be able to simplify more.
if let MarkerTree::Expression(ref expr) = subtree {
if let Some((key, range)) = keyed_range(expr) {
versions.entry(key.clone()).or_default().push(range);
continue;
let subtree = normalize_all(subtree)?;

match subtree {
MarkerTree::And(_) => reduced.push(subtree),
// Flatten nested `Or` expressions.
MarkerTree::Or(subtrees) => reduced.extend(subtrees),
// Extract expressions we may be able to simplify more.
MarkerTree::Expression(ref expr) => {
if let Some((key, range)) = keyed_range(expr) {
versions.entry(key.clone()).or_default().push(range);
continue;
}

reduced.push(subtree);
}
}

reduced.push(subtree);
}

// Combine version ranges.
simplify_ranges(&mut reduced, versions, |ranges| {
ranges
.iter()
Expand All @@ -226,6 +247,177 @@ pub(crate) fn normalize(tree: MarkerTree) -> Option<MarkerTree> {
}
}

/// Removes redundant expressions from the tree recursively.
///
/// This function does not attempt to flatten or clean the tree and may leave it in a denormalized state.
pub(crate) fn filter_all(tree: &mut MarkerTree) {
match tree {
MarkerTree::And(trees) => {
for subtree in &mut *trees {
filter_all(subtree);
}

for conjunct in collect_expressions(trees) {
// Filter out redundant disjunctions (by the Absorption Law).
trees.retain_mut(|tree| !filter_disjunctions(tree, &conjunct));

// Filter out redundant expressions in this conjunction.
for tree in &mut *trees {
filter_conjunct_exprs(tree, &conjunct);
}
}
}

MarkerTree::Or(trees) => {
for subtree in &mut *trees {
filter_all(subtree);
}

for disjunct in collect_expressions(trees) {
// Filter out redundant conjunctions (by the Absorption Law).
trees.retain_mut(|tree| !filter_conjunctions(tree, &disjunct));

// Filter out redundant expressions in this disjunction.
for tree in &mut *trees {
filter_disjunct_exprs(tree, &disjunct);
}
}
}

MarkerTree::Expression(_) => {}
}
}

// Collect all expressions from a tree.
fn collect_expressions(trees: &[MarkerTree]) -> Vec<MarkerExpression> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Scroll-by Nit: // -> /// so it's rustdoc

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might add a little more rustdoc for cases like these. As it is, I think the comment is effectively strictly redundant with the name and type signature. I think I'd write something like this:

/// For a given slice of marker trees, this collects
/// all direct leaf expressions from each marker tree,
/// and skips any marker trees that are themselves
/// not leaves.

I might also add some context as to why this routine is useful.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some more documentation.

trees
.iter()
.filter_map(|tree| match tree {
MarkerTree::Expression(expr) => Some(expr.clone()),
_ => None,
})
.collect()
}

// Filters out matching expressions from any nested disjunctions.
fn filter_disjunct_exprs(tree: &mut MarkerTree, disjunct: &MarkerExpression) {
match tree {
MarkerTree::Or(trees) => {
trees.retain_mut(|tree| match tree {
MarkerTree::Expression(expr) => expr != disjunct,
_ => {
filter_disjunct_exprs(tree, disjunct);
true
}
});
}
MarkerTree::And(trees) => {
for tree in trees {
filter_disjunct_exprs(tree, disjunct);
}
}

MarkerTree::Expression(_) => {}
}
}

// Filters out matching expressions from any nested conjunctions.
fn filter_conjunct_exprs(tree: &mut MarkerTree, conjunct: &MarkerExpression) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why this doesn't also filter them out for a direct leaf expression? I guess it would need to rewrite the marker expression as MarkerTree::And(vec![]) in that case...

Another perspective that I find helpful for routines like this is phrasing like, "returns a marker tree in which the given expression is assumed to be true in all conjunctions." I think that captures better the essence of the contract here if I'm understanding correctly.

Copy link
Member Author

@ibraheemdev ibraheemdev Jul 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's covered by the dedup calls in normalize_all. It's a bit trickier to do that here because we want to filter recursively, but only deduplicate from the outer conjunction or disjunction, depending on the context (we don't just want to remove the expression everywhere).

match tree {
MarkerTree::And(trees) => {
trees.retain_mut(|tree| match tree {
MarkerTree::Expression(expr) => expr != conjunct,
_ => {
filter_conjunct_exprs(tree, conjunct);
true
}
});
}
MarkerTree::Or(trees) => {
for tree in trees {
filter_conjunct_exprs(tree, conjunct);
}
}
MarkerTree::Expression(_) => {}
}
}

// Filters out disjunctions that contain the given expression.
//
// Returns `true` if the outer tree should be removed.
fn filter_disjunctions(tree: &mut MarkerTree, conjunct: &MarkerExpression) -> bool {
let disjunction = match tree {
MarkerTree::Or(trees) => trees,
// Recurse because the tree might not have been flattened.
MarkerTree::And(trees) => {
trees.retain_mut(|tree| !filter_disjunctions(tree, conjunct));
return trees.is_empty();
}
MarkerTree::Expression(_) => return false,
};

let mut filter = Vec::new();
for (i, tree) in disjunction.iter_mut().enumerate() {
match tree {
// Found a matching expression, filter out this entire tree.
MarkerTree::Expression(expr) if expr == conjunct => {
return true;
}
// Filter subtrees.
MarkerTree::Or(_) => {
if filter_disjunctions(tree, conjunct) {
filter.push(i);
}
}
_ => {}
}
}

for i in filter.into_iter().rev() {
disjunction.remove(i);
}

false
}

// Filters out conjunctions that contain the given expression.
//
// Returns `true` if the outer tree should be removed.
fn filter_conjunctions(tree: &mut MarkerTree, disjunct: &MarkerExpression) -> bool {
let conjunction = match tree {
MarkerTree::And(trees) => trees,
// Recurse because the tree might not have been flattened.
MarkerTree::Or(trees) => {
trees.retain_mut(|tree| !filter_conjunctions(tree, disjunct));
return trees.is_empty();
}
MarkerTree::Expression(_) => return false,
};

let mut filter = Vec::new();
for (i, tree) in conjunction.iter_mut().enumerate() {
match tree {
// Found a matching expression, filter out this entire tree.
MarkerTree::Expression(expr) if expr == disjunct => {
return true;
}
// Filter subtrees.
MarkerTree::And(_) => {
if filter_conjunctions(tree, disjunct) {
filter.push(i);
}
}
_ => {}
}
}

for i in filter.into_iter().rev() {
conjunction.remove(i);
}

false
}

// Simplify version expressions.
fn simplify_ranges(
reduced: &mut Vec<MarkerTree>,
Expand Down Expand Up @@ -487,12 +679,55 @@ mod tests {
"python_version == '3.18' and python_version < '3.17'",
);

// cannot simplify nested complex expressions
// flatten nested expressions
assert_marker_equal(
"((extra == 'a' and extra == 'b') and extra == 'c') and extra == 'b'",
"extra == 'a' and extra == 'b' and extra == 'c'",
);

assert_marker_equal(
"((extra == 'a' or extra == 'b') or extra == 'c') or extra == 'b'",
"extra == 'a' or extra == 'b' or extra == 'c'",
);

// complex expressions
assert_marker_equal(
"extra == 'a' or (extra == 'a' and extra == 'b')",
"extra == 'a'",
);

assert_marker_equal(
"extra == 'a' and (extra == 'a' or extra == 'b')",
"extra == 'a' and (extra == 'a' or extra == 'b')",
"extra == 'a'",
);

assert_marker_equal(
"(extra == 'a' and (extra == 'a' or extra == 'b')) or extra == 'd'",
"extra == 'a' or extra == 'd'",
);

assert_marker_equal(
"((extra == 'a' and extra == 'b') or extra == 'c') or extra == 'b'",
"extra == 'b' or extra == 'c'",
);

assert_marker_equal(
"((extra == 'a' or extra == 'b') and extra == 'c') and extra == 'b'",
"extra == 'b' and extra == 'c'",
);

assert_marker_equal(
"((extra == 'a' or extra == 'b') and extra == 'c') or extra == 'b'",
"extra == 'b' or (extra == 'a' and extra == 'c')",
);

// post-normalization filtering
assert_marker_equal(
"(python_version < '3.1' or python_version < '3.2') and (python_version < '3.2' or python_version == '3.3')",
"python_version < '3.2'",
);

// normalize out redundant ranges
assert_normalizes_out("python_version < '3.12.0rc1' or python_version >= '3.12.0rc1'");

assert_normalizes_out(
Expand Down Expand Up @@ -668,7 +903,12 @@ mod tests {
let tree1 = MarkerTree::parse_reporter(one.as_ref(), &mut TracingReporter).unwrap();
let tree1 = normalize(tree1).unwrap();
let tree2 = MarkerTree::parse_reporter(two.as_ref(), &mut TracingReporter).unwrap();
assert_eq!(tree1.to_string(), tree2.to_string());
assert_eq!(
tree1.to_string(),
tree2.to_string(),
"failed to normalize {}",
one.as_ref()
);
}

fn assert_normalizes_to(before: impl AsRef<str>, after: impl AsRef<str>) {
Expand Down
6 changes: 3 additions & 3 deletions crates/uv/tests/pip_compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6565,13 +6565,13 @@ fn universal_constraint_marker() -> Result<()> {
----- stdout -----
# This file was autogenerated by uv via the following command:
# uv pip compile --cache-dir [CACHE_DIR] requirements.in -c constraints.txt --universal
anyio==3.0.0 ; sys_platform == 'win32' or (os_name == 'nt' and sys_platform == 'win32')
anyio==3.0.0 ; sys_platform == 'win32'
# via
# -c constraints.txt
# -r requirements.in
idna==3.6 ; sys_platform == 'win32' or (os_name == 'nt' and sys_platform == 'win32')
idna==3.6 ; sys_platform == 'win32'
# via anyio
sniffio==1.3.1 ; sys_platform == 'win32' or (os_name == 'nt' and sys_platform == 'win32')
sniffio==1.3.1 ; sys_platform == 'win32'
# via anyio

----- stderr -----
Expand Down
Loading