From 42b77c709e98f8f75498e1b57eac20756f6c6ead Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sat, 14 Nov 2020 21:50:53 +0000 Subject: [PATCH 01/13] Add some tests --- .../ui/pattern/usefulness/consts-opaque.rs | 39 ++++++++++-- .../pattern/usefulness/consts-opaque.stderr | 60 +++++++++++-------- 2 files changed, 71 insertions(+), 28 deletions(-) diff --git a/src/test/ui/pattern/usefulness/consts-opaque.rs b/src/test/ui/pattern/usefulness/consts-opaque.rs index f87f96e34fccd..ca4fcd85bb6df 100644 --- a/src/test/ui/pattern/usefulness/consts-opaque.rs +++ b/src/test/ui/pattern/usefulness/consts-opaque.rs @@ -25,10 +25,6 @@ enum Baz { impl Eq for Baz {} const BAZ: Baz = Baz::Baz1; -type Quux = fn(usize, usize) -> usize; -fn quux(a: usize, b: usize) -> usize { a + b } -const QUUX: Quux = quux; - fn main() { match FOO { FOO => {} @@ -106,9 +102,44 @@ fn main() { //~^ ERROR unreachable pattern } + type Quux = fn(usize, usize) -> usize; + fn quux(a: usize, b: usize) -> usize { a + b } + const QUUX: Quux = quux; + match QUUX { QUUX => {} QUUX => {} _ => {} } + + #[derive(PartialEq, Eq)] + struct Wrap<T>(T); + const WRAPQUUX: Wrap<Quux> = Wrap(quux); + + match WRAPQUUX { + WRAPQUUX => {} + WRAPQUUX => {} + Wrap(_) => {} + } + + match WRAPQUUX { + Wrap(_) => {} + WRAPQUUX => {} // detected unreachable because we do inspect the `Wrap` layer + //~^ ERROR unreachable pattern + } + + #[derive(PartialEq, Eq)] + enum WhoKnows<T> { + Yay(T), + Nope, + }; + const WHOKNOWSQUUX: WhoKnows<Quux> = WhoKnows::Yay(quux); + + match WHOKNOWSQUUX { + WHOKNOWSQUUX => {} + WhoKnows::Yay(_) => {} + WHOKNOWSQUUX => {} // detected unreachable because we do inspect the `WhoKnows` layer + //~^ ERROR unreachable pattern + WhoKnows::Nope => {} + } } diff --git a/src/test/ui/pattern/usefulness/consts-opaque.stderr b/src/test/ui/pattern/usefulness/consts-opaque.stderr index f10166d5a3580..68451043cf540 100644 --- a/src/test/ui/pattern/usefulness/consts-opaque.stderr +++ b/src/test/ui/pattern/usefulness/consts-opaque.stderr @@ -1,11 +1,11 @@ error: to use a constant of type `Foo` in a pattern, `Foo` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:34:9 + --> $DIR/consts-opaque.rs:30:9 | LL | FOO => {} | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:36:9 + --> $DIR/consts-opaque.rs:32:9 | LL | _ => {} // should not be emitting unreachable warning | ^ | note: the lint level is defined here --> $DIR/consts-opaque.rs:6:9 | LL | #![deny(unreachable_patterns)] | ^^^^^^^^^^^^^^^^^^^^ error: to use a constant of type `Foo` in a pattern, `Foo` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:41:9 + --> $DIR/consts-opaque.rs:37:9 | LL | FOO_REF => {} | ^^^^^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:43:9 + --> $DIR/consts-opaque.rs:39:9 | LL | Foo(_) => {} // should not be emitting unreachable warning | ^^^^^^ warning: to use a constant of type `Foo` in a pattern, `Foo` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:49:9 + --> $DIR/consts-opaque.rs:45:9 | LL | FOO_REF_REF => {} | ^^^^^^^^^^^ | = note: `#[warn(indirect_structural_match)]` on by default = note: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! = note: for more information, see issue #62411 error: to use a constant of type `Bar` in a pattern, `Bar` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:57:9 + --> $DIR/consts-opaque.rs:53:9 | LL | BAR => {} // should not be emitting unreachable warning | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:57:9 + --> $DIR/consts-opaque.rs:53:9 | LL | Bar => {} | --- matches any value LL | BAR => {} // should
not be emitting unreachable warning | ^^^ unreachable pattern error: unreachable pattern - --> $DIR/consts-opaque.rs:60:9 + --> $DIR/consts-opaque.rs:56:9 | LL | Bar => {} | --- matches any value @@ -62,19 +62,19 @@ LL | _ => {} | ^ unreachable pattern error: to use a constant of type `Bar` in a pattern, `Bar` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:65:9 + --> $DIR/consts-opaque.rs:61:9 | LL | BAR => {} | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:67:9 + --> $DIR/consts-opaque.rs:63:9 | LL | Bar => {} // should not be emitting unreachable warning | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:69:9 + --> $DIR/consts-opaque.rs:65:9 | LL | Bar => {} // should not be emitting unreachable warning | --- matches any value @@ -83,76 +83,88 @@ LL | _ => {} | ^ unreachable pattern error: to use a constant of type `Bar` in a pattern, `Bar` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:74:9 + --> $DIR/consts-opaque.rs:70:9 | LL | BAR => {} | ^^^ error: to use a constant of type `Bar` in a pattern, `Bar` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:76:9 + --> $DIR/consts-opaque.rs:72:9 | LL | BAR => {} // should not be emitting unreachable warning | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:76:9 + --> $DIR/consts-opaque.rs:72:9 | LL | BAR => {} // should not be emitting unreachable warning | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:79:9 + --> $DIR/consts-opaque.rs:75:9 | LL | _ => {} // should not be emitting unreachable warning | ^ error: to use a constant of type `Baz` in a pattern, `Baz` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:84:9 + --> $DIR/consts-opaque.rs:80:9 | LL | BAZ => {} | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:86:9 + --> $DIR/consts-opaque.rs:82:9 | LL | Baz::Baz1 => {} // should not be emitting unreachable warning | ^^^^^^^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:88:9 + --> $DIR/consts-opaque.rs:84:9 | LL | _ => {} | ^ error: to use a constant of type `Baz` in a pattern, `Baz` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:94:9 + --> $DIR/consts-opaque.rs:90:9 | LL | BAZ => {} | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:96:9 + --> $DIR/consts-opaque.rs:92:9 | LL | _ => {} | ^ error: to use a constant of type `Baz` in a pattern, `Baz` must be annotated with `#[derive(PartialEq, Eq)]` - --> $DIR/consts-opaque.rs:101:9 + --> $DIR/consts-opaque.rs:97:9 | LL | BAZ => {} | ^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:103:9 + --> $DIR/consts-opaque.rs:99:9 | LL | Baz::Baz2 => {} // should not be emitting unreachable warning | ^^^^^^^^^ error: unreachable pattern - --> $DIR/consts-opaque.rs:105:9 + --> $DIR/consts-opaque.rs:101:9 | LL | _ => {} // should not be emitting unreachable warning | ^ -error: aborting due to 22 previous errors; 1 warning emitted +error: unreachable pattern + --> $DIR/consts-opaque.rs:127:9 + | +LL | WRAPQUUX => {} // detected unreachable because we do inspect the `Wrap` layer + | ^^^^^^^^ + +error: unreachable pattern + --> $DIR/consts-opaque.rs:141:9 + | +LL | WHOKNOWSQUUX => {} // detected unreachable because we do inspect the `WhoKnows` layer + | ^^^^^^^^^^^^ + +error: aborting due to 24 previous errors; 1 warning emitted From 5a24b2c2c7b483ef6eae7e46d69cc200cb02575d Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Thu, 10 Dec 2020 13:52:51 +0000 Subject: [PATCH 02/13] 
Factor out `SplitIntRange` used for integer range splitting --- .../src/thir/pattern/deconstruct_pat.rs | 199 ++++++++++-------- 1 file changed, 110 insertions(+), 89 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index dd0c61a2882f3..a6cf0ebeb82ed 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -24,7 +24,7 @@ use rustc_target::abi::{Integer, Size, VariantIdx}; use smallvec::{smallvec, SmallVec}; use std::cmp::{self, max, min, Ordering}; -use std::iter::IntoIterator; +use std::iter::{once, IntoIterator}; use std::ops::RangeInclusive; /// An inclusive interval, used for precise integer exhaustiveness checking. @@ -183,77 +183,24 @@ impl IntRange { Pat { ty, span: DUMMY_SP, kind: Box::new(kind) } } - /// For exhaustive integer matching, some constructors are grouped within other constructors - /// (namely integer typed values are grouped within ranges). However, when specialising these - /// constructors, we want to be specialising for the underlying constructors (the integers), not - /// the groups (the ranges). Thus we need to split the groups up. Splitting them up naïvely would - /// mean creating a separate constructor for every single value in the range, which is clearly - /// impractical. However, observe that for some ranges of integers, the specialisation will be - /// identical across all values in that range (i.e., there are equivalence classes of ranges of - /// constructors based on their `U(S(c, P), S(c, p))` outcome). These classes are grouped by - /// the patterns that apply to them (in the matrix `P`). We can split the range whenever the - /// patterns that apply to that range (specifically: the patterns that *intersect* with that range) - /// change. - /// Our solution, therefore, is to split the range constructor into subranges at every single point - /// the group of intersecting patterns changes (using the method described below). - /// And voilà! We're testing precisely those ranges that we need to, without any exhaustive matching - /// on actual integers. The nice thing about this is that the number of subranges is linear in the - /// number of rows in the matrix (i.e., the number of cases in the `match` statement), so we don't - /// need to be worried about matching over gargantuan ranges. - /// - /// Essentially, given the first column of a matrix representing ranges, looking like the following: - /// - /// |------| |----------| |-------| || - /// |-------| |-------| |----| || - /// |---------| - /// - /// We split the ranges up into equivalence classes so the ranges are no longer overlapping: - /// - /// |--|--|||-||||--||---|||-------| |-|||| || - /// - /// The logic for determining how to split the ranges is fairly straightforward: we calculate - /// boundaries for each interval range, sort them, then create constructors for each new interval - /// between every pair of boundary points. (This essentially sums up to performing the intuitive - /// merging operation depicted above.) + /// Split this range, as described at the top of the file. fn split<'p, 'tcx>( &self, pcx: PatCtxt<'_, 'p, 'tcx>, hir_id: Option<HirId>, ) -> SmallVec<[Constructor<'tcx>; 1]> { - /// Represents a border between 2 integers. Because the intervals spanning borders - /// must be able to cover every integer, we need to be able to represent - /// 2^128 + 1 such borders.
- #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] - enum Border { - JustBefore(u128), - AfterMax, - } - - // A function for extracting the borders of an integer interval. - fn range_borders(r: IntRange) -> impl Iterator<Item = Border> { - let (lo, hi) = r.range.into_inner(); - let from = Border::JustBefore(lo); - let to = match hi.checked_add(1) { - Some(m) => Border::JustBefore(m), - None => Border::AfterMax, - }; - vec![from, to].into_iter() - } - - // Collect the span and range of all the intersecting ranges to lint on likely - // incorrect range patterns. (#63987) + // We collect the span and range of all the intersecting ranges to lint on likely incorrect + // range patterns. (#63987) let mut overlaps = vec![]; + let mut split_range = SplitIntRange::new(self.clone()); let row_len = pcx.matrix.column_count().unwrap_or(0); - // `borders` is the set of borders between equivalence classes: each equivalence - // class lies between 2 borders. - let row_borders = pcx + let intranges = pcx .matrix .head_ctors_and_spans(pcx.cx) - .filter_map(|(ctor, span)| Some((ctor.as_int_range()?, span))) - .filter_map(|(range, span)| { - let intersection = self.intersection(&range); - let should_lint = self.suspicious_intersection(&range); - if let (Some(range), 1, true) = (&intersection, row_len, should_lint) { + .filter_map(|(ctor, span)| Some((ctor.as_int_range()?, span))); + let intranges = intranges.inspect(|(range, span)| { + if let Some(intersection) = self.intersection(&range) { + if row_len == 1 && self.suspicious_intersection(&range) { // FIXME: for now, only check for overlapping ranges on simple range // patterns. Otherwise with the current logic the following is detected // as overlapping: // ``` // match (0u8, true) { // (0 ..= 125, false) => {} // (125 ..= 255, true) => {} // _ => {} // } // ``` - overlaps.push((range.clone(), span)); + overlaps.push((intersection.clone(), *span)); } - intersection - }) - .flat_map(range_borders); - let self_borders = range_borders(self.clone()); - let mut borders: Vec<_> = row_borders.chain(self_borders).collect(); - borders.sort_unstable(); + } + }); + split_range.split(intranges.map(|(range, _)| range).cloned()); self.lint_overlapping_range_endpoints(pcx, hir_id, overlaps); - // We're going to iterate through every adjacent pair of borders, making sure that - // each represents an interval of nonnegative length, and convert each such - // interval into a constructor. - borders - .array_windows() - .filter_map(|&pair| match pair { - [Border::JustBefore(n), Border::JustBefore(m)] => { - if n < m { - Some(n..=(m - 1)) - } else { - None - } - } - [Border::JustBefore(n), Border::AfterMax] => Some(n..=u128::MAX), - [Border::AfterMax, _] => None, - }) - .map(|range| IntRange { range }) - .map(IntRange) - .collect() + split_range.iter().map(IntRange).collect() } fn lint_overlapping_range_endpoints( @@ -339,6 +265,101 @@ impl IntRange { } } +/// Represents a border between 2 integers. Because the intervals spanning borders must be able to +/// cover every integer, we need to be able to represent 2^128 + 1 such borders. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum IntBorder { + JustBefore(u128), + AfterMax, +} + +/// A range of integers that is partitioned into disjoint subranges. +/// +/// This is fed an input of multiple ranges, and returns an output that covers the union of the +/// inputs but is split so that an output range only intersects an input range by being a subrange +/// of it. No output range straddles the boundary of one of the inputs.
This does constructor +/// splitting for integer ranges as explained at the top of the file. +/// +/// The following input: +/// ``` +/// |-------------------------| // `self` +/// |------| |----------| |----| +/// |-------| |-------| +/// ``` +/// would be iterated over as follows: +/// ``` +/// ||---|--||-|---|---|---|--| +/// ``` +#[derive(Debug, Clone)] +struct SplitIntRange { + /// The range we are splitting + range: IntRange, + /// The borders of ranges we have seen. They are all contained within `range`. This is kept + /// sorted. + borders: Vec<IntBorder>, +} + +impl SplitIntRange { + fn new(r: IntRange) -> Self { + SplitIntRange { range: r.clone(), borders: Vec::new() } + } + + /// Internal use + fn to_borders(r: IntRange) -> [IntBorder; 2] { + use IntBorder::*; + let (lo, hi) = r.boundaries(); + let lo = JustBefore(lo); + let hi = match hi.checked_add(1) { + Some(m) => JustBefore(m), + None => AfterMax, + }; + [lo, hi] + } + + /// Add ranges relative to which we split. + fn split(&mut self, ranges: impl Iterator<Item = IntRange>) { + let this_range = &self.range; + let included_ranges = ranges.filter_map(|r| this_range.intersection(&r)); + let included_borders = included_ranges.flat_map(|r| { + let borders = Self::to_borders(r); + once(borders[0]).chain(once(borders[1])) + }); + self.borders.extend(included_borders); + self.borders.sort_unstable(); + } + + /// Iterate over the contained ranges. + fn iter<'a>(&'a self) -> impl Iterator<Item = IntRange> + Captures<'a> { + use IntBorder::*; + + let self_range = Self::to_borders(self.range.clone()); + // Start with the start of the range. + let mut prev_border = self_range[0]; + self.borders + .iter() + .copied() + // End with the end of the range. + .chain(once(self_range[1])) + // List pairs of adjacent borders. + .map(move |border| { + let ret = (prev_border, border); + prev_border = border; + ret + }) + // Skip duplicates. + .filter(|(prev_border, border)| prev_border != border) + // Finally, convert to ranges. + .map(|(prev_border, border)| { + let range = match (prev_border, border) { + (JustBefore(n), JustBefore(m)) if n < m => n..=(m - 1), + (JustBefore(n), AfterMax) => n..=u128::MAX, + _ => unreachable!(), // Ruled out by the sorting and filtering we did + }; + IntRange { range } + }) + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq)] enum SliceKind { /// Patterns of length `n` (`[x, y]`). From 7948f919108b43d69debcf7ed57d8944407463dd Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sat, 19 Dec 2020 00:37:36 +0000 Subject: [PATCH 03/13] Run the annoying lint separately --- .../src/thir/pattern/deconstruct_pat.rs | 88 ++++++++----------- .../src/thir/pattern/usefulness.rs | 9 +- 2 files changed, 43 insertions(+), 54 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index a6cf0ebeb82ed..f4dd7d2dcd64f 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -19,7 +19,7 @@ use rustc_middle::mir::Field; use rustc_middle::ty::layout::IntegerExt; use rustc_middle::ty::{self, Const, Ty, TyCtxt}; use rustc_session::lint; -use rustc_span::{Span, DUMMY_SP}; +use rustc_span::DUMMY_SP; use rustc_target::abi::{Integer, Size, VariantIdx}; use smallvec::{smallvec, SmallVec}; @@ -184,51 +184,42 @@ impl IntRange { } /// Split this range, as described at the top of the file.
- fn split<'p, 'tcx>( - &self, - pcx: PatCtxt<'_, 'p, 'tcx>, - hir_id: Option<HirId>, - ) -> SmallVec<[Constructor<'tcx>; 1]> { - // We collect the span and range of all the intersecting ranges to lint on likely incorrect - // range patterns. (#63987) - let mut overlaps = vec![]; + fn split<'p, 'tcx>(&self, pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { let mut split_range = SplitIntRange::new(self.clone()); - let row_len = pcx.matrix.column_count().unwrap_or(0); - let intranges = pcx - .matrix - .head_ctors_and_spans(pcx.cx) - .filter_map(|(ctor, span)| Some((ctor.as_int_range()?, span))); - let intranges = intranges.inspect(|(range, span)| { - if let Some(intersection) = self.intersection(&range) { - if row_len == 1 && self.suspicious_intersection(&range) { - // FIXME: for now, only check for overlapping ranges on simple range - // patterns. Otherwise with the current logic the following is detected - // as overlapping: - // ``` - // match (0u8, true) { - // (0 ..= 125, false) => {} - // (125 ..= 255, true) => {} - // _ => {} - // } - // ``` - overlaps.push((intersection.clone(), *span)); - } - } - }); - split_range.split(intranges.map(|(range, _)| range).cloned()); - - self.lint_overlapping_range_endpoints(pcx, hir_id, overlaps); - + let intranges = pcx.matrix.head_ctors(pcx.cx).filter_map(|ctor| ctor.as_int_range()); + split_range.split(intranges.cloned()); split_range.iter().map(IntRange).collect() } - fn lint_overlapping_range_endpoints( - &self, - pcx: PatCtxt<'_, '_, '_>, - hir_id: Option<HirId>, - overlaps: Vec<(IntRange, Span)>, - ) { - if let (true, Some(hir_id)) = (!overlaps.is_empty(), hir_id) { + /// Lint on likely incorrect range patterns (#63987) + pub(super) fn lint_overlapping_range_endpoints(&self, pcx: PatCtxt<'_, '_, '_>, hir_id: HirId) { + if self.is_singleton() { + return; + } + + if pcx.matrix.column_count().unwrap_or(0) != 1 { + // FIXME: for now, only check for overlapping ranges on simple range + // patterns. Otherwise with the current logic the following is detected + // as overlapping: + // ``` + // match (0u8, true) { + // (0 ..= 125, false) => {} + // (125 ..= 255, true) => {} + // _ => {} + // } + // ``` + return; + } + + let overlaps: Vec<_> = pcx + .matrix + .head_ctors_and_spans(pcx.cx) + .filter_map(|(ctor, span)| Some((ctor.as_int_range()?, span))) + .filter(|(range, _)| self.suspicious_intersection(range)) + .map(|(range, span)| (self.intersection(&range).unwrap(), span)) + .collect(); + + if !overlaps.is_empty() { pcx.cx.tcx.struct_span_lint_hir( lint::builtin::OVERLAPPING_RANGE_ENDPOINTS, hir_id, @@ -673,21 +664,14 @@ impl<'tcx> Constructor<'tcx> { /// This function may discard some irrelevant constructors if this preserves behavior and /// diagnostics. Eg. for the `_` case, we ignore the constructors already present in the /// matrix, unless all of them are. - /// - /// `hir_id` is `None` when we're evaluating the wildcard pattern. In that case we do not want - /// to lint for overlapping ranges. - pub(super) fn split<'p>( - &self, - pcx: PatCtxt<'_, 'p, 'tcx>, - hir_id: Option<HirId>, - ) -> SmallVec<[Self; 1]> { + pub(super) fn split<'p>(&self, pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Self; 1]> { debug!("Constructor::split({:#?}, {:#?})", self, pcx.matrix); match self { Wildcard => Constructor::split_wildcard(pcx), // Fast-track if the range is trivial. In particular, we don't do the overlapping // ranges check.
- IntRange(ctor_range) if !ctor_range.is_singleton() => ctor_range.split(pcx, hir_id), + IntRange(ctor_range) if !ctor_range.is_singleton() => ctor_range.split(pcx), Slice(slice @ Slice { kind: VarLen(..), .. }) => slice.split(pcx), // Any other constructor can be used unchanged. _ => smallvec![self.clone()], @@ -937,7 +921,7 @@ impl<'tcx> MissingConstructors<'tcx> { pcx.matrix.head_ctors(pcx.cx).cloned().filter(|c| !c.is_wildcard()).collect(); // Since `all_ctors` never contains wildcards, this won't recurse further. let all_ctors = - all_constructors(pcx).into_iter().flat_map(|ctor| ctor.split(pcx, None)).collect(); + all_constructors(pcx).into_iter().flat_map(|ctor| ctor.split(pcx)).collect(); MissingConstructors { all_ctors, used_ctors } } diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 0ecc034ac0ba9..877d48a85b76b 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -991,11 +991,16 @@ fn is_useful<'p, 'tcx>( }); Usefulness::merge(usefulnesses) } else { + let v_ctor = v.head_ctor(cx); + if let Constructor::IntRange(ctor_range) = &v_ctor { + // Lint on likely incorrect range patterns (#63987) + ctor_range.lint_overlapping_range_endpoints(pcx, hir_id) + } // We split the head constructor of `v`. - let ctors = v.head_ctor(cx).split(pcx, Some(hir_id)); + let split_ctors = v_ctor.split(pcx); // For each constructor, we compute whether there's a value that starts with it that would // witness the usefulness of `v`. - let usefulnesses = ctors.into_iter().map(|ctor| { + let usefulnesses = split_ctors.into_iter().map(|ctor| { // We cache the result of `Fields::wildcards` because it is used a lot. let ctor_wild_subpatterns = Fields::wildcards(pcx, &ctor); let matrix = pcx.matrix.specialize_constructor(pcx, &ctor, &ctor_wild_subpatterns); From 9d0c2ed913efa358c2ce9f17e4753f72c5169dd5 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Fri, 11 Dec 2020 22:20:14 +0000 Subject: [PATCH 04/13] Factor out `SplitVarLenSlice` used for slice splitting --- .../src/thir/pattern/deconstruct_pat.rs | 222 +++++++++--------- 1 file changed, 117 insertions(+), 105 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index f4dd7d2dcd64f..3c6cade1634fd 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -403,129 +403,141 @@ impl Slice { self.kind.arity() } - /// The exhaustiveness-checking paper does not include any details on - /// checking variable-length slice patterns. However, they may be - /// matched by an infinite collection of fixed-length array patterns. - /// - /// Checking the infinite set directly would take an infinite amount - /// of time. However, it turns out that for each finite set of - /// patterns `P`, all sufficiently large array lengths are equivalent: - /// - /// Each slice `s` with a "sufficiently-large" length `l ≥ L` that applies - /// to exactly the subset `Pₜ` of `P` can be transformed to a slice - /// `sₘ` for each sufficiently-large length `m` that applies to exactly - /// the same subset of `P`. 
- /// - /// Because of that, each witness for reachability-checking of one - /// of the sufficiently-large lengths can be transformed to an - /// equally-valid witness of any other length, so we only have - /// to check slices of the "minimal sufficiently-large length" - /// and less. - /// - /// Note that the fact that there is a *single* `sₘ` for each `m` - /// not depending on the specific pattern in `P` is important: if - /// you look at the pair of patterns - /// `[true, ..]` - /// `[.., false]` - /// Then any slice of length ≥1 that matches one of these two - /// patterns can be trivially turned to a slice of any - /// other length ≥1 that matches them and vice-versa, - /// but the slice of length 2 `[false, true]` that matches neither - /// of these patterns can't be turned to a slice from length 1 that - /// matches neither of these patterns, so we have to consider - /// slices from length 2 there. - /// - /// Now, to see that that length exists and find it, observe that slice - /// patterns are either "fixed-length" patterns (`[_, _, _]`) or - /// "variable-length" patterns (`[_, .., _]`). - /// - /// For fixed-length patterns, all slices with lengths *longer* than - /// the pattern's length have the same outcome (of not matching), so - /// as long as `L` is greater than the pattern's length we can pick - /// any `sₘ` from that length and get the same result. - /// - /// For variable-length patterns, the situation is more complicated, - /// because as seen above the precise value of `sₘ` matters. - /// - /// However, for each variable-length pattern `p` with a prefix of length - /// `plₚ` and suffix of length `slₚ`, only the first `plₚ` and the last - /// `slₚ` elements are examined. - /// - /// Therefore, as long as `L` is positive (to avoid concerns about empty - /// types), all elements after the maximum prefix length and before - /// the maximum suffix length are not examined by any variable-length - /// pattern, and therefore can be added/removed without affecting - /// them - creating equivalent patterns from any sufficiently-large - /// length. - /// - /// Of course, if fixed-length patterns exist, we must be sure - /// that our length is large enough to miss them all, so - /// we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))` - /// - /// for example, with the above pair of patterns, all elements - /// but the first and last can be added/removed, so any - /// witness of length ≥2 (say, `[false, false, true]`) can be - /// turned to a witness from any other length ≥2. + /// Split this slice, as described at the top of the file. 
fn split<'p, 'tcx>(self, pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { let (self_prefix, self_suffix) = match self.kind { VarLen(self_prefix, self_suffix) => (self_prefix, self_suffix), _ => return smallvec![Slice(self)], }; - let head_ctors = pcx.matrix.head_ctors(pcx.cx).filter(|c| !c.is_wildcard()); + let mut split_self = SplitVarLenSlice::new(self_prefix, self_suffix, self.array_len); + let slices = pcx.matrix.head_ctors(pcx.cx).filter_map(|c| c.as_slice()).map(|s| s.kind); + split_self.split(slices); + split_self.iter().map(Slice).collect() + } - let mut max_prefix_len = self_prefix; - let mut max_suffix_len = self_suffix; - let mut max_fixed_len = 0; + /// See `Constructor::is_covered_by` + fn is_covered_by(self, other: Self) -> bool { + other.kind.covers_length(self.arity()) + } +} - for ctor in head_ctors { - if let Slice(slice) = ctor { - match slice.kind { - FixedLen(len) => { - max_fixed_len = cmp::max(max_fixed_len, len); - } - VarLen(prefix, suffix) => { - max_prefix_len = cmp::max(max_prefix_len, prefix); - max_suffix_len = cmp::max(max_suffix_len, suffix); - } +/// The exhaustiveness-checking paper does not include any details on checking variable-length +/// slice patterns. However, they may be matched by an infinite collection of fixed-length array +/// patterns. +/// +/// Checking the infinite set directly would take an infinite amount of time. However, it turns out +/// that for each finite set of patterns `P`, all sufficiently large array lengths are equivalent: +/// +/// Each slice `s` with a "sufficiently-large" length `l ≥ L` that applies to exactly the subset +/// `Pₜ` of `P` can be transformed to a slice `sₘ` for each sufficiently-large length `m` that +/// applies to exactly the same subset of `P`. +/// +/// Because of that, each witness for reachability-checking of one of the sufficiently-large +/// lengths can be transformed to an equally-valid witness of any other length, so we only have to +/// check slices of the "minimal sufficiently-large length" and less. +/// +/// Note that the fact that there is a *single* `sₘ` for each `m` not depending on the specific +/// pattern in `P` is important: if you look at the pair of patterns `[true, ..]` `[.., false]` +/// Then any slice of length ≥1 that matches one of these two patterns can be trivially turned to a +/// slice of any other length ≥1 that matches them and vice-versa, but the slice of length 2 +/// `[false, true]` that matches neither of these patterns can't be turned to a slice from length 1 +/// that matches neither of these patterns, so we have to consider slices from length 2 there. +/// +/// Now, to see that that length exists and find it, observe that slice patterns are either +/// "fixed-length" patterns (`[_, _, _]`) or "variable-length" patterns (`[_, .., _]`). +/// +/// For fixed-length patterns, all slices with lengths *longer* than the pattern's length have the +/// same outcome (of not matching), so as long as `L` is greater than the pattern's length we can +/// pick any `sₘ` from that length and get the same result. +/// +/// For variable-length patterns, the situation is more complicated, because as seen above the +/// precise value of `sₘ` matters. +/// +/// However, for each variable-length pattern `p` with a prefix of length `plₚ` and suffix of +/// length `slₚ`, only the first `plₚ` and the last `slₚ` elements are examined. 
+/// +/// Therefore, as long as `L` is positive (to avoid concerns about empty types), all elements after +/// the maximum prefix length and before the maximum suffix length are not examined by any +/// variable-length pattern, and therefore can be added/removed without affecting them - creating +/// equivalent patterns from any sufficiently-large length. +/// +/// Of course, if fixed-length patterns exist, we must be sure that our length is large enough to +/// miss them all, so we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))` +/// +/// `max_slice` below will be made to have arity `L`. +/// +/// For example, with the above pair of patterns, all elements but the first and last can be +/// added/removed, so any witness of length ≥2 (say, `[false, false, true]`) can be turned to a +/// witness from any other length ≥2. +#[derive(Debug)] +struct SplitVarLenSlice { + /// If the type is an array, this is its size. + array_len: Option<u64>, + /// The arity of the input slice. + arity: u64, + /// The smallest slice bigger than any slice seen. `max_slice.arity()` is the length `L` + /// described above. + max_slice: SliceKind, +} + +impl SplitVarLenSlice { + fn new(prefix: u64, suffix: u64, array_len: Option<u64>) -> Self { + SplitVarLenSlice { array_len, arity: prefix + suffix, max_slice: VarLen(prefix, suffix) } + } + + /// Pass a set of slices relative to which to split this one. + fn split(&mut self, slices: impl Iterator<Item = SliceKind>) { + let (max_prefix_len, max_suffix_len) = match &mut self.max_slice { + VarLen(prefix, suffix) => (prefix, suffix), + FixedLen(_) => return, // No need to split + }; + // We grow `self.max_slice` to be larger than all slices encountered, as described above. + // For diagnostics, we keep the prefix and suffix lengths separate, but grow them so that + // `L = max_prefix_len + max_suffix_len`. + let mut max_fixed_len = 0; + for slice in slices { + match slice { + FixedLen(len) => { + max_fixed_len = cmp::max(max_fixed_len, len); + } + VarLen(prefix, suffix) => { + *max_prefix_len = cmp::max(*max_prefix_len, prefix); + *max_suffix_len = cmp::max(*max_suffix_len, suffix); } } - } else { - bug!("unexpected ctor for slice type: {:?}", ctor); } } - - // For diagnostics, we keep the prefix and suffix lengths separate, so in the case - // where `max_fixed_len + 1` is the largest, we adapt `max_prefix_len` accordingly, - // so that `L = max_prefix_len + max_suffix_len`. - if max_fixed_len + 1 >= max_prefix_len + max_suffix_len { + // We want `L = max(L, max_fixed_len + 1)`, modulo the fact that we keep prefix and + // suffix separate. + if max_fixed_len + 1 >= *max_prefix_len + *max_suffix_len { // The subtraction can't overflow thanks to the above check. - // The new `max_prefix_len` is also guaranteed to be larger than its previous - // value. - max_prefix_len = max_fixed_len + 1 - max_suffix_len; + // The new `max_prefix_len` is larger than its previous value. + *max_prefix_len = max_fixed_len + 1 - *max_suffix_len; } - let final_slice = VarLen(max_prefix_len, max_suffix_len); - let final_slice = Slice::new(self.array_len, final_slice); + // We cap the arity of `max_slice` at the array size. match self.array_len { - Some(_) => smallvec![Slice(final_slice)], - None => { - // `self` originally covered the range `(self.arity()..infinity)`. We split that - // range into two: lengths smaller than `final_slice.arity()` are treated - // independently as fixed-lengths slices, and lengths above are captured by - // `final_slice`.
- let smaller_lengths = (self.arity()..final_slice.arity()).map(FixedLen); - smaller_lengths - .map(|kind| Slice::new(self.array_len, kind)) - .chain(Some(final_slice)) - .map(Slice) - .collect() - } + Some(len) if self.max_slice.arity() >= len => self.max_slice = FixedLen(len), + _ => {} } } - /// See `Constructor::is_covered_by` - fn is_covered_by(self, other: Self) -> bool { - other.kind.covers_length(self.arity()) + /// Iterate over the partition of this slice. + fn iter<'a>(&'a self) -> impl Iterator<Item = Slice> + Captures<'a> { + let smaller_lengths = match self.array_len { + // The only admissible fixed-length slice is one of the array size. Whether `max_slice` + // is fixed-length or variable-length, it will be the only relevant slice to output + // here. + Some(_) => (0..0), // empty range + // We cover all arities in the range `(self.arity..infinity)`. We split that range into + // two: lengths smaller than `max_slice.arity()` are treated independently as + // fixed-lengths slices, and lengths above are captured by `max_slice`. + None => self.arity..self.max_slice.arity(), + }; + smaller_lengths + .map(FixedLen) + .chain(once(self.max_slice)) + .map(move |kind| Slice::new(self.array_len, kind)) } } From bbb4ac06511fd0a45632327b7888a31e4f7854e5 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sun, 13 Dec 2020 23:56:13 +0000 Subject: [PATCH 05/13] Rebrand `MissingConstructors` as `SplitWildcard` --- .../src/thir/pattern/deconstruct_pat.rs | 109 ++++++++++-------- .../src/thir/pattern/usefulness.rs | 7 +- 2 files changed, 67 insertions(+), 49 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index 3c6cade1634fd..9b065810ad47b 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -695,20 +695,9 @@ impl<'tcx> Constructor<'tcx> { /// Two constructors that are not in the matrix will either both be caught (by a wildcard), or /// both not be caught. Therefore we can keep the missing constructors grouped together. fn split_wildcard<'p>(pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Self; 1]> { - // Missing constructors are those that are not matched by any non-wildcard patterns in the - // current column. We only fully construct them on-demand, because they're rarely used and - // can be big. - let missing_ctors = MissingConstructors::new(pcx); - if missing_ctors.is_empty(pcx) { - // All the constructors are present in the matrix, so we just go through them all. - // We must also split them first. - missing_ctors.all_ctors - } else { - // Some constructors are missing, thus we can specialize with the wildcard constructor, - // which will stand for those constructors that are missing, and behaves like any of - // them. - smallvec![Wildcard] - } + let mut split_wildcard = SplitWildcard::new(pcx); + split_wildcard.split(pcx); + split_wildcard.into_ctors(pcx) } /// Returns whether `self` is covered by `other`, i.e. whether `self` is a subset of `other`. @@ -811,7 +800,7 @@ impl<'tcx> Constructor<'tcx> { /// `Option<!>`, we do not include `Some(_)` in the returned list of constructors. /// Invariant: this returns an empty `Vec` if and only if the type is uninhabited (as determined by /// `cx.is_uninhabited()`).
-fn all_constructors<'p, 'tcx>(pcx: PatCtxt<'_, 'p, 'tcx>) -> Vec> { +fn all_constructors<'p, 'tcx>(pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { debug!("all_constructors({:?})", pcx.ty); let cx = pcx.cx; let make_range = |start, end| { @@ -821,19 +810,19 @@ fn all_constructors<'p, 'tcx>(pcx: PatCtxt<'_, 'p, 'tcx>) -> Vec vec![make_range(0, 1)], + ty::Bool => smallvec![make_range(0, 1)], ty::Array(sub_ty, len) if len.try_eval_usize(cx.tcx, cx.param_env).is_some() => { let len = len.eval_usize(cx.tcx, cx.param_env); if len != 0 && cx.is_uninhabited(sub_ty) { - vec![] + smallvec![] } else { - vec![Slice(Slice::new(Some(len), VarLen(0, 0)))] + smallvec![Slice(Slice::new(Some(len), VarLen(0, 0)))] } } // Treat arrays of a constant but unknown length like slices. ty::Array(sub_ty, _) | ty::Slice(sub_ty) => { let kind = if cx.is_uninhabited(sub_ty) { FixedLen(0) } else { VarLen(0, 0) }; - vec![Slice(Slice::new(None, kind))] + smallvec![Slice(Slice::new(None, kind))] } ty::Adt(def, substs) if def.is_enum() => { // If the enum is declared as `#[non_exhaustive]`, we treat it as if it had an @@ -863,7 +852,7 @@ fn all_constructors<'p, 'tcx>(pcx: PatCtxt<'_, 'p, 'tcx>) -> Vec(pcx: PatCtxt<'_, 'p, 'tcx>) -> Vec { - vec![ + smallvec![ // The valid Unicode Scalar Value ranges. make_range('\u{0000}' as u128, '\u{D7FF}' as u128), make_range('\u{E000}' as u128, '\u{10FFFF}' as u128), @@ -893,66 +882,94 @@ fn all_constructors<'p, 'tcx>(pcx: PatCtxt<'_, 'p, 'tcx>) -> Vec { let bits = Integer::from_attr(&cx.tcx, SignedInt(ity)).size().bits() as u128; let min = 1u128 << (bits - 1); let max = min - 1; - vec![make_range(min, max)] + smallvec![make_range(min, max)] } &ty::Uint(uty) => { let size = Integer::from_attr(&cx.tcx, UnsignedInt(uty)).size(); let max = size.truncate(u128::MAX); - vec![make_range(0, max)] + smallvec![make_range(0, max)] } // If `exhaustive_patterns` is disabled and our scrutinee is the never type, we cannot // expose its emptiness. The exception is if the pattern is at the top level, because we // want empty matches to be considered exhaustive. ty::Never if !cx.tcx.features().exhaustive_patterns && !pcx.is_top_level => { - vec![NonExhaustive] + smallvec![NonExhaustive] } - ty::Never => vec![], - _ if cx.is_uninhabited(pcx.ty) => vec![], - ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => vec![Single], + ty::Never => smallvec![], + _ if cx.is_uninhabited(pcx.ty) => smallvec![], + ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => smallvec![Single], // This type is one for which we cannot list constructors, like `str` or `f64`. - _ => vec![NonExhaustive], + _ => smallvec![NonExhaustive], } } -// A struct to compute a set of constructors equivalent to `all_ctors \ used_ctors`. +/// A wildcard constructor that we split relative to the constructors in the matrix, as explained +/// at the top of the file. +/// For splitting wildcards, there are two groups of constructors: there are the constructors +/// actually present in the matrix (`matrix_ctors`), and the constructors not present. Two +/// constructors that are not in the matrix will either both be covered (by a wildcard), or both +/// not be covered by any given row. Therefore we can keep the missing constructors grouped +/// together. #[derive(Debug)] -pub(super) struct MissingConstructors<'tcx> { +pub(super) struct SplitWildcard<'tcx> { + /// Constructors seen in the matrix. 
+ matrix_ctors: Vec>, + /// All the constructors for this type all_ctors: SmallVec<[Constructor<'tcx>; 1]>, - used_ctors: Vec>, } -impl<'tcx> MissingConstructors<'tcx> { +impl<'tcx> SplitWildcard<'tcx> { pub(super) fn new<'p>(pcx: PatCtxt<'_, 'p, 'tcx>) -> Self { - let used_ctors: Vec> = + let matrix_ctors = Vec::new(); + let all_ctors = all_constructors(pcx); + SplitWildcard { matrix_ctors, all_ctors } + } + + /// Pass a set of constructors relative to which to split this one. Don't call twice, it won't + /// do what you want. + pub(super) fn split(&mut self, pcx: PatCtxt<'_, '_, 'tcx>) { + self.matrix_ctors = pcx.matrix.head_ctors(pcx.cx).cloned().filter(|c| !c.is_wildcard()).collect(); // Since `all_ctors` never contains wildcards, this won't recurse further. - let all_ctors = - all_constructors(pcx).into_iter().flat_map(|ctor| ctor.split(pcx)).collect(); - - MissingConstructors { all_ctors, used_ctors } + self.all_ctors = self.all_ctors.iter().flat_map(|ctor| ctor.split(pcx)).collect(); } - fn is_empty<'p>(&self, pcx: PatCtxt<'_, 'p, 'tcx>) -> bool { - self.iter(pcx).next().is_none() + /// Whether there are any value constructors for this type that are not present in the matrix. + fn any_missing(&self, pcx: PatCtxt<'_, '_, 'tcx>) -> bool { + self.iter_missing(pcx).next().is_some() } - /// Iterate over all_ctors \ used_ctors - fn iter<'a, 'p>( + /// Iterate over the constructors for this type that are not present in the matrix. + fn iter_missing<'a, 'p>( &'a self, pcx: PatCtxt<'a, 'p, 'tcx>, ) -> impl Iterator> + Captures<'p> { - self.all_ctors.iter().filter(move |ctor| !ctor.is_covered_by_any(pcx, &self.used_ctors)) + self.all_ctors.iter().filter(move |ctor| !ctor.is_covered_by_any(pcx, &self.matrix_ctors)) + } + + /// Return the set of constructors resulting from splitting the wildcard. As explained at the + /// top of the file, if any constructors are missing we can ignore the present ones. + fn into_ctors(self, pcx: PatCtxt<'_, '_, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { + if self.any_missing(pcx) { + // Some constructors are missing, thus we can specialize with the wildcard constructor, + // which will stand for those constructors that are missing, and matches the same rows + // as any of them (namely the wildcard rows). + return smallvec![Wildcard]; + } + + // All the constructors are present in the matrix, so we just go through them all. + self.all_ctors } /// List the patterns corresponding to the missing constructors. In some cases, instead of /// listing all constructors of a given type, we prefer to simply report a wildcard. - pub(super) fn report_patterns<'p>( + pub(super) fn report_missing_patterns<'p>( &self, pcx: PatCtxt<'_, 'p, 'tcx>, ) -> SmallVec<[Pat<'tcx>; 1]> { @@ -984,7 +1001,7 @@ impl<'tcx> MissingConstructors<'tcx> { // The exception is: if we are at the top-level, for example in an empty match, we // sometimes prefer reporting the list of constructors instead of just `_`. let report_when_all_missing = pcx.is_top_level && !IntRange::is_integral(pcx.ty); - if self.used_ctors.is_empty() && !report_when_all_missing { + if self.matrix_ctors.is_empty() && !report_when_all_missing { // All constructors are unused. Report only a wildcard // rather than each individual constructor. smallvec![Pat::wildcard_from_ty(pcx.ty)] @@ -993,7 +1010,7 @@ impl<'tcx> MissingConstructors<'tcx> { // constructor, that matches everything that can be built with // it. For example, if `ctor` is a `Constructor::Variant` for // `Option::Some`, we get the pattern `Some(_)`. 
- self.iter(pcx) + self.iter_missing(pcx) .map(|missing_ctor| Fields::wildcards(pcx, &missing_ctor).apply(pcx, missing_ctor)) .collect() } diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 877d48a85b76b..5da42e705df88 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -306,7 +306,7 @@ use self::Usefulness::*; use self::WitnessPreference::*; -use super::deconstruct_pat::{Constructor, Fields, MissingConstructors}; +use super::deconstruct_pat::{Constructor, Fields, SplitWildcard}; use super::{Pat, PatKind}; use super::{PatternFoldable, PatternFolder}; @@ -810,8 +810,9 @@ impl<'tcx> Usefulness<'tcx> { match self { UsefulWithWitness(witnesses) => { let new_witnesses = if ctor.is_wildcard() { - let missing_ctors = MissingConstructors::new(pcx); - let new_patterns = missing_ctors.report_patterns(pcx); + let mut split_wildcard = SplitWildcard::new(pcx); + split_wildcard.split(pcx); + let new_patterns = split_wildcard.report_missing_patterns(pcx); witnesses .into_iter() .flat_map(|witness| { From 3141f2d78c446b35930d4a73273578e6c6b488f5 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Mon, 14 Dec 2020 01:09:06 +0000 Subject: [PATCH 06/13] Inline `all_constructors` --- .../src/thir/pattern/deconstruct_pat.rs | 236 +++++++++--------- 1 file changed, 115 insertions(+), 121 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index 9b065810ad47b..9e02cb2e36948 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -791,124 +791,6 @@ impl<'tcx> Constructor<'tcx> { } } -/// This determines the set of all possible constructors of a pattern matching -/// values of type `left_ty`. For vectors, this would normally be an infinite set -/// but is instead bounded by the maximum fixed length of slice patterns in -/// the column of patterns being analyzed. -/// -/// We make sure to omit constructors that are statically impossible. E.g., for -/// `Option`, we do not include `Some(_)` in the returned list of constructors. -/// Invariant: this returns an empty `Vec` if and only if the type is uninhabited (as determined by -/// `cx.is_uninhabited()`). -fn all_constructors<'p, 'tcx>(pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { - debug!("all_constructors({:?})", pcx.ty); - let cx = pcx.cx; - let make_range = |start, end| { - IntRange( - // `unwrap()` is ok because we know the type is an integer. - IntRange::from_range(cx.tcx, start, end, pcx.ty, &RangeEnd::Included).unwrap(), - ) - }; - match pcx.ty.kind() { - ty::Bool => smallvec![make_range(0, 1)], - ty::Array(sub_ty, len) if len.try_eval_usize(cx.tcx, cx.param_env).is_some() => { - let len = len.eval_usize(cx.tcx, cx.param_env); - if len != 0 && cx.is_uninhabited(sub_ty) { - smallvec![] - } else { - smallvec![Slice(Slice::new(Some(len), VarLen(0, 0)))] - } - } - // Treat arrays of a constant but unknown length like slices. - ty::Array(sub_ty, _) | ty::Slice(sub_ty) => { - let kind = if cx.is_uninhabited(sub_ty) { FixedLen(0) } else { VarLen(0, 0) }; - smallvec![Slice(Slice::new(None, kind))] - } - ty::Adt(def, substs) if def.is_enum() => { - // If the enum is declared as `#[non_exhaustive]`, we treat it as if it had an - // additional "unknown" constructor. 
- // There is no point in enumerating all possible variants, because the user can't - // actually match against them all themselves. So we always return only the fictitious - // constructor. - // E.g., in an example like: - // - // ``` - // let err: io::ErrorKind = ...; - // match err { - // io::ErrorKind::NotFound => {}, - // } - // ``` - // - // we don't want to show every possible IO error, but instead have only `_` as the - // witness. - let is_declared_nonexhaustive = cx.is_foreign_non_exhaustive_enum(pcx.ty); - - // If `exhaustive_patterns` is disabled and our scrutinee is an empty enum, we treat it - // as though it had an "unknown" constructor to avoid exposing its emptiness. The - // exception is if the pattern is at the top level, because we want empty matches to be - // considered exhaustive. - let is_secretly_empty = def.variants.is_empty() - && !cx.tcx.features().exhaustive_patterns - && !pcx.is_top_level; - - if is_secretly_empty || is_declared_nonexhaustive { - smallvec![NonExhaustive] - } else if cx.tcx.features().exhaustive_patterns { - // If `exhaustive_patterns` is enabled, we exclude variants known to be - // uninhabited. - def.variants - .iter() - .filter(|v| { - !v.uninhabited_from(cx.tcx, substs, def.adt_kind(), cx.param_env) - .contains(cx.tcx, cx.module) - }) - .map(|v| Variant(v.def_id)) - .collect() - } else { - def.variants.iter().map(|v| Variant(v.def_id)).collect() - } - } - ty::Char => { - smallvec![ - // The valid Unicode Scalar Value ranges. - make_range('\u{0000}' as u128, '\u{D7FF}' as u128), - make_range('\u{E000}' as u128, '\u{10FFFF}' as u128), - ] - } - ty::Int(_) | ty::Uint(_) - if pcx.ty.is_ptr_sized_integral() - && !cx.tcx.features().precise_pointer_size_matching => - { - // `usize`/`isize` are not allowed to be matched exhaustively unless the - // `precise_pointer_size_matching` feature is enabled. So we treat those types like - // `#[non_exhaustive]` enums by returning a special unmatcheable constructor. - smallvec![NonExhaustive] - } - &ty::Int(ity) => { - let bits = Integer::from_attr(&cx.tcx, SignedInt(ity)).size().bits() as u128; - let min = 1u128 << (bits - 1); - let max = min - 1; - smallvec![make_range(min, max)] - } - &ty::Uint(uty) => { - let size = Integer::from_attr(&cx.tcx, UnsignedInt(uty)).size(); - let max = size.truncate(u128::MAX); - smallvec![make_range(0, max)] - } - // If `exhaustive_patterns` is disabled and our scrutinee is the never type, we cannot - // expose its emptiness. The exception is if the pattern is at the top level, because we - // want empty matches to be considered exhaustive. - ty::Never if !cx.tcx.features().exhaustive_patterns && !pcx.is_top_level => { - smallvec![NonExhaustive] - } - ty::Never => smallvec![], - _ if cx.is_uninhabited(pcx.ty) => smallvec![], - ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => smallvec![Single], - // This type is one for which we cannot list constructors, like `str` or `f64`. - _ => smallvec![NonExhaustive], - } -} - /// A wildcard constructor that we split relative to the constructors in the matrix, as explained /// at the top of the file. 
/// For splitting wildcards, there are two groups of constructors: there are the constructors @@ -926,9 +808,121 @@ pub(super) struct SplitWildcard<'tcx> { impl<'tcx> SplitWildcard<'tcx> { pub(super) fn new<'p>(pcx: PatCtxt<'_, 'p, 'tcx>) -> Self { - let matrix_ctors = Vec::new(); - let all_ctors = all_constructors(pcx); - SplitWildcard { matrix_ctors, all_ctors } + debug!("SplitWildcard::new({:?})", pcx.ty); + let cx = pcx.cx; + let make_range = |start, end| { + IntRange( + // `unwrap()` is ok because we know the type is an integer. + IntRange::from_range(cx.tcx, start, end, pcx.ty, &RangeEnd::Included).unwrap(), + ) + }; + // This determines the set of all possible constructors for the type `pcx.ty`. For numbers, + // arrays and slices we use ranges and variable-length slices when appropriate. + // + // If the `exhaustive_patterns` feature is enabled, we make sure to omit constructors that + // are statically impossible. E.g., for `Option`, we do not include `Some(_)` in the + // returned list of constructors. + // Invariant: this is empty if and only if the type is uninhabited (as determined by + // `cx.is_uninhabited()`). + let all_ctors = match pcx.ty.kind() { + ty::Bool => smallvec![make_range(0, 1)], + ty::Array(sub_ty, len) if len.try_eval_usize(cx.tcx, cx.param_env).is_some() => { + let len = len.eval_usize(cx.tcx, cx.param_env); + if len != 0 && cx.is_uninhabited(sub_ty) { + smallvec![] + } else { + smallvec![Slice(Slice::new(Some(len), VarLen(0, 0)))] + } + } + // Treat arrays of a constant but unknown length like slices. + ty::Array(sub_ty, _) | ty::Slice(sub_ty) => { + let kind = if cx.is_uninhabited(sub_ty) { FixedLen(0) } else { VarLen(0, 0) }; + smallvec![Slice(Slice::new(None, kind))] + } + ty::Adt(def, substs) if def.is_enum() => { + // If the enum is declared as `#[non_exhaustive]`, we treat it as if it had an + // additional "unknown" constructor. + // There is no point in enumerating all possible variants, because the user can't + // actually match against them all themselves. So we always return only the fictitious + // constructor. + // E.g., in an example like: + // + // ``` + // let err: io::ErrorKind = ...; + // match err { + // io::ErrorKind::NotFound => {}, + // } + // ``` + // + // we don't want to show every possible IO error, but instead have only `_` as the + // witness. + let is_declared_nonexhaustive = cx.is_foreign_non_exhaustive_enum(pcx.ty); + + // If `exhaustive_patterns` is disabled and our scrutinee is an empty enum, we treat it + // as though it had an "unknown" constructor to avoid exposing its emptiness. The + // exception is if the pattern is at the top level, because we want empty matches to be + // considered exhaustive. + let is_secretly_empty = def.variants.is_empty() + && !cx.tcx.features().exhaustive_patterns + && !pcx.is_top_level; + + if is_secretly_empty || is_declared_nonexhaustive { + smallvec![NonExhaustive] + } else if cx.tcx.features().exhaustive_patterns { + // If `exhaustive_patterns` is enabled, we exclude variants known to be + // uninhabited. + def.variants + .iter() + .filter(|v| { + !v.uninhabited_from(cx.tcx, substs, def.adt_kind(), cx.param_env) + .contains(cx.tcx, cx.module) + }) + .map(|v| Variant(v.def_id)) + .collect() + } else { + def.variants.iter().map(|v| Variant(v.def_id)).collect() + } + } + ty::Char => { + smallvec![ + // The valid Unicode Scalar Value ranges. 
+ make_range('\u{0000}' as u128, '\u{D7FF}' as u128), + make_range('\u{E000}' as u128, '\u{10FFFF}' as u128), + ] + } + ty::Int(_) | ty::Uint(_) + if pcx.ty.is_ptr_sized_integral() + && !cx.tcx.features().precise_pointer_size_matching => + { + // `usize`/`isize` are not allowed to be matched exhaustively unless the + // `precise_pointer_size_matching` feature is enabled. So we treat those types like + // `#[non_exhaustive]` enums by returning a special unmatcheable constructor. + smallvec![NonExhaustive] + } + &ty::Int(ity) => { + let bits = Integer::from_attr(&cx.tcx, SignedInt(ity)).size().bits() as u128; + let min = 1u128 << (bits - 1); + let max = min - 1; + smallvec![make_range(min, max)] + } + &ty::Uint(uty) => { + let size = Integer::from_attr(&cx.tcx, UnsignedInt(uty)).size(); + let max = size.truncate(u128::MAX); + smallvec![make_range(0, max)] + } + // If `exhaustive_patterns` is disabled and our scrutinee is the never type, we cannot + // expose its emptiness. The exception is if the pattern is at the top level, because we + // want empty matches to be considered exhaustive. + ty::Never if !cx.tcx.features().exhaustive_patterns && !pcx.is_top_level => { + smallvec![NonExhaustive] + } + ty::Never => smallvec![], + _ if cx.is_uninhabited(pcx.ty) => smallvec![], + ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => smallvec![Single], + // This type is one for which we cannot list constructors, like `str` or `f64`. + _ => smallvec![NonExhaustive], + }; + SplitWildcard { matrix_ctors: Vec::new(), all_ctors } } /// Pass a set of constructors relative to which to split this one. Don't call twice, it won't From 8b38b6859a5395520e4f466133db4ec4bfd0317d Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sun, 20 Dec 2020 14:22:53 +0000 Subject: [PATCH 07/13] Inline the constructor-specific `split` functions --- .../src/thir/pattern/deconstruct_pat.rs | 53 +++++++------------ 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index 9e02cb2e36948..060e4fdf6f817 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -183,14 +183,6 @@ impl IntRange { Pat { ty, span: DUMMY_SP, kind: Box::new(kind) } } - /// Split this range, as described at the top of the file. - fn split<'p, 'tcx>(&self, pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { - let mut split_range = SplitIntRange::new(self.clone()); - let intranges = pcx.matrix.head_ctors(pcx.cx).filter_map(|ctor| ctor.as_int_range()); - split_range.split(intranges.cloned()); - split_range.iter().map(IntRange).collect() - } - /// Lint on likely incorrect range patterns (#63987) pub(super) fn lint_overlapping_range_endpoints(&self, pcx: PatCtxt<'_, '_, '_>, hir_id: HirId) { if self.is_singleton() { @@ -403,19 +395,6 @@ impl Slice { self.kind.arity() } - /// Split this slice, as described at the top of the file. 
- fn split<'p, 'tcx>(self, pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { - let (self_prefix, self_suffix) = match self.kind { - VarLen(self_prefix, self_suffix) => (self_prefix, self_suffix), - _ => return smallvec![Slice(self)], - }; - - let mut split_self = SplitVarLenSlice::new(self_prefix, self_suffix, self.array_len); - let slices = pcx.matrix.head_ctors(pcx.cx).filter_map(|c| c.as_slice()).map(|s| s.kind); - split_self.split(slices); - split_self.iter().map(Slice).collect() - } - /// See `Constructor::is_covered_by` fn is_covered_by(self, other: Self) -> bool { other.kind.covers_length(self.arity()) @@ -680,26 +659,32 @@ impl<'tcx> Constructor<'tcx> { debug!("Constructor::split({:#?}, {:#?})", self, pcx.matrix); match self { - Wildcard => Constructor::split_wildcard(pcx), + Wildcard => { + let mut split_wildcard = SplitWildcard::new(pcx); + split_wildcard.split(pcx); + split_wildcard.into_ctors(pcx) + } // Fast-track if the range is trivial. In particular, we don't do the overlapping // ranges check. - IntRange(ctor_range) if !ctor_range.is_singleton() => ctor_range.split(pcx), - Slice(slice @ Slice { kind: VarLen(..), .. }) => slice.split(pcx), + IntRange(ctor_range) if !ctor_range.is_singleton() => { + let mut split_range = SplitIntRange::new(ctor_range.clone()); + let intranges = + pcx.matrix.head_ctors(pcx.cx).filter_map(|ctor| ctor.as_int_range()); + split_range.split(intranges.cloned()); + split_range.iter().map(IntRange).collect() + } + &Slice(Slice { kind: VarLen(self_prefix, self_suffix), array_len }) => { + let mut split_self = SplitVarLenSlice::new(self_prefix, self_suffix, array_len); + let slices = + pcx.matrix.head_ctors(pcx.cx).filter_map(|c| c.as_slice()).map(|s| s.kind); + split_self.split(slices); + split_self.iter().map(Slice).collect() + } // Any other constructor can be used unchanged. _ => smallvec![self.clone()], } } - /// For wildcards, there are two groups of constructors: there are the constructors actually - /// present in the matrix (`head_ctors`), and the constructors not present (`missing_ctors`). - /// Two constructors that are not in the matrix will either both be caught (by a wildcard), or - /// both not be caught. Therefore we can keep the missing constructors grouped together. - fn split_wildcard<'p>(pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Self; 1]> { - let mut split_wildcard = SplitWildcard::new(pcx); - split_wildcard.split(pcx); - split_wildcard.into_ctors(pcx) - } - /// Returns whether `self` is covered by `other`, i.e. whether `self` is a subset of `other`. /// For the simple cases, this is simply checking for equality. For the "grouped" constructors, /// this checks for inclusion. 
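The `IntRange` arm inlined above is easier to follow with a concrete picture of the partitioning that `SplitIntRange` performs. The following is a rough standalone sketch of that idea, not the compiler's code: it uses plain `(u128, u128)` inclusive bounds and a made-up `split_range` helper instead of the `IntRange` type used here, and it ignores the `u128::MAX` edge case (the real code has an `AfterMax` border for that).

```
/// Split the inclusive `candidate` range into subranges such that no output subrange
/// straddles a boundary of any `seen` range. Within one output subrange, every value is
/// matched by exactly the same subset of the `seen` ranges.
fn split_range(candidate: (u128, u128), seen: &[(u128, u128)]) -> Vec<(u128, u128)> {
    let (lo, hi) = candidate;
    // The set "which seen ranges match this value" can only change at the start of a seen
    // range or just after its end; collect those cut points, clipped to the candidate.
    let mut cuts = vec![lo, hi + 1];
    for &(s_lo, s_hi) in seen {
        if s_lo > lo && s_lo <= hi {
            cuts.push(s_lo);
        }
        if s_hi >= lo && s_hi < hi {
            cuts.push(s_hi + 1);
        }
    }
    cuts.sort_unstable();
    cuts.dedup();
    // Consecutive cut points delimit the output subranges.
    cuts.windows(2).map(|w| (w[0], w[1] - 1)).collect()
}

fn main() {
    // Splitting `0..=200` against the seen ranges `0..=100` and `50..=150` gives
    // `0..=49`, `50..=100`, `101..=150` and `151..=200`.
    let subranges = split_range((0, 200), &[(0, 100), (50, 150)]);
    assert_eq!(subranges, vec![(0, 49), (50, 100), (101, 150), (151, 200)]);
}
```

Each returned subrange groups values that match exactly the same rows, which is what makes it sound to test a single representative constructor per subrange.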
From 43d445c8d1ae9c6ecb8203dd97d96f0e245b042d Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sun, 20 Dec 2020 14:29:42 +0000 Subject: [PATCH 08/13] Pass `Matrix` explicitly instead of via `PatCtxt` --- .../src/thir/pattern/deconstruct_pat.rs | 49 ++++++++++++------- .../src/thir/pattern/usefulness.rs | 26 ++++++---- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index 060e4fdf6f817..4860bb0fe8768 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -19,7 +19,7 @@ use rustc_middle::mir::Field; use rustc_middle::ty::layout::IntegerExt; use rustc_middle::ty::{self, Const, Ty, TyCtxt}; use rustc_session::lint; -use rustc_span::DUMMY_SP; +use rustc_span::{Span, DUMMY_SP}; use rustc_target::abi::{Integer, Size, VariantIdx}; use smallvec::{smallvec, SmallVec}; @@ -184,12 +184,18 @@ impl IntRange { } /// Lint on likely incorrect range patterns (#63987) - pub(super) fn lint_overlapping_range_endpoints(&self, pcx: PatCtxt<'_, '_, '_>, hir_id: HirId) { + pub(super) fn lint_overlapping_range_endpoints<'a, 'tcx: 'a>( + &self, + pcx: PatCtxt<'_, '_, 'tcx>, + ctors: impl Iterator, Span)>, + column_count: usize, + hir_id: HirId, + ) { if self.is_singleton() { return; } - if pcx.matrix.column_count().unwrap_or(0) != 1 { + if column_count != 1 { // FIXME: for now, only check for overlapping ranges on simple range // patterns. Otherwise with the current logic the following is detected // as overlapping: @@ -203,9 +209,7 @@ impl IntRange { return; } - let overlaps: Vec<_> = pcx - .matrix - .head_ctors_and_spans(pcx.cx) + let overlaps: Vec<_> = ctors .filter_map(|(ctor, span)| Some((ctor.as_int_range()?, span))) .filter(|(range, _)| self.suspicious_intersection(range)) .map(|(range, span)| (self.intersection(&range).unwrap(), span)) @@ -655,28 +659,33 @@ impl<'tcx> Constructor<'tcx> { /// This function may discard some irrelevant constructors if this preserves behavior and /// diagnostics. Eg. for the `_` case, we ignore the constructors already present in the /// matrix, unless all of them are. - pub(super) fn split<'p>(&self, pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Self; 1]> { - debug!("Constructor::split({:#?}, {:#?})", self, pcx.matrix); + pub(super) fn split<'a>( + &self, + pcx: PatCtxt<'_, '_, 'tcx>, + ctors: impl Iterator> + Clone, + ) -> SmallVec<[Self; 1]> + where + 'tcx: 'a, + { + debug!("Constructor::split({:#?})", self); match self { Wildcard => { let mut split_wildcard = SplitWildcard::new(pcx); - split_wildcard.split(pcx); + split_wildcard.split(pcx, ctors); split_wildcard.into_ctors(pcx) } // Fast-track if the range is trivial. In particular, we don't do the overlapping // ranges check. 
IntRange(ctor_range) if !ctor_range.is_singleton() => { let mut split_range = SplitIntRange::new(ctor_range.clone()); - let intranges = - pcx.matrix.head_ctors(pcx.cx).filter_map(|ctor| ctor.as_int_range()); + let intranges = ctors.filter_map(|ctor| ctor.as_int_range()); split_range.split(intranges.cloned()); split_range.iter().map(IntRange).collect() } &Slice(Slice { kind: VarLen(self_prefix, self_suffix), array_len }) => { let mut split_self = SplitVarLenSlice::new(self_prefix, self_suffix, array_len); - let slices = - pcx.matrix.head_ctors(pcx.cx).filter_map(|c| c.as_slice()).map(|s| s.kind); + let slices = ctors.filter_map(|c| c.as_slice()).map(|s| s.kind); split_self.split(slices); split_self.iter().map(Slice).collect() } @@ -912,11 +921,17 @@ impl<'tcx> SplitWildcard<'tcx> { /// Pass a set of constructors relative to which to split this one. Don't call twice, it won't /// do what you want. - pub(super) fn split(&mut self, pcx: PatCtxt<'_, '_, 'tcx>) { - self.matrix_ctors = - pcx.matrix.head_ctors(pcx.cx).cloned().filter(|c| !c.is_wildcard()).collect(); + pub(super) fn split<'a>( + &mut self, + pcx: PatCtxt<'_, '_, 'tcx>, + ctors: impl Iterator> + Clone, + ) where + 'tcx: 'a, + { // Since `all_ctors` never contains wildcards, this won't recurse further. - self.all_ctors = self.all_ctors.iter().flat_map(|ctor| ctor.split(pcx)).collect(); + self.all_ctors = + self.all_ctors.iter().flat_map(|ctor| ctor.split(pcx, ctors.clone())).collect(); + self.matrix_ctors = ctors.filter(|c| !c.is_wildcard()).cloned().collect(); } /// Whether there are any value constructors for this type that are not present in the matrix. diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 5da42e705df88..ad672b59ba4f0 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -358,8 +358,6 @@ impl<'a, 'tcx> MatchCheckCtxt<'a, 'tcx> { #[derive(Copy, Clone)] pub(super) struct PatCtxt<'a, 'p, 'tcx> { pub(super) cx: &'a MatchCheckCtxt<'p, 'tcx>, - /// Current state of the matrix. - pub(super) matrix: &'a Matrix<'p, 'tcx>, /// Type of the current column under investigation. pub(super) ty: Ty<'tcx>, /// Span of the current pattern under investigation. @@ -538,7 +536,7 @@ impl<'p, 'tcx> Matrix<'p, 'tcx> { pub(super) fn head_ctors<'a>( &'a self, cx: &'a MatchCheckCtxt<'p, 'tcx>, - ) -> impl Iterator> + Captures<'p> { + ) -> impl Iterator> + Captures<'p> + Clone { self.patterns.iter().map(move |r| r.head_ctor(cx)) } @@ -804,6 +802,7 @@ impl<'tcx> Usefulness<'tcx> { fn apply_constructor<'p>( self, pcx: PatCtxt<'_, 'p, 'tcx>, + matrix: &Matrix<'p, 'tcx>, // used to compute missing ctors ctor: &Constructor<'tcx>, ctor_wild_subpatterns: &Fields<'p, 'tcx>, ) -> Self { @@ -811,7 +810,7 @@ impl<'tcx> Usefulness<'tcx> { UsefulWithWitness(witnesses) => { let new_witnesses = if ctor.is_wildcard() { let mut split_wildcard = SplitWildcard::new(pcx); - split_wildcard.split(pcx); + split_wildcard.split(pcx, matrix.head_ctors(pcx.cx)); let new_patterns = split_wildcard.report_missing_patterns(pcx); witnesses .into_iter() @@ -968,7 +967,7 @@ fn is_useful<'p, 'tcx>( // FIXME(Nadrieril): Hack to work around type normalization issues (see #72476). 
let ty = matrix.heads().next().map(|r| r.ty).unwrap_or(v.head().ty); - let pcx = PatCtxt { cx, matrix, ty, span: v.head().span, is_top_level }; + let pcx = PatCtxt { cx, ty, span: v.head().span, is_top_level }; debug!("is_useful_expand_first_col: ty={:#?}, expanding {:#?}", pcx.ty, v.head()); @@ -995,20 +994,27 @@ fn is_useful<'p, 'tcx>( let v_ctor = v.head_ctor(cx); if let Constructor::IntRange(ctor_range) = &v_ctor { // Lint on likely incorrect range patterns (#63987) - ctor_range.lint_overlapping_range_endpoints(pcx, hir_id) + ctor_range.lint_overlapping_range_endpoints( + pcx, + matrix.head_ctors_and_spans(cx), + matrix.column_count().unwrap_or(0), + hir_id, + ) } // We split the head constructor of `v`. - let split_ctors = v_ctor.split(pcx); + let split_ctors = v_ctor.split(pcx, matrix.head_ctors(cx)); // For each constructor, we compute whether there's a value that starts with it that would // witness the usefulness of `v`. + let start_matrix = &matrix; let usefulnesses = split_ctors.into_iter().map(|ctor| { // We cache the result of `Fields::wildcards` because it is used a lot. let ctor_wild_subpatterns = Fields::wildcards(pcx, &ctor); - let matrix = pcx.matrix.specialize_constructor(pcx, &ctor, &ctor_wild_subpatterns); + let spec_matrix = + start_matrix.specialize_constructor(pcx, &ctor, &ctor_wild_subpatterns); let v = v.pop_head_constructor(&ctor_wild_subpatterns); let usefulness = - is_useful(pcx.cx, &matrix, &v, witness_preference, hir_id, is_under_guard, false); - usefulness.apply_constructor(pcx, &ctor, &ctor_wild_subpatterns) + is_useful(cx, &spec_matrix, &v, witness_preference, hir_id, is_under_guard, false); + usefulness.apply_constructor(pcx, start_matrix, &ctor, &ctor_wild_subpatterns) }); Usefulness::merge(usefulnesses) }; From 2a541cea354fe5866cf2addafe730c0d7c0dd0c2 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sun, 20 Dec 2020 13:29:39 +0000 Subject: [PATCH 09/13] Completely rework the explanation of the algorithm --- .../src/thir/pattern/deconstruct_pat.rs | 164 +++++-- .../src/thir/pattern/usefulness.rs | 445 +++++++++--------- 2 files changed, 324 insertions(+), 285 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index 4860bb0fe8768..f71a4829e8a29 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -1,6 +1,47 @@ -//! This module provides functions to deconstruct and reconstruct patterns into a constructor -//! applied to some fields. This is used by the `_match` module to compute pattern -//! usefulness/exhaustiveness. +//! [`super::usefulness`] explains most of what is happening in this file. As explained there, +//! values and patterns are made from constructors applied to fields. This file defines a +//! `Constructor` enum, a `Fields` struct, and various operations to manipulate them and convert +//! them from/to patterns. +//! +//! There's one idea that is not detailed in [`super::usefulness`] because the details are not +//! needed there: _constructor splitting_. +//! +//! # Constructor splitting +//! +//! The idea is as follows: given a constructor `c` and a matrix, we want to specialize in turn +//! with all the value constructors that are covered by `c`, and compute usefulness for each. +//! Instead of listing all those constructors (which is intractable), we group those value +//! constructors together as much as possible. Example: +//! +//! ``` +//! 
match (0, false) { +//! (0 ..=100, true) => {} // `p_1` +//! (50..=150, false) => {} // `p_2` +//! (0 ..=200, _) => {} // `q` +//! } +//! ``` +//! +//! The naive approach would try all numbers in the range `0..=200`. But we can be a lot more +//! clever: `0` and `1` for example will match the exact same rows, and return equivalent +//! witnesses. In fact all of `0..50` would. We can thus restrict our exploration to 4 +//! constructors: `0..50`, `50..=100`, `101..=150` and `151..=200`. That is enough and infinitely +//! more tractable. +//! +//! We capture this idea in a function `split(p_1 ... p_n, c)` which returns a list of constructors +//! `c'` covered by `c`. Given such a `c'`, we require that all value ctors `c''` covered by `c'` +//! return an equivalent set of witnesses after specializing and computing usefulness. +//! In the example above, witnesses for specializing by `c''` covered by `0..50` will only differ +//! in their first element. +//! +//! We usually also ask that the `c'` together cover all of the original `c`. However we allow +//! skipping some constructors as long as it doesn't change whether the resulting list of witnesses +//! is empty of not. We use this in the wildcard `_` case. +//! +//! Splitting is implemented in the [`Constructor::split`] function. We don't do splitting for +//! or-patterns; instead we just try the alternatives one-by-one. For details on splitting +//! wildcards, see [`SplitWildcard`]; for integer ranges, see [`SplitIntRange`]; for slices, see +//! [`SplitVarLenSlice`]. + use self::Constructor::*; use self::SliceKind::*; @@ -260,13 +301,13 @@ enum IntBorder { AfterMax, } -/// A range of integers that is partitioned into disjoint subranges. -/// -/// This is fed an input of multiple ranges, and returns an output that covers the union of the -/// inputs but is split so that an output range only intersects an input range by being a subrange -/// of it. No output range straddles the boundary of one of the inputs. This does constructor +/// A range of integers that is partitioned into disjoint subranges. This does constructor /// splitting for integer ranges as explained at the top of the file. /// +/// This is fed multiple ranges, and returns an output that covers the input, but is split so that +/// the only intersections between an output range and a seen range are inclusions. No output range +/// straddles the boundary of one of the inputs. +/// /// The following input: /// ``` /// |-------------------------| // `self` @@ -405,54 +446,67 @@ impl Slice { } } -/// The exhaustiveness-checking paper does not include any details on checking variable-length -/// slice patterns. However, they may be matched by an infinite collection of fixed-length array -/// patterns. -/// -/// Checking the infinite set directly would take an infinite amount of time. However, it turns out -/// that for each finite set of patterns `P`, all sufficiently large array lengths are equivalent: -/// -/// Each slice `s` with a "sufficiently-large" length `l ≥ L` that applies to exactly the subset -/// `Pₜ` of `P` can be transformed to a slice `sₘ` for each sufficiently-large length `m` that -/// applies to exactly the same subset of `P`. -/// -/// Because of that, each witness for reachability-checking of one of the sufficiently-large -/// lengths can be transformed to an equally-valid witness of any other length, so we only have to -/// check slices of the "minimal sufficiently-large length" and less. 
-/// -/// Note that the fact that there is a *single* `sₘ` for each `m` not depending on the specific -/// pattern in `P` is important: if you look at the pair of patterns `[true, ..]` `[.., false]` -/// Then any slice of length ≥1 that matches one of these two patterns can be trivially turned to a -/// slice of any other length ≥1 that matches them and vice-versa, but the slice of length 2 -/// `[false, true]` that matches neither of these patterns can't be turned to a slice from length 1 -/// that matches neither of these patterns, so we have to consider slices from length 2 there. +/// This computes constructor splitting for variable-length slices, as explained at the top of the +/// file. /// -/// Now, to see that that length exists and find it, observe that slice patterns are either -/// "fixed-length" patterns (`[_, _, _]`) or "variable-length" patterns (`[_, .., _]`). +/// A slice pattern `[x, .., y]` behaves like the infinite or-pattern `[x, y] | [x, _, y] | [x, _, +/// _, y] | ...`. The corresponding value constructors are fixed-length array constructors above a +/// given minimum length. We obviously can't list all of this infinity of constructors. Thankfully, +/// it turns out that for each finite set of slice patterns, all sufficiently large array lengths +/// are equivalent. /// -/// For fixed-length patterns, all slices with lengths *longer* than the pattern's length have the -/// same outcome (of not matching), so as long as `L` is greater than the pattern's length we can -/// pick any `sₘ` from that length and get the same result. +/// Let's look at an example, where we are trying to split the last pattern: +/// ``` +/// match x { +/// [true, true, ..] => {} +/// [.., false, false] => {} +/// [..] => {} +/// } +/// ``` +/// Here are the results of specialization for the first few lengths: +/// ``` +/// // length 0 +/// [] => {} +/// // length 1 +/// [_] => {} +/// // length 2 +/// [true, true] => {} +/// [false, false] => {} +/// [_, _] => {} +/// // length 3 +/// [true, true, _ ] => {} +/// [_, false, false] => {} +/// [_, _, _ ] => {} +/// // length 4 +/// [true, true, _, _ ] => {} +/// [_, _, false, false] => {} +/// [_, _, _, _ ] => {} +/// // length 5 +/// [true, true, _, _, _ ] => {} +/// [_, _, _, false, false] => {} +/// [_, _, _, _, _ ] => {} +/// ``` /// -/// For variable-length patterns, the situation is more complicated, because as seen above the -/// precise value of `sₘ` matters. +/// If we went above length 5, we would simply be inserting more columns full of wildcards in the +/// middle. This means that the set of witnesses for length `l >= 5` if equivalent to the set for +/// any other `l' >= 5`: simply add or remove wildcards in the middle to convert between them. /// -/// However, for each variable-length pattern `p` with a prefix of length `plₚ` and suffix of -/// length `slₚ`, only the first `plₚ` and the last `slₚ` elements are examined. +/// This applies to any set of slice patterns: there will be a length `L` above which all length +/// behave the same. This is exactly what we need for constructor splitting. Therefore a +/// variable-length slice can be split into a variable-length slice of minimal length `L`, and many +/// fixed-length slices of lengths `< L`. 
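To make that concrete, here is a minimal standalone sketch of the split. It is not the `SplitVarLenSlice` implementation below: the names `SliceCtor` and `split_var_len_slice` are made up for the illustration, arrays of known length are ignored, and `L` is computed with the formula given a little further down.

```
/// The two kinds of slice constructors produced by the split: `FixedLen(n)` matches slices
/// of length exactly `n`, `VarLen(n)` matches slices of length `n` or more.
#[derive(Debug, PartialEq)]
enum SliceCtor {
    FixedLen(usize),
    VarLen(usize),
}

/// Split a variable-length slice pattern with `prefix`/`suffix` elements, relative to the
/// fixed lengths and (prefix, suffix) pairs of the slice patterns seen in the matrix.
fn split_var_len_slice(
    prefix: usize,
    suffix: usize,
    seen_fixed: &[usize],
    seen_var: &[(usize, usize)],
) -> Vec<SliceCtor> {
    // `L = max(max(FIXED_LEN) + 1, max(PREFIX_LEN) + max(SUFFIX_LEN))`, and at least the
    // arity of the pattern being split.
    let max_fixed_plus_one = seen_fixed.iter().max().map_or(0, |&n| n + 1);
    let max_prefix = seen_var.iter().map(|&(p, _)| p).max().unwrap_or(0);
    let max_suffix = seen_var.iter().map(|&(_, s)| s).max().unwrap_or(0);
    let l = (prefix + suffix).max(max_fixed_plus_one).max(max_prefix + max_suffix);

    // One fixed-length constructor per length below `L`, then a single variable-length
    // constructor standing for every length `>= L`.
    let mut ctors: Vec<SliceCtor> = (prefix + suffix..l).map(SliceCtor::FixedLen).collect();
    ctors.push(SliceCtor::VarLen(l));
    ctors
}

fn main() {
    // The example above: splitting `[..]` against `[true, true, ..]` (prefix 2) and
    // `[.., false, false]` (suffix 2). Lengths 0 through 3 are listed individually and all
    // lengths >= 4 are grouped into one variable-length constructor. Any larger `L` would
    // also be correct; the walkthrough above simply stopped at length 5.
    let ctors = split_var_len_slice(0, 0, &[], &[(2, 0), (0, 2)]);
    assert_eq!(
        ctors,
        vec![
            SliceCtor::FixedLen(0),
            SliceCtor::FixedLen(1),
            SliceCtor::FixedLen(2),
            SliceCtor::FixedLen(3),
            SliceCtor::VarLen(4),
        ]
    );
}
```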
/// -/// Therefore, as long as `L` is positive (to avoid concerns about empty types), all elements after -/// the maximum prefix length and before the maximum suffix length are not examined by any -/// variable-length pattern, and therefore can be added/removed without affecting them - creating -/// equivalent patterns from any sufficiently-large length. +/// For each variable-length pattern `p` with a prefix of length `plₚ` and suffix of length `slₚ`, +/// only the first `plₚ` and the last `slₚ` elements are examined. Therefore, as long as `L` is +/// positive (to avoid concerns about empty types), all elements after the maximum prefix length +/// and before the maximum suffix length are not examined by any variable-length pattern, and +/// therefore can be added/removed without affecting them - creating equivalent patterns from any +/// sufficiently-large length. /// /// Of course, if fixed-length patterns exist, we must be sure that our length is large enough to /// miss them all, so we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))` /// /// `max_slice` below will be made to have arity `L`. -/// -/// For example, with the above pair of patterns, all elements but the first and last can be -/// added/removed, so any witness of length ≥2 (say, `[false, false, true]`) can be turned to a -/// witness from any other length ≥2. #[derive(Debug)] struct SplitVarLenSlice { /// If the type is an array, this is its size. @@ -787,11 +841,19 @@ impl<'tcx> Constructor<'tcx> { /// A wildcard constructor that we split relative to the constructors in the matrix, as explained /// at the top of the file. -/// For splitting wildcards, there are two groups of constructors: there are the constructors -/// actually present in the matrix (`matrix_ctors`), and the constructors not present. Two -/// constructors that are not in the matrix will either both be covered (by a wildcard), or both -/// not be covered by any given row. Therefore we can keep the missing constructors grouped -/// together. +/// +/// A constructor that is not present in the matrix rows will only be covered by the rows that have +/// wildcards. Thus we can group all of those constructors together; we call them "missing +/// constructors". Splitting a wildcard would therefore list all present constructors individually +/// (or grouped if they are integers or slices), and then all missing constructors together as a +/// group. +/// +/// However we can go further: since any constructor will match the wildcard rows, and having more +/// rows can only reduce the amount of usefulness witnesses, we can skip the present constructors +/// and only try the missing ones. +/// This will not preserve the whole list of witnesses, but will preserve whether the list is empty +/// or not. In fact this is quite natural from the point of view of diagnostics too. This is done +/// in `to_ctors`: in some cases we only return `Missing`. #[derive(Debug)] pub(super) struct SplitWildcard<'tcx> { /// Constructors seen in the matrix. diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index ad672b59ba4f0..4243108423072 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -12,296 +12,272 @@ //! //! ----- //! -//! This file includes the logic for exhaustiveness and usefulness checking for -//! pattern-matching. Specifically, given a list of patterns for a type, we can -//! tell whether: -//! 
(a) the patterns cover every possible constructor for the type (exhaustiveness) -//! (b) each pattern is necessary (usefulness) +//! This file includes the logic for exhaustiveness and reachability checking for pattern-matching. +//! Specifically, given a list of patterns for a type, we can tell whether: +//! (a) each pattern is reachable (reachability) +//! (b) the patterns cover every possible value for the type (exhaustiveness) //! -//! The algorithm implemented here is a modified version of the one described in -//! [this paper](http://moscova.inria.fr/~maranget/papers/warn/index.html). -//! However, to save future implementors from reading the original paper, we -//! summarise the algorithm here to hopefully save time and be a little clearer -//! (without being so rigorous). +//! The algorithm implemented here is a modified version of the one described in [this +//! paper](http://moscova.inria.fr/~maranget/papers/warn/index.html). We have however generalized +//! it to accomodate the variety of patterns that rust supports. We thus explain our version here, +//! without being as rigorous. //! -//! # Premise //! -//! The core of the algorithm revolves about a "usefulness" check. In particular, we -//! are trying to compute a predicate `U(P, p)` where `P` is a list of patterns (we refer to this as -//! a matrix). `U(P, p)` represents whether, given an existing list of patterns -//! `P_1 ..= P_m`, adding a new pattern `p` will be "useful" (that is, cover previously- -//! uncovered values of the type). +//! # Summary //! -//! If we have this predicate, then we can easily compute both exhaustiveness of an -//! entire set of patterns and the individual usefulness of each one. -//! (a) the set of patterns is exhaustive iff `U(P, _)` is false (i.e., adding a wildcard -//! match doesn't increase the number of values we're matching) -//! (b) a pattern `P_i` is not useful if `U(P[0..=(i-1), P_i)` is false (i.e., adding a -//! pattern to those that have come before it doesn't increase the number of values -//! we're matching). +//! The core of the algorithm is the notion of "usefulness". A pattern `q` is said to be *useful* +//! relative to another pattern `p` of the same type if there is a value that is matched by `q` and +//! not matched by `p`. This generalizes to many `p`s: `q` is useful wrt a list of patterns `p_1 .. +//! p_n` if there is a value that is matched by `q` and by none of the `p_i`. We write +//! `usefulness(p_1 .. p_n, q)` for a function that returns a list of such values. The aim of this +//! file is to compute it efficiently. //! -//! # Core concept +//! This is enough to compute reachability: a pattern in a `match` expression is reachable iff it +//! is useful wrt the patterns above it: +//! ```rust +//! match x { +//! Some(_) => ..., +//! None => ..., // reachable: `None` is matched by this but not the branch above +//! Some(0) => ..., // unreachable: all the values this matches are already matched by +//! // `Some(_)` above +//! } +//! ``` +//! +//! This is also enough to compute exhaustiveness: a match is exhaustive iff the wildcard `_` +//! pattern is _not_ useful wrt the patterns in the match. The values returned by `usefulness` are +//! used to tell the user which values are missing. +//! ```rust +//! match x { +//! Some(0) => ..., +//! None => ..., +//! // not exhaustive: `_` is useful because it matches `Some(1)` +//! } +//! ``` +//! +//! The entrypoint of this file is the [`compute_match_usefulness`] function, which computes +//! 
reachability for each match branch and exhaustiveness for the whole match. +//! +//! +//! # Constructors and fields +//! +//! Note: we will often abbreviate "constructor" as "ctor". //! -//! The idea that powers everything that is done in this file is the following: a value is made -//! from a constructor applied to some fields. Examples of constructors are `Some`, `None`, `(,)` -//! (the 2-tuple constructor), `Foo {..}` (the constructor for a struct `Foo`), and `2` (the -//! constructor for the number `2`). Fields are just a (possibly empty) list of values. +//! The idea that powers everything that is done in this file is the following: a (matcheable) +//! value is made from a constructor applied to a number of subvalues. Examples of constructors are +//! `Some`, `None`, `(,)` (the 2-tuple constructor), `Foo {..}` (the constructor for a struct +//! `Foo`), and `2` (the constructor for the number `2`). This is natural when we think of +//! pattern-matching, and this is the basis for what follows. //! -//! Some of the constructors listed above might feel weird: `None` and `2` don't take any -//! arguments. This is part of what makes constructors so general: we will consider plain values -//! like numbers and string literals to be constructors that take no arguments, also called "0-ary -//! constructors"; they are the simplest case of constructors. This allows us to see any value as -//! made up from a tree of constructors, each having a given number of children. For example: -//! `(None, Ok(0))` is made from 4 different constructors. +//! Some of the ctors listed above might feel weird: `None` and `2` don't take any arguments. +//! That's ok: those are ctors that take a list of 0 arguments; they are the simplest case of +//! ctors. We treat `2` as a ctor because `u64` and other number types behave exactly like a huge +//! `enum`, with one variant for each number. This allows us to see any matcheable value as made up +//! from a tree of ctors, each having a set number of children. For example: `Foo { bar: None, +//! baz: Ok(0) }` is made from 4 different ctors, namely `Foo{..}`, `None`, `Ok` and `0`. //! -//! This idea can be extended to patterns: a pattern captures a set of possible values, and we can -//! describe this set using constructors. For example, `Err(_)` captures all values of the type -//! `Result` that start with the `Err` constructor (for some choice of `T` and `E`). The -//! wildcard `_` captures all values of the given type starting with any of the constructors for -//! that type. +//! This idea can be extended to patterns: they are also made from constructors applied to fields. +//! A pattern for a given type is allowed to use all the ctors for values of that type (which we +//! call "value constructors"), but there are also pattern-only ctors. The most important one is +//! the wildcard (`_`), and the others are integer ranges (`0..=10`), variable-length slices (`[x, +//! ..]`), and or-patterns (`Ok(0) | Err(_)`). Examples of valid patterns are `42`, `Some(_)`, `Foo +//! { bar: Some(0) | None, baz: _ }`. Note that a binder in a pattern (e.g. `Some(x)`) matches the +//! same values as a wildcard (e.g. `Some(_)`), so we treat both as wildcards. //! -//! We use this to compute whether different patterns might capture a same value. Do the patterns -//! `Ok("foo")` and `Err(_)` capture a common value? The answer is no, because the first pattern -//! captures only values starting with the `Ok` constructor and the second only values starting -//! with the `Err` constructor. 
Do the patterns `Some(42)` and `Some(1..10)` intersect? They might, -//! since they both capture values starting with `Some`. To be certain, we need to dig under the -//! `Some` constructor and continue asking the question. This is the main idea behind the -//! exhaustiveness algorithm: by looking at patterns constructor-by-constructor, we can efficiently -//! figure out if some new pattern might capture a value that hadn't been captured by previous -//! patterns. +//! From this deconstruction we can compute whether a given value matches a given pattern; we +//! simply look at ctors one at a time. Given a pattern `p` and a value `v`, we want to compute +//! `matches!(v, p)`. It's mostly straightforward: we compare the head ctors and when they match +//! we compare their fields recursively. A few representative examples: //! -//! Constructors are represented by the `Constructor` enum, and its fields by the `Fields` enum. -//! Most of the complexity of this file resides in transforming between patterns and -//! (`Constructor`, `Fields`) pairs, handling all the special cases correctly. +//! - `matches!(v, _) := true` +//! - `matches!((v0, v1), (p0, p1)) := matches!(v0, p0) && matches!(v1, p1)` +//! - `matches!(Foo { bar: v0, baz: v1 }, Foo { bar: p0, baz: p1 }) := matches!(v0, p0) && matches!(v1, p1)` +//! - `matches!(Ok(v0), Ok(p0)) := matches!(v0, p0)` +//! - `matches!(Ok(v0), Err(p0)) := false` (incompatible variants) +//! - `matches!(v, 1..=100) := matches!(v, 1) || ... || matches!(v, 100)` +//! - `matches!([v0], [p0, .., p1]) := false` (incompatible lengths) +//! - `matches!([v0, v1, v2], [p0, .., p1]) := matches!(v0, p0) && matches!(v2, p1)` +//! - `matches!(v, p0 | p1) := matches!(v, p0) || matches!(v, p1)` //! -//! Caveat: this constructors/fields distinction doesn't quite cover every Rust value. For example -//! a value of type `Rc` doesn't fit this idea very well, nor do various other things. -//! However, this idea covers most of the cases that are relevant to exhaustiveness checking. +//! Constructors, fields and relevant operations are defined in the [`super::deconstruct_pat`] module. //! +//! Note: this constructors/fields distinction may not straightforwardly apply to every Rust type. +//! For example a value of type `Rc` can't be deconstructed that way, and `&str` has an +//! infinity of constructors. There are also subtleties with visibility of fields and +//! uninhabitedness and various other things. The constructors idea can be extended to handle most +//! of these subtleties though; caveats are documented where relevant throughout the code. //! -//! # Algorithm +//! Whether constructors cover each other is computed by [`Constructor::is_covered_by`]. //! -//! Recall that `U(P, p)` represents whether, given an existing list of patterns (aka matrix) `P`, -//! adding a new pattern `p` will cover previously-uncovered values of the type. -//! During the course of the algorithm, the rows of the matrix won't just be individual patterns, -//! but rather partially-deconstructed patterns in the form of a list of fields. The paper -//! calls those pattern-vectors, and we will call them pattern-stacks. The same holds for the -//! new pattern `p`. //! -//! For example, say we have the following: +//! # Specialization //! +//! Recall that we wish to compute `usefulness(p_1 .. p_n, q)`: given a list of patterns `p_1 .. +//! p_n` and a pattern `q`, all of the same type, we want to find a list of values (called +//! "witnesses") that are matched by `q` and by none of the `p_i`. 
We obviously don't just +//! enumerate all possible values. From the discussion above we see that we can proceed +//! ctor-by-ctor: for each value ctor of the given type, we ask "is there a value that starts with +//! this constructor and matches `q` and none of the `p_i`?". As we saw above, there's a lot we can +//! say from knowing only the first constructor of our candidate value. +//! +//! Let's take the following example: //! ``` -//! // x: (Option, Result<()>) //! match x { -//! (Some(true), _) => {} -//! (None, Err(())) => {} -//! (None, Err(_)) => {} +//! Enum::Variant1(_) => {} // `p1` +//! Enum::Variant2(None, 0) => {} // `p2` +//! Enum::Variant2(Some(_), 0) => {} // `q` //! } //! ``` //! -//! Here, the matrix `P` starts as: +//! We can easily see that if our candidate value `v` starts with `Variant1` it will not match `q`. +//! If `v = Variant2(v0, v1)` however, whether or not it matches `p2` and `q` will depend on `v0` +//! and `v1`. In fact, such a `v` will be a witness of usefulness of `q` exactly when the tuple +//! `(v0, v1)` is a witness of usefulness of `q'` in the following reduced match: //! //! ``` -//! [ -//! [(Some(true), _)], -//! [(None, Err(()))], -//! [(None, Err(_))], -//! ] +//! match x { +//! (None, 0) => {} // `p2'` +//! (Some(_), 0) => {} // `q'` +//! } //! ``` //! -//! We can tell it's not exhaustive, because `U(P, _)` is true (we're not covering -//! `[(Some(false), _)]`, for instance). In addition, row 3 is not useful, because -//! all the values it covers are already covered by row 2. +//! This motivates a new step in computing usefulness, that we call _specialization_. +//! Specialization consist of filtering a list of patterns for those that match a constructor, and +//! then looking into the constructor's fields. This enables usefulness to be computed recursively. //! -//! A list of patterns can be thought of as a stack, because we are mainly interested in the top of -//! the stack at any given point, and we can pop or apply constructors to get new pattern-stacks. -//! To match the paper, the top of the stack is at the beginning / on the left. +//! Instead of acting on a single pattern in each row, we will consider a list of patterns for each +//! row, and we call such a list a _pattern-stack_. The idea is that we will specialize the +//! leftmost pattern, which amounts to popping the constructor and pushing its fields, which feels +//! like a stack. We note a pattern-stack simply with `[p_1 ... p_n]`. +//! Here's a sequence of specializations of a list of pattern-stacks, to illustrate what's +//! happening: +//! ``` +//! [Enum::Variant1(_)] +//! [Enum::Variant2(None, 0)] +//! [Enum::Variant2(Some(_), 0)] +//! //==>> specialize with `Variant2` +//! [None, 0] +//! [Some(_), 0] +//! //==>> specialize with `Some` +//! [_, 0] +//! //==>> specialize with `true` (say the type was `bool`) +//! [0] +//! //==>> specialize with `0` +//! [] +//! ``` //! -//! There are two important operations on pattern-stacks necessary to understand the algorithm: +//! The function `specialize(c, p)` takes a value constructor `c` and a pattern `p`, and returns 0 +//! or more pattern-stacks. If `c` does not match the head constructor of `p`, it returns nothing; +//! otherwise if returns the fields of the constructor. This only returns more than one +//! pattern-stack if `p` has a pattern-only constructor. //! -//! 1. We can pop a given constructor off the top of a stack. This operation is called -//! 
`specialize`, and is denoted `S(c, p)` where `c` is a constructor (like `Some` or -//! `None`) and `p` a pattern-stack. -//! If the pattern on top of the stack can cover `c`, this removes the constructor and -//! pushes its arguments onto the stack. It also expands OR-patterns into distinct patterns. -//! Otherwise the pattern-stack is discarded. -//! This essentially filters those pattern-stacks whose top covers the constructor `c` and -//! discards the others. +//! - Specializing for the wrong constructor returns nothing //! -//! For example, the first pattern above initially gives a stack `[(Some(true), _)]`. If we -//! pop the tuple constructor, we are left with `[Some(true), _]`, and if we then pop the -//! `Some` constructor we get `[true, _]`. If we had popped `None` instead, we would get -//! nothing back. +//! `specialize(None, Some(p0)) := []` //! -//! This returns zero or more new pattern-stacks, as follows. We look at the pattern `p_1` -//! on top of the stack, and we have four cases: +//! - Specializing for the correct constructor returns a single row with the fields //! -//! 1.1. `p_1 = c(r_1, .., r_a)`, i.e. the top of the stack has constructor `c`. We -//! push onto the stack the arguments of this constructor, and return the result: -//! `r_1, .., r_a, p_2, .., p_n` +//! `specialize(Variant1, Variant1(p0, p1, p2)) := [[p0, p1, p2]]` //! -//! 1.2. `p_1 = c'(r_1, .., r_a')` where `c ≠ c'`. We discard the current stack and -//! return nothing. +//! `specialize(Foo{..}, Foo { bar: p0, baz: p1 }) := [[p0, p1]]` //! -//! 1.3. `p_1 = _`. We push onto the stack as many wildcards as the constructor `c` has -//! arguments (its arity), and return the resulting stack: -//! `_, .., _, p_2, .., p_n` +//! - For or-patterns, we specialize each branch and concatenate the results //! -//! 1.4. `p_1 = r_1 | r_2`. We expand the OR-pattern and then recurse on each resulting -//! stack: -//! - `S(c, (r_1, p_2, .., p_n))` -//! - `S(c, (r_2, p_2, .., p_n))` +//! `specialize(c, p0 | p1) := specialize(c, p0) ++ specialize(c, p1)` //! -//! 2. We can pop a wildcard off the top of the stack. This is called `S(_, p)`, where `p` is -//! a pattern-stack. Note: the paper calls this `D(p)`. -//! This is used when we know there are missing constructor cases, but there might be -//! existing wildcard patterns, so to check the usefulness of the matrix, we have to check -//! all its *other* components. +//! - We treat the other pattern constructors lik big or-patterns of all the possibilities: //! -//! It is computed as follows. We look at the pattern `p_1` on top of the stack, -//! and we have three cases: -//! 2.1. `p_1 = c(r_1, .., r_a)`. We discard the current stack and return nothing. -//! 2.2. `p_1 = _`. We return the rest of the stack: -//! p_2, .., p_n -//! 2.3. `p_1 = r_1 | r_2`. We expand the OR-pattern and then recurse on each resulting -//! stack. -//! - `S(_, (r_1, p_2, .., p_n))` -//! - `S(_, (r_2, p_2, .., p_n))` +//! `specialize(c, _) := specialize(c, Variant1(_) | Variant2(_, _) | ...)` //! -//! Note that the OR-patterns are not always used directly in Rust, but are used to derive the -//! exhaustive integer matching rules, so they're written here for posterity. +//! `specialize(c, 1..=100) := specialize(c, 1 | ... | 100)` //! -//! Both those operations extend straightforwardly to a list or pattern-stacks, i.e. a matrix, by -//! working row-by-row. Popping a constructor ends up keeping only the matrix rows that start with -//! 
the given constructor, and popping a wildcard keeps those rows that start with a wildcard. +//! `specialize(c, [p0, .., p1]) := specialize(c, [p0, p1] | [p0, _, p1] | [p0, _, _, p1] | ...)` //! +//! - If `c` is a pattern-only constructor, `specialize` is defined on a case-by-case basis. See +//! the discussion abount constructor splitting in [`super::deconstruct_pat`]. //! -//! The algorithm for computing `U` -//! ------------------------------- -//! The algorithm is inductive (on the number of columns: i.e., components of tuple patterns). -//! That means we're going to check the components from left-to-right, so the algorithm -//! operates principally on the first component of the matrix and new pattern-stack `p`. -//! This algorithm is realised in the `is_useful` function. //! -//! Base case. (`n = 0`, i.e., an empty tuple pattern) -//! - If `P` already contains an empty pattern (i.e., if the number of patterns `m > 0`), -//! then `U(P, p)` is false. -//! - Otherwise, `P` must be empty, so `U(P, p)` is true. +//! We then extend this function to work with pattern-stacks as input, by acting on the first +//! column and keeping the other columns untouched. //! -//! Inductive step. (`n > 0`, i.e., whether there's at least one column -//! [which may then be expanded into further columns later]) -//! We're going to match on the top of the new pattern-stack, `p_1`. -//! - If `p_1 == c(r_1, .., r_a)`, i.e. we have a constructor pattern. -//! Then, the usefulness of `p_1` can be reduced to whether it is useful when -//! we ignore all the patterns in the first column of `P` that involve other constructors. -//! This is where `S(c, P)` comes in: -//! `U(P, p) := U(S(c, P), S(c, p))` +//! Specialization for the whole matrix is done in [`Matrix::specialize_constructor`]. Note that +//! or-patterns in the first column are expanded before being stored in the matrix. Specialization +//! for a single patstack is done from a combination of [`Constructor::is_covered_by`] and +//! [`PatStack::pop_head_constructor`]. The internals of how it's done mostly live in the +//! [`Fields`] struct. //! -//! For example, if `P` is: //! -//! ``` -//! [ -//! [Some(true), _], -//! [None, 0], -//! ] -//! ``` +//! # Computing usefulness //! -//! and `p` is `[Some(false), 0]`, then we don't care about row 2 since we know `p` only -//! matches values that row 2 doesn't. For row 1 however, we need to dig into the -//! arguments of `Some` to know whether some new value is covered. So we compute -//! `U([[true, _]], [false, 0])`. +//! We now have all we need to compute usefulness. The inputs to usefulness are a list of +//! pattern-stacks `p_1 ... p_n` (one per row), and a new pattern_stack `q`. The paper and this +//! file calls the list of patstacks a _matrix_. They must all have the same number of columns and +//! the patterns in a given column must all have the same type. `usefulness` returns a (possibly +//! empty) list of witnesses of usefulness. These witnesses will also be pattern-stacks. //! -//! - If `p_1 == _`, then we look at the list of constructors that appear in the first -//! component of the rows of `P`: -//! + If there are some constructors that aren't present, then we might think that the -//! wildcard `_` is useful, since it covers those constructors that weren't covered -//! before. -//! That's almost correct, but only works if there were no wildcards in those first -//! components. So we need to check that `p` is useful with respect to the rows that -//! start with a wildcard, if there are any. 
This is where `S(_, x)` comes in: -//! `U(P, p) := U(S(_, P), S(_, p))` +//! - base case: `n_columns == 0`. +//! Since a pattern-stack functions like a tuple of patterns, an empty one functions like the +//! unit type. Thus `q` is useful iff there are no rows above it, i.e. if `n == 0`. //! -//! For example, if `P` is: +//! - inductive case: `n_columns > 0`. +//! We need a way to list the constructors we want to try. We will be more clever in the next +//! section but for now assume we list all value constructors for the type of the first column. //! -//! ``` -//! [ -//! [_, true, _], -//! [None, false, 1], -//! ] -//! ``` +//! - for each such ctor `c`: //! -//! and `p` is `[_, false, _]`, the `Some` constructor doesn't appear in `P`. So if we -//! only had row 2, we'd know that `p` is useful. However row 1 starts with a -//! wildcard, so we need to check whether `U([[true, _]], [false, 1])`. +//! - for each `q'` returned by `specialize(c, q)`: //! -//! + Otherwise, all possible constructors (for the relevant type) are present. In this -//! case we must check whether the wildcard pattern covers any unmatched value. For -//! that, we can think of the `_` pattern as a big OR-pattern that covers all -//! possible constructors. For `Option`, that would mean `_ = None | Some(_)` for -//! example. The wildcard pattern is useful in this case if it is useful when -//! specialized to one of the possible constructors. So we compute: -//! `U(P, p) := ∃(k ϵ constructors) U(S(k, P), S(k, p))` +//! - we compute `usefulness(specialize(c, p_1) ... specialize(c, p_n), q')` //! -//! For example, if `P` is: +//! - for each witness found, we revert specialization by pushing the constructor `c` on top. //! +//! - We return the concatenation of all the witnesses found, if any. +//! +//! Example: //! ``` -//! [ -//! [Some(true), _], -//! [None, false], -//! ] +//! [Some(true)] // p_1 +//! [None] // p_2 +//! [Some(_)] // q +//! //==>> try `None`: `specialize(None, q)` returns nothing +//! //==>> try `Some`: `specialize(Some, q)` returns a single row +//! [true] // p_1' +//! [_] // q' +//! //==>> try `true`: `specialize(true, q')` returns a single row +//! [] // p_1'' +//! [] // q'' +//! //==>> base case; `n != 0` so `q''` is not useful. +//! //==>> go back up a step +//! [true] // p_1' +//! [_] // q' +//! //==>> try `false`: `specialize(false, q')` returns a single row +//! [] // q'' +//! //==>> base case; `n == 0` so `q''` is useful. We return the single witness `[]` +//! witnesses: +//! [] +//! //==>> undo the specialization with `false` +//! witnesses: +//! [false] +//! //==>> undo the specialization with `Some` +//! witnesses: +//! [Some(false)] +//! //==>> we have tried all the constructors. The output is the single witness `[Some(false)]`. //! ``` //! -//! and `p` is `[_, false]`, both `None` and `Some` constructors appear in the first -//! components of `P`. We will therefore try popping both constructors in turn: we -//! compute `U([[true, _]], [_, false])` for the `Some` constructor, and `U([[false]], -//! [false])` for the `None` constructor. The first case returns true, so we know that -//! `p` is useful for `P`. Indeed, it matches `[Some(false), _]` that wasn't matched -//! before. +//! This computation is done in [`is_useful`]. In practice we don't care about the list of +//! witnesses when computing reachability; we only need to know whether any exist. We do keep the +//! witnesses when computing exhaustiveness to report them to the user. +//! //! -//! 
- If `p_1 == r_1 | r_2`, then the usefulness depends on each `r_i` separately: -//! `U(P, p) := U(P, (r_1, p_2, .., p_n)) -//! || U(P, (r_2, p_2, .., p_n))` +//! # Making usefulness tractable: constructor splitting //! -//! Modifications to the algorithm -//! ------------------------------ -//! The algorithm in the paper doesn't cover some of the special cases that arise in Rust, for -//! example uninhabited types and variable-length slice patterns. These are drawn attention to -//! throughout the code below. I'll make a quick note here about how exhaustive integer matching is -//! accounted for, though. +//! We're missing one last detail: which constructors do we list? Naively listing all value +//! constructors cannot work for types like `u64` or `&str`, so we need to be more clever. The +//! first obvious insight is that we only want to list constructors that are covered by the head +//! constructor of `q`. If it's a value constructor, we only try that one. If it's a pattern-only +//! constructor, we use the final clever idea for this algorithm: _constructor splitting_, where we +//! group together constructors that behave the same. //! -//! Exhaustive integer matching -//! --------------------------- -//! An integer type can be thought of as a (huge) sum type: 1 | 2 | 3 | ... -//! So to support exhaustive integer matching, we can make use of the logic in the paper for -//! OR-patterns. However, we obviously can't just treat ranges x..=y as individual sums, because -//! they are likely gigantic. So we instead treat ranges as constructors of the integers. This means -//! that we have a constructor *of* constructors (the integers themselves). We then need to work -//! through all the inductive step rules above, deriving how the ranges would be treated as -//! OR-patterns, and making sure that they're treated in the same way even when they're ranges. -//! There are really only four special cases here: -//! - When we match on a constructor that's actually a range, we have to treat it as if we would -//! an OR-pattern. -//! + It turns out that we can simply extend the case for single-value patterns in -//! `specialize` to either be *equal* to a value constructor, or *contained within* a range -//! constructor. -//! + When the pattern itself is a range, you just want to tell whether any of the values in -//! the pattern range coincide with values in the constructor range, which is precisely -//! intersection. -//! Since when encountering a range pattern for a value constructor, we also use inclusion, it -//! means that whenever the constructor is a value/range and the pattern is also a value/range, -//! we can simply use intersection to test usefulness. -//! - When we're testing for usefulness of a pattern and the pattern's first component is a -//! wildcard. -//! + If all the constructors appear in the matrix, we have a slight complication. By default, -//! the behaviour (i.e., a disjunction over specialised matrices for each constructor) is -//! invalid, because we want a disjunction over every *integer* in each range, not just a -//! disjunction over every range. This is a bit more tricky to deal with: essentially we need -//! to form equivalence classes of subranges of the constructor range for which the behaviour -//! of the matrix `P` and new pattern `p` are the same. This is described in more -//! detail in `Constructor::split`. -//! + If some constructors are missing from the matrix, it turns out we don't need to do -//! 
anything special (because we know none of the integers are actually wildcards: i.e., we -//! can't span wildcards using ranges). +//! The details are not necessary to understand this file, so we explain them in +//! [`super::deconstruct_pat`]. Splitting is done by the [`Constructor::split`] function. use self::Usefulness::*; use self::WitnessPreference::*; @@ -1025,7 +1001,7 @@ fn is_useful<'p, 'tcx>( /// The arm of a match expression. #[derive(Clone, Copy)] crate struct MatchArm<'p, 'tcx> { - /// The pattern must have been lowered through `MatchVisitor::lower_pattern`. + /// The pattern must have been lowered through `check_match::MatchVisitor::lower_pattern`. crate pat: &'p super::Pat<'tcx>, crate hir_id: HirId, crate has_guard: bool, @@ -1043,7 +1019,8 @@ crate struct UsefulnessReport<'p, 'tcx> { /// The entrypoint for the usefulness algorithm. Computes whether a match is exhaustive and which /// of its arms are reachable. /// -/// Note: the input patterns must have been lowered through `MatchVisitor::lower_pattern`. +/// Note: the input patterns must have been lowered through +/// `check_match::MatchVisitor::lower_pattern`. crate fn compute_match_usefulness<'p, 'tcx>( cx: &MatchCheckCtxt<'p, 'tcx>, arms: &[MatchArm<'p, 'tcx>], From 53e03fb7c10f39229241fedc001a2c28ffb569e6 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sun, 20 Dec 2020 00:39:37 +0000 Subject: [PATCH 10/13] Make the special "missing patterns" constructor real --- .../src/thir/pattern/deconstruct_pat.rs | 108 ++++++++---------- .../src/thir/pattern/usefulness.rs | 13 ++- 2 files changed, 57 insertions(+), 64 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index f71a4829e8a29..da7b0e6250396 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -607,6 +607,9 @@ pub(super) enum Constructor<'tcx> { /// Fake extra constructor for enums that aren't allowed to be matched exhaustively. Also used /// for those types for which we cannot list constructors explicitly, like `f64` and `str`. NonExhaustive, + /// Stands for constructors that are not seen in the matrix, as explained in the documentation + /// for [`SplitWildcard`]. + Missing, /// Wildcard pattern. Wildcard, } @@ -758,8 +761,8 @@ impl<'tcx> Constructor<'tcx> { match (self, other) { // Wildcards cover anything (_, Wildcard) => true, - // Wildcards are only covered by wildcards - (Wildcard, _) => false, + // The missing ctors are not covered by anything in the matrix except wildcards. + (Missing | Wildcard, _) => false, (Single, Single) => true, (Variant(self_id), Variant(other_id)) => self_id == other_id, @@ -832,7 +835,7 @@ impl<'tcx> Constructor<'tcx> { .any(|other| slice.is_covered_by(other)), // This constructor is never covered by anything else NonExhaustive => false, - Str(..) | FloatRange(..) | Opaque | Wildcard => { + Str(..) | FloatRange(..) | Opaque | Missing | Wildcard => { span_bug!(pcx.span, "found unexpected ctor in all_ctors: {:?}", self) } } @@ -1002,7 +1005,7 @@ impl<'tcx> SplitWildcard<'tcx> { } /// Iterate over the constructors for this type that are not present in the matrix. - fn iter_missing<'a, 'p>( + pub(super) fn iter_missing<'a, 'p>( &'a self, pcx: PatCtxt<'a, 'p, 'tcx>, ) -> impl Iterator> + Captures<'p> { @@ -1013,64 +1016,45 @@ impl<'tcx> SplitWildcard<'tcx> { /// top of the file, if any constructors are missing we can ignore the present ones. 
fn into_ctors(self, pcx: PatCtxt<'_, '_, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { if self.any_missing(pcx) { - // Some constructors are missing, thus we can specialize with the wildcard constructor, - // which will stand for those constructors that are missing, and matches the same rows - // as any of them (namely the wildcard rows). - return smallvec![Wildcard]; + // Some constructors are missing, thus we can specialize with the special `Missing` + // constructor, which stands for those constructors that are not seen in the matrix, + // and matches the same rows as any of them (namely the wildcard rows). See the top of + // the file for details. + // However, when all constructors are missing we can also specialize with the full + // `Wildcard` constructor. The difference will depend on what we want in diagnostics. + + // If some constructors are missing, we typically want to report those constructors, + // e.g.: + // ``` + // enum Direction { N, S, E, W } + // let Direction::N = ...; + // ``` + // we can report 3 witnesses: `S`, `E`, and `W`. + // + // However, if the user didn't actually specify a constructor + // in this arm, e.g., in + // ``` + // let x: (Direction, Direction, bool) = ...; + // let (_, _, false) = x; + // ``` + // we don't want to show all 16 possible witnesses `(, , + // true)` - we are satisfied with `(_, _, true)`. So if all constructors are missing we + // prefer to report just a wildcard `_`. + // + // The exception is: if we are at the top-level, for example in an empty match, we + // sometimes prefer reporting the list of constructors instead of just `_`. + let report_when_all_missing = pcx.is_top_level && !IntRange::is_integral(pcx.ty); + let ctor = if !self.matrix_ctors.is_empty() || report_when_all_missing { + Missing + } else { + Wildcard + }; + return smallvec![ctor]; } // All the constructors are present in the matrix, so we just go through them all. self.all_ctors } - - /// List the patterns corresponding to the missing constructors. In some cases, instead of - /// listing all constructors of a given type, we prefer to simply report a wildcard. - pub(super) fn report_missing_patterns<'p>( - &self, - pcx: PatCtxt<'_, 'p, 'tcx>, - ) -> SmallVec<[Pat<'tcx>; 1]> { - // There are 2 ways we can report a witness here. - // Commonly, we can report all the "free" - // constructors as witnesses, e.g., if we have: - // - // ``` - // enum Direction { N, S, E, W } - // let Direction::N = ...; - // ``` - // - // we can report 3 witnesses: `S`, `E`, and `W`. - // - // However, there is a case where we don't want - // to do this and instead report a single `_` witness: - // if the user didn't actually specify a constructor - // in this arm, e.g., in - // - // ``` - // let x: (Direction, Direction, bool) = ...; - // let (_, _, false) = x; - // ``` - // - // we don't want to show all 16 possible witnesses - // `(, , true)` - we are - // satisfied with `(_, _, true)`. In this case, - // `used_ctors` is empty. - // The exception is: if we are at the top-level, for example in an empty match, we - // sometimes prefer reporting the list of constructors instead of just `_`. - let report_when_all_missing = pcx.is_top_level && !IntRange::is_integral(pcx.ty); - if self.matrix_ctors.is_empty() && !report_when_all_missing { - // All constructors are unused. Report only a wildcard - // rather than each individual constructor. 
- smallvec![Pat::wildcard_from_ty(pcx.ty)] - } else { - // Construct for each missing constructor a "wild" version of this - // constructor, that matches everything that can be built with - // it. For example, if `ctor` is a `Constructor::Variant` for - // `Option::Some`, we get the pattern `Some(_)`. - self.iter_missing(pcx) - .map(|missing_ctor| Fields::wildcards(pcx, &missing_ctor).apply(pcx, missing_ctor)) - .collect() - } - } } /// Some fields need to be explicitly hidden away in certain cases; see the comment above the @@ -1211,9 +1195,8 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { } _ => bug!("bad slice pattern {:?} {:?}", constructor, ty), }, - Str(..) | FloatRange(..) | IntRange(..) | NonExhaustive | Opaque | Wildcard => { - Fields::empty() - } + Str(..) | FloatRange(..) | IntRange(..) | NonExhaustive | Opaque | Missing + | Wildcard => Fields::empty(), }; debug!("Fields::wildcards({:?}, {:?}) = {:#?}", constructor, ty, ret); ret @@ -1297,9 +1280,10 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { &FloatRange(lo, hi, end) => PatKind::Range(PatRange { lo, hi, end }), IntRange(range) => return range.to_pat(pcx.cx.tcx, pcx.ty), NonExhaustive => PatKind::Wild, + Wildcard => return Pat::wildcard_from_ty(pcx.ty), Opaque => bug!("we should not try to apply an opaque constructor"), - Wildcard => bug!( - "trying to apply a wildcard constructor; this should have been done in `apply_constructors`" + Missing => bug!( + "trying to apply the `Missing` constructor; this should have been done in `apply_constructors`" ), }; diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 4243108423072..f74cb6ade4770 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -784,10 +784,19 @@ impl<'tcx> Usefulness<'tcx> { ) -> Self { match self { UsefulWithWitness(witnesses) => { - let new_witnesses = if ctor.is_wildcard() { + let new_witnesses = if matches!(ctor, Constructor::Missing) { let mut split_wildcard = SplitWildcard::new(pcx); split_wildcard.split(pcx, matrix.head_ctors(pcx.cx)); - let new_patterns = split_wildcard.report_missing_patterns(pcx); + // Construct for each missing constructor a "wild" version of this + // constructor, that matches everything that can be built with + // it. For example, if `ctor` is a `Constructor::Variant` for + // `Option::Some`, we get the pattern `Some(_)`. + let new_patterns: Vec<_> = split_wildcard + .iter_missing(pcx) + .map(|missing_ctor| { + Fields::wildcards(pcx, missing_ctor).apply(pcx, missing_ctor) + }) + .collect(); witnesses .into_iter() .flat_map(|witness| { From 1c176d1150db4dbbc922c56abbc2cf0e8cf9abc1 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sat, 19 Dec 2020 07:11:00 +0000 Subject: [PATCH 11/13] Simplify field filtering --- .../src/thir/pattern/deconstruct_pat.rs | 118 ++++++++---------- .../src/thir/pattern/usefulness.rs | 2 +- 2 files changed, 56 insertions(+), 64 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index da7b0e6250396..357735d6c16ca 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -1058,41 +1058,30 @@ impl<'tcx> SplitWildcard<'tcx> { } /// Some fields need to be explicitly hidden away in certain cases; see the comment above the -/// `Fields` struct. This struct represents such a potentially-hidden field. 
When a field is hidden -/// we still keep its type around. +/// `Fields` struct. This struct represents such a potentially-hidden field. #[derive(Debug, Copy, Clone)] pub(super) enum FilteredField<'p, 'tcx> { Kept(&'p Pat<'tcx>), - Hidden(Ty<'tcx>), + Hidden, } impl<'p, 'tcx> FilteredField<'p, 'tcx> { fn kept(self) -> Option<&'p Pat<'tcx>> { match self { FilteredField::Kept(p) => Some(p), - FilteredField::Hidden(_) => None, - } - } - - fn to_pattern(self) -> Pat<'tcx> { - match self { - FilteredField::Kept(p) => p.clone(), - FilteredField::Hidden(ty) => Pat::wildcard_from_ty(ty), + FilteredField::Hidden => None, } } } /// A value can be decomposed into a constructor applied to some fields. This struct represents /// those fields, generalized to allow patterns in each field. See also `Constructor`. +/// This is constructed from a constructor using [`Fields::wildcards()`]. /// /// If a private or `non_exhaustive` field is uninhabited, the code mustn't observe that it is -/// uninhabited. For that, we filter these fields out of the matrix. This is subtle because we -/// still need to have those fields back when going to/from a `Pat`. Most of this is handled -/// automatically in `Fields`, but when constructing or deconstructing `Fields` you need to be -/// careful. As a rule, when going to/from the matrix, use the filtered field list; when going -/// to/from `Pat`, use the full field list. -/// This filtering is uncommon in practice, because uninhabited fields are rarely used, so we avoid -/// it when possible to preserve performance. +/// uninhabited. For that, we filter these fields out of the matrix. This is handled automatically +/// in `Fields`. This filtering is uncommon in practice, because uninhabited fields are rare used, +/// so we avoid it when possible to preserve performance. #[derive(Debug, Clone)] pub(super) enum Fields<'p, 'tcx> { /// Lists of patterns that don't contain any filtered fields. @@ -1101,21 +1090,19 @@ pub(super) enum Fields<'p, 'tcx> { /// have not measured if it really made a difference. Slice(&'p [Pat<'tcx>]), Vec(SmallVec<[&'p Pat<'tcx>; 2]>), - /// Patterns where some of the fields need to be hidden. `kept_count` caches the number of - /// non-hidden fields. + /// Patterns where some of the fields need to be hidden. For all intents and purposes we only + /// care about the non-hidden fields. We need to keep the real field index for those fields; + /// we're morally storing a `Vec<(usize, &Pat)>` but what we do is more convenient. + /// `len` counts the number of non-hidden fields Filtered { fields: SmallVec<[FilteredField<'p, 'tcx>; 2]>, - kept_count: usize, + len: usize, }, } impl<'p, 'tcx> Fields<'p, 'tcx> { - fn empty() -> Self { - Fields::Slice(&[]) - } - - /// Construct a new `Fields` from the given pattern. Must not be used if the pattern is a field - /// of a struct/tuple/variant. + /// Internal use. Use `Fields::wildcards()` instead. + /// Must not be used if the pattern is a field of a struct/tuple/variant. fn from_single_pattern(pat: &'p Pat<'tcx>) -> Self { Fields::Slice(std::slice::from_ref(pat)) } @@ -1160,7 +1147,7 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { if has_no_hidden_fields { Fields::wildcards_from_tys(cx, field_tys) } else { - let mut kept_count = 0; + let mut len = 0; let fields = variant .fields .iter() @@ -1175,14 +1162,14 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { // order not to reveal the uninhabitedness of the whole // variant. 
                                     if is_uninhabited && (!is_visible || is_non_exhaustive) {
-                                        FilteredField::Hidden(ty)
+                                        FilteredField::Hidden
                                     } else {
-                                        kept_count += 1;
+                                        len += 1;
                                         FilteredField::Kept(wildcard_from_ty(ty))
                                     }
                                 })
                                 .collect();
-                            Fields::Filtered { fields, kept_count }
+                            Fields::Filtered { fields, len }
                         }
                     }
                 }
@@ -1196,7 +1183,7 @@ impl<'p, 'tcx> Fields<'p, 'tcx> {
                 _ => bug!("bad slice pattern {:?} {:?}", constructor, ty),
             },
             Str(..) | FloatRange(..) | IntRange(..) | NonExhaustive | Opaque | Missing
-            | Wildcard => Fields::empty(),
+            | Wildcard => Fields::Slice(&[]),
         };
         debug!("Fields::wildcards({:?}, {:?}) = {:#?}", constructor, ty, ret);
         ret
@@ -1218,14 +1205,16 @@ impl<'p, 'tcx> Fields<'p, 'tcx> {
     /// `self`: `[false]`
     /// returns `Some(false)`
     pub(super) fn apply(self, pcx: PatCtxt<'_, 'p, 'tcx>, ctor: &Constructor<'tcx>) -> Pat<'tcx> {
-        let mut subpatterns = self.all_patterns();
+        let subpatterns_and_indices = self.patterns_and_indices();
+        let mut subpatterns = subpatterns_and_indices.iter().map(|&(_, p)| p).cloned();
 
         let pat = match ctor {
             Single | Variant(_) => match pcx.ty.kind() {
                 ty::Adt(..) | ty::Tuple(..) => {
-                    let subpatterns = subpatterns
-                        .enumerate()
-                        .map(|(i, p)| FieldPat { field: Field::new(i), pattern: p })
+                    // We want the real indices here.
+                    let subpatterns = subpatterns_and_indices
+                        .iter()
+                        .map(|&(field, p)| FieldPat { field, pattern: p.clone() })
                         .collect();
 
                     if let ty::Adt(adt, substs) = pcx.ty.kind() {
@@ -1290,39 +1279,42 @@ impl<'p, 'tcx> Fields<'p, 'tcx> {
         Pat { ty: pcx.ty, span: DUMMY_SP, kind: Box::new(pat) }
     }
 
-    /// Returns the number of patterns from the viewpoint of match-checking, i.e. excluding hidden
-    /// fields. This is what we want in most cases in this file, the only exception being
-    /// conversion to/from `Pat`.
+    /// Returns the number of patterns. This is the same as the arity of the constructor used to
+    /// construct `self`.
     pub(super) fn len(&self) -> usize {
         match self {
             Fields::Slice(pats) => pats.len(),
             Fields::Vec(pats) => pats.len(),
-            Fields::Filtered { kept_count, .. } => *kept_count,
+            Fields::Filtered { len, .. } => *len,
         }
     }
 
-    /// Returns the complete list of patterns, including hidden fields.
-    fn all_patterns(self) -> impl Iterator<Item = Pat<'tcx>> {
-        let pats: SmallVec<[_; 2]> = match self {
-            Fields::Slice(pats) => pats.iter().cloned().collect(),
-            Fields::Vec(pats) => pats.into_iter().cloned().collect(),
+    /// Returns the list of patterns along with the corresponding field indices.
+    fn patterns_and_indices(&self) -> SmallVec<[(Field, &'p Pat<'tcx>); 2]> {
+        match self {
+            Fields::Slice(pats) => {
+                pats.iter().enumerate().map(|(i, p)| (Field::new(i), p)).collect()
+            }
+            Fields::Vec(pats) => {
+                pats.iter().copied().enumerate().map(|(i, p)| (Field::new(i), p)).collect()
+            }
             Fields::Filtered { fields, .. } => {
-                // We don't skip any fields here.
-                fields.into_iter().map(|p| p.to_pattern()).collect()
+                // Indices must be relative to the full list of patterns
+                fields
+                    .iter()
+                    .enumerate()
+                    .filter_map(|(i, p)| Some((Field::new(i), p.kept()?)))
+                    .collect()
             }
-        };
-        pats.into_iter()
+        }
     }
 
-    /// Returns the filtered list of patterns, not including hidden fields.
-    pub(super) fn filtered_patterns(self) -> SmallVec<[&'p Pat<'tcx>; 2]> {
+    /// Returns the list of patterns.
+    pub(super) fn into_patterns(self) -> SmallVec<[&'p Pat<'tcx>; 2]> {
         match self {
             Fields::Slice(pats) => pats.iter().collect(),
             Fields::Vec(pats) => pats,
-            Fields::Filtered { fields, .. } => {
-                // We skip hidden fields here
-                fields.into_iter().filter_map(|p| p.kept()).collect()
-            }
+            Fields::Filtered { fields, .. } => fields.iter().filter_map(|p| p.kept()).collect(),
         }
     }
 
@@ -1338,10 +1330,10 @@ impl<'p, 'tcx> Fields<'p, 'tcx> {
     }
 
     /// Overrides some of the fields with the provided patterns. This is used when a pattern
-    /// defines some fields but not all, for example `Foo { field1: Some(_), .. }`: here we start with a
-    /// `Fields` that is just one wildcard per field of the `Foo` struct, and override the entry
-    /// corresponding to `field1` with the pattern `Some(_)`. This is also used for slice patterns
-    /// for the same reason.
+    /// defines some fields but not all, for example `Foo { field1: Some(_), .. }`: here we start
+    /// with a `Fields` that is just one wildcard per field of the `Foo` struct, and override the
+    /// entry corresponding to `field1` with the pattern `Some(_)`. This is also used for slice
+    /// patterns for the same reason.
     fn replace_fields_indexed(
         &self,
         new_pats: impl IntoIterator<Item = (usize, &'p Pat<'tcx>)>,
@@ -1369,8 +1361,8 @@ impl<'p, 'tcx> Fields<'p, 'tcx> {
         fields
     }
 
-    /// Replaces contained fields with the given filtered list of patterns, e.g. taken from the
-    /// matrix. There must be `len()` patterns in `pats`.
+    /// Replaces contained fields with the given list of patterns. There must be `len()` patterns
+    /// in `pats`.
     pub(super) fn replace_fields(
         &self,
         cx: &MatchCheckCtxt<'p, 'tcx>,
@@ -1379,7 +1371,7 @@ impl<'p, 'tcx> Fields<'p, 'tcx> {
         let pats: &[_] = cx.pattern_arena.alloc_from_iter(pats);
 
         match self {
-            Fields::Filtered { fields, kept_count } => {
+            Fields::Filtered { fields, len } => {
                 let mut pats = pats.iter();
                 let mut fields = fields.clone();
                 for f in &mut fields {
@@ -1388,7 +1380,7 @@ impl<'p, 'tcx> Fields<'p, 'tcx> {
                         *p = pats.next().unwrap();
                     }
                 }
-                Fields::Filtered { fields, kept_count: *kept_count }
+                Fields::Filtered { fields, len: *len }
             }
             _ => Fields::Slice(pats),
         }
diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs
index f74cb6ade4770..6c40abf3f20be 100644
--- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs
+++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs
@@ -447,7 +447,7 @@ impl<'p, 'tcx> PatStack<'p, 'tcx> {
         // We pop the head pattern and push the new fields extracted from the arguments of
         // `self.head()`.
         let mut new_fields =
-            ctor_wild_subpatterns.replace_with_pattern_arguments(self.head()).filtered_patterns();
+            ctor_wild_subpatterns.replace_with_pattern_arguments(self.head()).into_patterns();
         new_fields.extend_from_slice(&self.pats[1..]);
         PatStack::from_vec(new_fields)
     }

From 85fdb34d3acf631d36d493a65d45b7bd7571e55d Mon Sep 17 00:00:00 2001
From: Nadrieril
Date: Tue, 22 Dec 2020 06:09:54 +0000
Subject: [PATCH 12/13] Apply suggestions from code review

Co-authored-by: varkor
---
 .../src/thir/pattern/deconstruct_pat.rs | 10 +++++-----
 .../src/thir/pattern/usefulness.rs      | 19 ++++++++++---------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs
index 357735d6c16ca..d79dd97a69a75 100644
--- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs
+++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs
@@ -451,7 +451,7 @@ impl Slice {
 ///
 /// A slice pattern `[x, .., y]` behaves like the infinite or-pattern `[x, y] | [x, _, y] | [x, _,
 /// _, y] | ...`.
The corresponding value constructors are fixed-length array constructors above a -/// given minimum length. We obviously can't list all of this infinity of constructors. Thankfully, +/// given minimum length. We obviously can't list this infinitude of constructors. Thankfully, /// it turns out that for each finite set of slice patterns, all sufficiently large array lengths /// are equivalent. /// @@ -491,7 +491,7 @@ impl Slice { /// middle. This means that the set of witnesses for length `l >= 5` if equivalent to the set for /// any other `l' >= 5`: simply add or remove wildcards in the middle to convert between them. /// -/// This applies to any set of slice patterns: there will be a length `L` above which all length +/// This applies to any set of slice patterns: there will be a length `L` above which all lengths /// behave the same. This is exactly what we need for constructor splitting. Therefore a /// variable-length slice can be split into a variable-length slice of minimal length `L`, and many /// fixed-length slices of lengths `< L`. @@ -736,8 +736,8 @@ impl<'tcx> Constructor<'tcx> { // ranges check. IntRange(ctor_range) if !ctor_range.is_singleton() => { let mut split_range = SplitIntRange::new(ctor_range.clone()); - let intranges = ctors.filter_map(|ctor| ctor.as_int_range()); - split_range.split(intranges.cloned()); + let int_ranges = ctors.filter_map(|ctor| ctor.as_int_range()); + split_range.split(int_ranges.cloned()); split_range.iter().map(IntRange).collect() } &Slice(Slice { kind: VarLen(self_prefix, self_suffix), array_len }) => { @@ -1080,7 +1080,7 @@ impl<'p, 'tcx> FilteredField<'p, 'tcx> { /// /// If a private or `non_exhaustive` field is uninhabited, the code mustn't observe that it is /// uninhabited. For that, we filter these fields out of the matrix. This is handled automatically -/// in `Fields`. This filtering is uncommon in practice, because uninhabited fields are rare used, +/// in `Fields`. This filtering is uncommon in practice, because uninhabited fields are rarely used, /// so we avoid it when possible to preserve performance. #[derive(Debug, Clone)] pub(super) enum Fields<'p, 'tcx> { diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 6c40abf3f20be..803ffc0885e68 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -19,7 +19,7 @@ //! //! The algorithm implemented here is a modified version of the one described in [this //! paper](http://moscova.inria.fr/~maranget/papers/warn/index.html). We have however generalized -//! it to accomodate the variety of patterns that rust supports. We thus explain our version here, +//! it to accommodate the variety of patterns that Rust supports. We thus explain our version here, //! without being as rigorous. //! //! @@ -27,13 +27,13 @@ //! //! The core of the algorithm is the notion of "usefulness". A pattern `q` is said to be *useful* //! relative to another pattern `p` of the same type if there is a value that is matched by `q` and -//! not matched by `p`. This generalizes to many `p`s: `q` is useful wrt a list of patterns `p_1 .. -//! p_n` if there is a value that is matched by `q` and by none of the `p_i`. We write +//! not matched by `p`. This generalizes to many `p`s: `q` is useful w.r.t. a list of patterns +//! `p_1 .. p_n` if there is a value that is matched by `q` and by none of the `p_i`. We write //! `usefulness(p_1 .. 
p_n, q)` for a function that returns a list of such values. The aim of this //! file is to compute it efficiently. //! //! This is enough to compute reachability: a pattern in a `match` expression is reachable iff it -//! is useful wrt the patterns above it: +//! is useful w.r.t. the patterns above it: //! ```rust //! match x { //! Some(_) => ..., @@ -44,8 +44,8 @@ //! ``` //! //! This is also enough to compute exhaustiveness: a match is exhaustive iff the wildcard `_` -//! pattern is _not_ useful wrt the patterns in the match. The values returned by `usefulness` are -//! used to tell the user which values are missing. +//! pattern is _not_ useful w.r.t. the patterns in the match. The values returned by `usefulness` +//! are used to tell the user which values are missing. //! ```rust //! match x { //! Some(0) => ..., @@ -102,7 +102,7 @@ //! //! Note: this constructors/fields distinction may not straightforwardly apply to every Rust type. //! For example a value of type `Rc` can't be deconstructed that way, and `&str` has an -//! infinity of constructors. There are also subtleties with visibility of fields and +//! infinitude of constructors. There are also subtleties with visibility of fields and //! uninhabitedness and various other things. The constructors idea can be extended to handle most //! of these subtleties though; caveats are documented where relevant throughout the code. //! @@ -184,7 +184,8 @@ //! //! `specialize(c, p0 | p1) := specialize(c, p0) ++ specialize(c, p1)` //! -//! - We treat the other pattern constructors lik big or-patterns of all the possibilities: +//! - We treat the other pattern constructors as if they were a large or-pattern of all the +//! possibilities: //! //! `specialize(c, _) := specialize(c, Variant1(_) | Variant2(_, _) | ...)` //! @@ -193,7 +194,7 @@ //! `specialize(c, [p0, .., p1]) := specialize(c, [p0, p1] | [p0, _, p1] | [p0, _, _, p1] | ...)` //! //! - If `c` is a pattern-only constructor, `specialize` is defined on a case-by-case basis. See -//! the discussion abount constructor splitting in [`super::deconstruct_pat`]. +//! the discussion about constructor splitting in [`super::deconstruct_pat`]. //! //! //! We then extend this function to work with pattern-stacks as input, by acting on the first From be23694622609c27d52042be6b506bf69848acbe Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 22 Dec 2020 11:28:17 +0000 Subject: [PATCH 13/13] Fix a comment --- compiler/rustc_mir_build/src/thir/pattern/usefulness.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 803ffc0885e68..83fee380ccce7 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -852,10 +852,10 @@ enum WitnessPreference { /// We'll perform the following steps: /// 1. Start with an empty witness /// `Witness(vec![])` -/// 2. Push a witness `Some(_)` against the `None` -/// `Witness(vec![Some(_)])` -/// 3. Push a witness `true` against the `false` -/// `Witness(vec![Some(_), true])` +/// 2. Push a witness `true` against the `false` +/// `Witness(vec![true])` +/// 3. Push a witness `Some(_)` against the `None` +/// `Witness(vec![true, Some(_)])` /// 4. Apply the `Pair` constructor to the witnesses /// `Witness(vec![Pair(Some(_), true)])` ///
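
As a companion to the corrected steps above, here is a standalone sketch of the match they describe. The payload type of `Pair`'s first field is an assumption made for illustration; removing the last arm is what would make the algorithm build the witness `Pair(Some(_), true)`.

```rust
#[allow(dead_code)]
struct Pair(Option<(u32, u32)>, bool);

fn main() {
    let p = Pair(Some((1, 2)), true);
    match p {
        Pair(None, _) => println!("no payload"),
        Pair(_, false) => println!("flag unset"),
        // Without this arm the match is non-exhaustive, and the witness is
        // assembled in the order listed above: `true`, then `Some(_)`, then the
        // `Pair` constructor, yielding `Pair(Some(_), true)`.
        Pair(Some(_), true) => println!("payload with flag set"),
    }
}
```
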