From 4c765f66a4f7921b4a47ab21f699bb2d8290b44a Mon Sep 17 00:00:00 2001 From: Camelid Date: Thu, 8 Oct 2020 22:24:34 -0700 Subject: [PATCH] Allow generic parameters in intra-doc links The contents of the generics will be mostly ignored (except for warning if fully-qualified syntax is used, which is currently unsupported in intra-doc links - see issue #74563). * Allow links like `Vec`, `Result`, and `Option>` * Allow links like `Vec::::new()` * Warn on * Unbalanced angle brackets (e.g. `Vec>`) * Missing type to apply generics to (`` or `>`) * Use of fully-qualified syntax (`::into_iter`) * Invalid path separator (`Vec::new`) * Too many angle brackets (`Vec<>`) * Empty angle brackets (`Vec<>`) Note that this implementation *does* allow some constructs that aren't valid in the actual Rust syntax, for example `Box::new()`. That may not be supported in rustdoc in the future; it is an implementation detail. --- src/librustdoc/lib.rs | 2 + .../passes/collect_intra_doc_links.rs | 203 +++++++++++++++++- src/test/rustdoc-ui/intra-link-errors.rs | 2 +- .../intra-link-malformed-generics.rs | 19 ++ .../intra-link-malformed-generics.stderr | 102 +++++++++ .../rustdoc/intra-doc-link-generic-params.rs | 55 +++++ 6 files changed, 381 insertions(+), 2 deletions(-) create mode 100644 src/test/rustdoc-ui/intra-link-malformed-generics.rs create mode 100644 src/test/rustdoc-ui/intra-link-malformed-generics.stderr create mode 100644 src/test/rustdoc/intra-doc-link-generic-params.rs diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 7762e8f8d4fb1..12726d2bd9a3e 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -3,11 +3,13 @@ html_playground_url = "https://play.rust-lang.org/" )] #![feature(rustc_private)] +#![feature(array_methods)] #![feature(box_patterns)] #![feature(box_syntax)] #![feature(in_band_lifetimes)] #![feature(nll)] #![feature(or_patterns)] +#![feature(peekable_next_if)] #![feature(test)] #![feature(crate_visibility_modifier)] #![feature(never_type)] diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index b9be3e2f92b46..33fa5a5033be2 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -23,6 +23,7 @@ use smallvec::{smallvec, SmallVec}; use std::borrow::Cow; use std::cell::Cell; +use std::mem; use std::ops::Range; use crate::clean::*; @@ -65,10 +66,53 @@ enum ResolutionFailure<'a> { NotResolved { module_id: DefId, partial_res: Option, unresolved: Cow<'a, str> }, /// should not ever happen NoParentItem, + /// This link has malformed generic parameters; e.g., the angle brackets are unbalanced. + MalformedGenerics(MalformedGenerics), /// used to communicate that this should be ignored, but shouldn't be reported to the user Dummy, } +#[derive(Debug)] +enum MalformedGenerics { + /// This link has unbalanced angle brackets. + /// + /// For example, `Vec>`. + UnbalancedAngleBrackets, + /// The generics are not attached to a type. + /// + /// For example, `` should trigger this. + /// + /// This is detected by checking if the path is empty after the generics are stripped. + MissingType, + /// The link uses fully-qualified syntax, which is currently unsupported. + /// + /// For example, `::into_iter` should trigger this. + /// + /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside + /// angle brackets. + HasFullyQualifiedSyntax, + /// The link has an invalid path separator. + /// + /// For example, `Vec::new()` should trigger this. Note that `Vec:new()` will **not** + /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be + /// called. + /// + /// Note that this will also **not** be triggered if the invalid path separator is inside angle + /// brackets because rustdoc mostly ignores what's inside angle brackets (except for + /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)). + /// + /// This is detected by checking if there is a colon followed by a non-colon in the link. + InvalidPathSeparator, + /// The link has too many angle brackets. + /// + /// For example, `Vec<>` should trigger this. + TooManyAngleBrackets, + /// The link has empty angle brackets. + /// + /// For example, `Vec<>` should trigger this. + EmptyAngleBrackets, +} + impl ResolutionFailure<'a> { // This resolved fully (not just partially) but is erroneous for some other reason fn full_res(&self) -> Option { @@ -912,6 +956,7 @@ impl LinkCollector<'_, '_> { let link_text; let mut path_str; let disambiguator; + let stripped_path_string; let (mut res, mut fragment) = { path_str = if let Ok((d, path)) = Disambiguator::from_str(&link) { disambiguator = Some(d); @@ -922,7 +967,7 @@ impl LinkCollector<'_, '_> { } .trim(); - if path_str.contains(|ch: char| !(ch.is_alphanumeric() || ch == ':' || ch == '_')) { + if path_str.contains(|ch: char| !(ch.is_alphanumeric() || ":_<>, ".contains(ch))) { return None; } @@ -985,6 +1030,36 @@ impl LinkCollector<'_, '_> { module_id = DefId { krate, index: CRATE_DEF_INDEX }; } + // Strip generics from the path. + if path_str.contains(['<', '>'].as_slice()) { + stripped_path_string = match strip_generics_from_path(path_str) { + Ok(path) => path, + Err(err_kind) => { + debug!("link has malformed generics: {}", path_str); + resolution_failure( + self, + &item, + path_str, + disambiguator, + dox, + link_range, + smallvec![err_kind], + ); + return None; + } + }; + path_str = &stripped_path_string; + } + + // Sanity check to make sure we don't have any angle brackets after stripping generics. + assert!(!path_str.contains(['<', '>'].as_slice())); + + // The link is not an intra-doc link if it still contains commas or spaces after + // stripping generics. + if path_str.contains([',', ' '].as_slice()) { + return None; + } + match self.resolve_with_disambiguator( disambiguator, item, @@ -1718,6 +1793,27 @@ fn resolution_failure( diag.level = rustc_errors::Level::Bug; "all intra doc links should have a parent item".to_owned() } + ResolutionFailure::MalformedGenerics(variant) => match variant { + MalformedGenerics::UnbalancedAngleBrackets => { + String::from("unbalanced angle brackets") + } + MalformedGenerics::MissingType => { + String::from("missing type for generic parameters") + } + MalformedGenerics::HasFullyQualifiedSyntax => { + diag.note("see https://github.com/rust-lang/rust/issues/74563 for more information"); + String::from("fully-qualified syntax is unsupported") + } + MalformedGenerics::InvalidPathSeparator => { + String::from("has invalid path separator") + } + MalformedGenerics::TooManyAngleBrackets => { + String::from("too many angle brackets") + } + MalformedGenerics::EmptyAngleBrackets => { + String::from("empty angle brackets") + } + }, }; if let Some(span) = sp { diag.span_label(span, ¬e); @@ -1908,3 +2004,108 @@ fn is_primitive(path_str: &str, ns: Namespace) -> Option<(&'static str, Res)> { fn primitive_impl(cx: &DocContext<'_>, path_str: &str) -> Option<&'static SmallVec<[DefId; 4]>> { Some(PrimitiveType::from_symbol(Symbol::intern(path_str))?.impls(cx.tcx)) } + +fn strip_generics_from_path(path_str: &str) -> Result> { + let mut stripped_segments = vec![]; + let mut path = path_str.chars().peekable(); + let mut segment = Vec::new(); + + while let Some(chr) = path.next() { + match chr { + ':' => { + if path.next_if_eq(&':').is_some() { + let stripped_segment = + strip_generics_from_path_segment(mem::take(&mut segment))?; + if !stripped_segment.is_empty() { + stripped_segments.push(stripped_segment); + } + } else { + return Err(ResolutionFailure::MalformedGenerics( + MalformedGenerics::InvalidPathSeparator, + )); + } + } + '<' => { + segment.push(chr); + + match path.peek() { + Some('<') => { + return Err(ResolutionFailure::MalformedGenerics( + MalformedGenerics::TooManyAngleBrackets, + )); + } + Some('>') => { + return Err(ResolutionFailure::MalformedGenerics( + MalformedGenerics::EmptyAngleBrackets, + )); + } + Some(_) => { + segment.push(path.next().unwrap()); + + while let Some(chr) = path.next_if(|c| *c != '>') { + segment.push(chr); + } + } + None => break, + } + } + _ => segment.push(chr), + } + debug!("raw segment: {:?}", segment); + } + + if !segment.is_empty() { + let stripped_segment = strip_generics_from_path_segment(segment)?; + if !stripped_segment.is_empty() { + stripped_segments.push(stripped_segment); + } + } + + debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments); + + let stripped_path = stripped_segments.join("::"); + + if !stripped_path.is_empty() { + Ok(stripped_path) + } else { + Err(ResolutionFailure::MalformedGenerics(MalformedGenerics::MissingType)) + } +} + +fn strip_generics_from_path_segment( + segment: Vec, +) -> Result> { + let mut stripped_segment = String::new(); + let mut param_depth = 0; + + let mut latest_generics_chunk = String::new(); + + for c in segment { + if c == '<' { + param_depth += 1; + latest_generics_chunk.clear(); + } else if c == '>' { + param_depth -= 1; + if latest_generics_chunk.contains(" as ") { + // The segment tries to use fully-qualified syntax, which is currently unsupported. + // Give a helpful error message instead of completely ignoring the angle brackets. + return Err(ResolutionFailure::MalformedGenerics( + MalformedGenerics::HasFullyQualifiedSyntax, + )); + } + } else { + if param_depth == 0 { + stripped_segment.push(c); + } else { + latest_generics_chunk.push(c); + } + } + } + + if param_depth == 0 { + Ok(stripped_segment) + } else { + // The segment has unbalanced angle brackets, e.g. `Vec>` + Err(ResolutionFailure::MalformedGenerics(MalformedGenerics::UnbalancedAngleBrackets)) + } +} diff --git a/src/test/rustdoc-ui/intra-link-errors.rs b/src/test/rustdoc-ui/intra-link-errors.rs index ef928ae02f3e3..81e42643ae8fc 100644 --- a/src/test/rustdoc-ui/intra-link-errors.rs +++ b/src/test/rustdoc-ui/intra-link-errors.rs @@ -2,7 +2,7 @@ //~^ NOTE lint level is defined // FIXME: this should say that it was skipped (maybe an allowed by default lint?) -/// [] +/// [invalid intra-doc syntax!!] /// [path::to::nonexistent::module] //~^ ERROR unresolved link diff --git a/src/test/rustdoc-ui/intra-link-malformed-generics.rs b/src/test/rustdoc-ui/intra-link-malformed-generics.rs new file mode 100644 index 0000000000000..9c54092146fef --- /dev/null +++ b/src/test/rustdoc-ui/intra-link-malformed-generics.rs @@ -0,0 +1,19 @@ +#![deny(broken_intra_doc_links)] + +//! [Vec<] //~ ERROR +//! [Vec] //~ ERROR +//! [Vec>>] //~ ERROR +//! [Vec>>] //~ ERROR +//! [] //~ ERROR +//! [] //~ ERROR +//! [Vec::new()] //~ ERROR +//! [Vec<>] //~ ERROR +//! [Vec<>] //~ ERROR +//! [Vec<<>>] //~ ERROR + +// FIXME(#74563) support UFCS +//! [::into_iter] //~ ERROR +//! [ as IntoIterator>::iter] //~ ERROR diff --git a/src/test/rustdoc-ui/intra-link-malformed-generics.stderr b/src/test/rustdoc-ui/intra-link-malformed-generics.stderr new file mode 100644 index 0000000000000..fe5d4cd1bbffa --- /dev/null +++ b/src/test/rustdoc-ui/intra-link-malformed-generics.stderr @@ -0,0 +1,102 @@ +error: unresolved link to `Vec<` + --> $DIR/intra-link-malformed-generics.rs:3:6 + | +LL | //! [Vec<] + | ^^^^ unbalanced angle brackets + | +note: the lint level is defined here + --> $DIR/intra-link-malformed-generics.rs:1:9 + | +LL | #![deny(broken_intra_doc_links)] + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: unresolved link to `Vec $DIR/intra-link-malformed-generics.rs:4:6 + | +LL | //! [Vec` + --> $DIR/intra-link-malformed-generics.rs:5:6 + | +LL | //! [Vec] + | ^^^^^^^^^^ unbalanced angle brackets + +error: unresolved link to `Vec>>` + --> $DIR/intra-link-malformed-generics.rs:6:6 + | +LL | //! [Vec>>] + | ^^^^^^^^^^^^ unbalanced angle brackets + +error: unresolved link to `Vec>>` + --> $DIR/intra-link-malformed-generics.rs:7:6 + | +LL | //! [Vec>>] + | ^^^^^^^^ unbalanced angle brackets + +error: unresolved link to ` $DIR/intra-link-malformed-generics.rs:8:6 + | +LL | //! [ $DIR/intra-link-malformed-generics.rs:9:6 + | +LL | //! [Vec::<] + | ^^^^^^ unbalanced angle brackets + +error: unresolved link to `` + --> $DIR/intra-link-malformed-generics.rs:10:6 + | +LL | //! [] + | ^^^ missing type for generic parameters + +error: unresolved link to `` + --> $DIR/intra-link-malformed-generics.rs:11:6 + | +LL | //! [] + | ^^^^^^^^^^^^^^^^ missing type for generic parameters + +error: unresolved link to `Vec::new` + --> $DIR/intra-link-malformed-generics.rs:12:6 + | +LL | //! [Vec::new()] + | ^^^^^^^^^^^^^ has invalid path separator + +error: unresolved link to `Vec<>` + --> $DIR/intra-link-malformed-generics.rs:13:6 + | +LL | //! [Vec<>] + | ^^^^^^^^ too many angle brackets + +error: unresolved link to `Vec<>` + --> $DIR/intra-link-malformed-generics.rs:14:6 + | +LL | //! [Vec<>] + | ^^^^^ empty angle brackets + +error: unresolved link to `Vec<<>>` + --> $DIR/intra-link-malformed-generics.rs:15:6 + | +LL | //! [Vec<<>>] + | ^^^^^^^ too many angle brackets + +error: unresolved link to `::into_iter` + --> $DIR/intra-link-malformed-generics.rs:18:6 + | +LL | //! [::into_iter] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ fully-qualified syntax is unsupported + | + = note: see https://github.com/rust-lang/rust/issues/74563 for more information + +error: unresolved link to ` as IntoIterator>::iter` + --> $DIR/intra-link-malformed-generics.rs:19:6 + | +LL | //! [ as IntoIterator>::iter] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ fully-qualified syntax is unsupported + | + = note: see https://github.com/rust-lang/rust/issues/74563 for more information + +error: aborting due to 15 previous errors + diff --git a/src/test/rustdoc/intra-doc-link-generic-params.rs b/src/test/rustdoc/intra-doc-link-generic-params.rs new file mode 100644 index 0000000000000..0673575297425 --- /dev/null +++ b/src/test/rustdoc/intra-doc-link-generic-params.rs @@ -0,0 +1,55 @@ +// ignore-tidy-linelength + +#![crate_name = "foo"] + +//! Here's a link to [`Vec`] and one to [`Box>>`]. +//! Here's a link to [`Iterator>::Item`]. +//! +//! And what about a link to [just `Option`](Option) and, [with the generic, `Option`](Option)? +//! +//! We should also try linking to [`Result`]; it has *two* generics! +//! +//! Now let's test a trickier case: [`Vec::::new`], or you could write it +//! [with parentheses as `Vec::::new()`][Vec::::new()]. +//! And what about something even harder? That would be [`Vec::>::new()`]. +//! +//! This is also pretty tricky: [`TypeId::of::()`]. +//! And this too: [`Vec::::len`]. +//! +//! We unofficially and implicitly support things that aren't valid in the actual Rust syntax, like +//! [`Box::new()`]. We may not support them in the future! +//! +//! These will be resolved as regular links: +//! - [`this is first`](https://www.rust-lang.org) +//! - [`this is twice`] +//! - [` thrice`](https://www.rust-lang.org) +//! - [` four times`][rlo] +//! - [a < b][rlo] +//! - [c > d] +//! +//! [`this is twice`]: https://www.rust-lang.org +//! [rlo]: https://www.rust-lang.org +//! [c > d]: https://www.rust-lang.org + +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html"]' 'Vec' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html"]' 'Box>>' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/iter/traits/iterator/trait.Iterator.html#associatedtype.Item"]' 'Iterator>::Item' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"]' 'just Option' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"]' 'with the generic, Option' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/result/enum.Result.html"]' 'Result' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'Vec::::new' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'with parentheses as Vec::::new()' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'Vec::>::new()' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/any/struct.TypeId.html#method.of"]' 'TypeId::of::()' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.len"]' 'Vec::::len' + +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html#method.new"]' 'Box::new()' + +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'this is first' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'this is twice' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' ' thrice' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' ' four times' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'a < b' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'c > d' + +use std::any::TypeId;