Skip to content

Commit

Permalink
Rollup merge of rust-lang#127127 - notriddle:notriddle/pulldown-cmark…
Browse files Browse the repository at this point in the history
…-0.11, r=GuillaumeGomez

rustdoc: update to pulldown-cmark 0.11

r? rustdoc

This pull request updates rustdoc to the latest version of pulldown-cmark. Along with adding new markdown extensions (which this PR doesn't enable), the new pulldown-cmark version also fixes a large number of bugs. Because all text files successfully parse as markdown, these bugfixes change the output, which can break people's existing docs.

A crater run, rust-lang#121659, has already been run for this change.

The first commit upgrades and fixes rustdoc. The second commit adds a lint for the footnote and block quote parser changes, which break the largest numbers of docs in the Crater run. The strikethrough change was mitigated in pulldown-cmark itself.

Unblocks rust-lang/rust-clippy#12876
  • Loading branch information
matthiaskrgr committed Jun 30, 2024
2 parents 6fc769b + a5fc783 commit a8cf593
Show file tree
Hide file tree
Showing 15 changed files with 383 additions and 92 deletions.
23 changes: 21 additions & 2 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3141,7 +3141,19 @@ dependencies = [
"bitflags 2.5.0",
"getopts",
"memchr",
"pulldown-cmark-escape",
"pulldown-cmark-escape 0.10.1",
"unicase",
]

[[package]]
name = "pulldown-cmark"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0"
dependencies = [
"bitflags 2.5.0",
"memchr",
"pulldown-cmark-escape 0.11.0",
"unicase",
]

Expand All @@ -3151,6 +3163,12 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3"

[[package]]
name = "pulldown-cmark-escape"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"

[[package]]
name = "pulldown-cmark-to-cmark"
version = "13.0.0"
Expand Down Expand Up @@ -4604,7 +4622,7 @@ name = "rustc_resolve"
version = "0.0.0"
dependencies = [
"bitflags 2.5.0",
"pulldown-cmark 0.9.6",
"pulldown-cmark 0.11.0",
"rustc_arena",
"rustc_ast",
"rustc_ast_pretty",
Expand Down Expand Up @@ -4887,6 +4905,7 @@ dependencies = [
"indexmap",
"itertools",
"minifier",
"pulldown-cmark 0.9.6",
"regex",
"rustdoc-json-types",
"serde",
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_resolve/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
bitflags = "2.4.1"
pulldown-cmark = { version = "0.9.6", default-features = false }
pulldown-cmark = { version = "0.11", features = ["html"], default-features = false }
rustc_arena = { path = "../rustc_arena" }
rustc_ast = { path = "../rustc_ast" }
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
Expand Down
14 changes: 9 additions & 5 deletions compiler/rustc_resolve/src/rustdoc.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
use pulldown_cmark::{
BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
};
use rustc_ast as ast;
use rustc_ast::util::comments::beautify_doc_string;
use rustc_data_structures::fx::FxHashMap;
Expand Down Expand Up @@ -427,7 +429,9 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {

while let Some(event) = event_iter.next() {
match event {
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ })
if may_be_doc_link(link_type) =>
{
if matches!(
link_type,
LinkType::Inline
Expand All @@ -441,7 +445,7 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
}
}

links.push(preprocess_link(&dest));
links.push(preprocess_link(&dest_url));
}
_ => {}
}
Expand All @@ -451,8 +455,8 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
}

/// Collects additional data of link.
fn collect_link_data<'input, 'callback>(
event_iter: &mut Parser<'input, 'callback>,
fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
event_iter: &mut Parser<'input, F>,
) -> Option<Box<str>> {
let mut display_text: Option<String> = None;
let mut append_text = |text: CowStr<'_>| {
Expand Down
1 change: 1 addition & 0 deletions src/librustdoc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ base64 = "0.21.7"
itertools = "0.12"
indexmap = "2"
minifier = "0.3.0"
pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false }
regex = "1"
rustdoc-json-types = { path = "../rustdoc-json-types" }
serde_json = "1.0"
Expand Down
111 changes: 54 additions & 57 deletions src/librustdoc/html/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ use crate::html::render::small_url_encode;
use crate::html::toc::TocBuilder;

use pulldown_cmark::{
html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, Options, Parser, Tag,
html, BrokenLink, BrokenLinkCallback, CodeBlockKind, CowStr, Event, LinkType, OffsetIter,
Options, Parser, Tag, TagEnd,
};

#[cfg(test)]
Expand Down Expand Up @@ -230,7 +231,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
let mut original_text = String::new();
for event in &mut self.inner {
match event {
Event::End(Tag::CodeBlock(..)) => break,
Event::End(TagEnd::CodeBlock) => break,
Event::Text(ref s) => {
original_text.push_str(s);
}
Expand Down Expand Up @@ -359,16 +360,17 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
match &mut event {
// This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]`
// Remove any disambiguator.
Some(Event::Start(Tag::Link(
Some(Event::Start(Tag::Link {
// [fn@f] or [fn@f][]
LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
dest,
link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
dest_url,
title,
))) => {
debug!("saw start of shortcut link to {dest} with title {title}");
..
})) => {
debug!("saw start of shortcut link to {dest_url} with title {title}");
// If this is a shortcut link, it was resolved by the broken_link_callback.
// So the URL will already be updated properly.
let link = self.links.iter().find(|&link| *link.href == **dest);
let link = self.links.iter().find(|&link| *link.href == **dest_url);
// Since this is an external iterator, we can't replace the inner text just yet.
// Store that we saw a link so we know to replace it later.
if let Some(link) = link {
Expand All @@ -381,16 +383,9 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
}
}
// Now that we're done with the shortcut link, don't replace any more text.
Some(Event::End(Tag::Link(
LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
dest,
_,
))) => {
debug!("saw end of shortcut link to {dest}");
if self.links.iter().any(|link| *link.href == **dest) {
assert!(self.shortcut_link.is_some(), "saw closing link without opening tag");
self.shortcut_link = None;
}
Some(Event::End(TagEnd::Link)) if self.shortcut_link.is_some() => {
debug!("saw end of shortcut link");
self.shortcut_link = None;
}
// Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link.
// [`fn@f`]
Expand Down Expand Up @@ -433,9 +428,11 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
}
// If this is a link, but not a shortcut link,
// replace the URL, since the broken_link_callback was not called.
Some(Event::Start(Tag::Link(_, dest, title))) => {
if let Some(link) = self.links.iter().find(|&link| *link.original_text == **dest) {
*dest = CowStr::Borrowed(link.href.as_ref());
Some(Event::Start(Tag::Link { dest_url, title, .. })) => {
if let Some(link) =
self.links.iter().find(|&link| *link.original_text == **dest_url)
{
*dest_url = CowStr::Borrowed(link.href.as_ref());
if title.is_empty() && !link.tooltip.is_empty() {
*title = CowStr::Borrowed(link.tooltip.as_ref());
}
Expand Down Expand Up @@ -477,9 +474,9 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> {
self.stored_events.push_back(Event::Start(Tag::Table(t)));
Event::Html(CowStr::Borrowed("<div>"))
}
Event::End(Tag::Table(t)) => {
Event::End(TagEnd::Table) => {
self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
Event::End(Tag::Table(t))
Event::End(TagEnd::Table)
}
e => e,
})
Expand Down Expand Up @@ -519,11 +516,11 @@ impl<'a, 'b, 'ids, I: Iterator<Item = SpannedEvent<'a>>> Iterator
}

let event = self.inner.next();
if let Some((Event::Start(Tag::Heading(level, _, _)), _)) = event {
if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
let mut id = String::new();
for event in &mut self.inner {
match &event.0 {
Event::End(Tag::Heading(..)) => break,
Event::End(TagEnd::Heading(_)) => break,
Event::Text(text) | Event::Code(text) => {
id.extend(text.chars().filter_map(slugify));
self.buf.push_back(event);
Expand Down Expand Up @@ -566,27 +563,27 @@ impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> {
}
}

fn check_if_allowed_tag(t: &Tag<'_>) -> bool {
fn check_if_allowed_tag(t: &TagEnd) -> bool {
matches!(
t,
Tag::Paragraph
| Tag::Emphasis
| Tag::Strong
| Tag::Strikethrough
| Tag::Link(..)
| Tag::BlockQuote
TagEnd::Paragraph
| TagEnd::Emphasis
| TagEnd::Strong
| TagEnd::Strikethrough
| TagEnd::Link
| TagEnd::BlockQuote
)
}

fn is_forbidden_tag(t: &Tag<'_>) -> bool {
fn is_forbidden_tag(t: &TagEnd) -> bool {
matches!(
t,
Tag::CodeBlock(_)
| Tag::Table(_)
| Tag::TableHead
| Tag::TableRow
| Tag::TableCell
| Tag::FootnoteDefinition(_)
TagEnd::CodeBlock
| TagEnd::Table
| TagEnd::TableHead
| TagEnd::TableRow
| TagEnd::TableCell
| TagEnd::FootnoteDefinition
)
}

Expand All @@ -604,12 +601,12 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
let mut is_start = true;
let is_allowed_tag = match event {
Event::Start(ref c) => {
if is_forbidden_tag(c) {
if is_forbidden_tag(&c.to_end()) {
self.skipped_tags += 1;
return None;
}
self.depth += 1;
check_if_allowed_tag(c)
check_if_allowed_tag(&c.to_end())
}
Event::End(ref c) => {
if is_forbidden_tag(c) {
Expand All @@ -633,7 +630,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
if is_start {
Some(Event::Start(Tag::Paragraph))
} else {
Some(Event::End(Tag::Paragraph))
Some(Event::End(TagEnd::Paragraph))
}
} else {
Some(event)
Expand Down Expand Up @@ -679,7 +676,7 @@ impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for Footnotes<'a, I> {
Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
let mut content = Vec::new();
for (event, _) in &mut self.inner {
if let Event::End(Tag::FootnoteDefinition(..)) = event {
if let Event::End(TagEnd::FootnoteDefinition) = event {
break;
}
content.push(event);
Expand All @@ -696,7 +693,7 @@ impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for Footnotes<'a, I> {
for (mut content, id) in v {
write!(ret, "<li id=\"fn{id}\">").unwrap();
let mut is_paragraph = false;
if let Some(&Event::End(Tag::Paragraph)) = content.last() {
if let Some(&Event::End(TagEnd::Paragraph)) = content.last() {
content.pop();
is_paragraph = true;
}
Expand Down Expand Up @@ -806,7 +803,7 @@ pub(crate) fn find_codes<T: doctest::DoctestVisitor>(
tests.visit_test(text, block_info, line);
prev_offset = offset.start;
}
Event::Start(Tag::Heading(level, _, _)) => {
Event::Start(Tag::Heading { level, .. }) => {
register_header = Some(level as u32);
}
Event::Text(ref s) if register_header.is_some() => {
Expand Down Expand Up @@ -1432,7 +1429,7 @@ impl MarkdownItemInfo<'_> {

// Treat inline HTML as plain text.
let p = p.map(|event| match event.0 {
Event::Html(text) => (Event::Text(text), event.1),
Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
_ => event,
});

Expand All @@ -1442,7 +1439,7 @@ impl MarkdownItemInfo<'_> {
let p = Footnotes::new(p);
let p = TableWrapper::new(p.map(|(ev, _)| ev));
let p = p.filter(|event| {
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph))
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
});
html::push_html(&mut s, p);

Expand Down Expand Up @@ -1472,7 +1469,7 @@ impl MarkdownSummaryLine<'_> {
let mut s = String::new();

let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph))
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
});

html::push_html(&mut s, without_paragraphs);
Expand Down Expand Up @@ -1544,8 +1541,8 @@ fn markdown_summary_with_limit(
_ => {}
},
Event::End(tag) => match tag {
Tag::Emphasis | Tag::Strong => buf.close_tag(),
Tag::Paragraph | Tag::Heading(..) => return ControlFlow::Break(()),
TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
_ => {}
},
Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
Expand Down Expand Up @@ -1605,8 +1602,8 @@ pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> Strin
}
Event::HardBreak | Event::SoftBreak => s.push(' '),
Event::Start(Tag::CodeBlock(..)) => break,
Event::End(Tag::Paragraph) => break,
Event::End(Tag::Heading(..)) => break,
Event::End(TagEnd::Paragraph) => break,
Event::End(TagEnd::Heading(..)) => break,
_ => (),
}
}
Expand Down Expand Up @@ -1765,7 +1762,7 @@ pub(crate) fn markdown_links<'md, R>(

while let Some((event, span)) = event_iter.next() {
match event {
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
Event::Start(Tag::Link { link_type, dest_url, .. }) if may_be_doc_link(link_type) => {
let range = match link_type {
// Link is pulled from the link itself.
LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
Expand All @@ -1775,7 +1772,7 @@ pub(crate) fn markdown_links<'md, R>(
LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
// Link is pulled from elsewhere in the document.
LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
span_for_link(&dest, span)
span_for_link(&dest_url, span)
}
LinkType::Autolink | LinkType::Email => unreachable!(),
};
Expand All @@ -1795,7 +1792,7 @@ pub(crate) fn markdown_links<'md, R>(

if let Some(link) = preprocess_link(MarkdownLink {
kind: link_type,
link: dest.into_string(),
link: dest_url.into_string(),
display_text,
range,
}) {
Expand All @@ -1810,8 +1807,8 @@ pub(crate) fn markdown_links<'md, R>(
}

/// Collects additional data of link.
fn collect_link_data<'input, 'callback>(
event_iter: &mut OffsetIter<'input, 'callback>,
fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
event_iter: &mut OffsetIter<'input, F>,
) -> Option<String> {
let mut display_text: Option<String> = None;
let mut append_text = |text: CowStr<'_>| {
Expand Down
Loading

0 comments on commit a8cf593

Please sign in to comment.