diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs index 1520aa36c0..1490f8af18 100644 --- a/src/renderer/html_handlebars/hbs_renderer.rs +++ b/src/renderer/html_handlebars/hbs_renderer.rs @@ -33,12 +33,10 @@ impl HtmlHandlebars { let content = ch.content.clone(); let content = utils::render_markdown(&content, ctx.html_config.curly_quotes); - let string_path = ch.path.parent().unwrap().display().to_string(); - - let fixed_content = utils::render_markdown_with_base( + let fixed_content = utils::render_markdown_with_path( &ch.content, ctx.html_config.curly_quotes, - &string_path, + Some(&ch.path), ); print_content.push_str(&fixed_content); diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 37ea87b306..2632285ec9 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -8,6 +8,8 @@ use regex::Regex; use pulldown_cmark::{html, CowStr, Event, Options, Parser, Tag}; use std::borrow::Cow; +use std::fmt::Write; +use std::path::Path; pub use self::string::take_lines; @@ -65,20 +67,47 @@ pub fn id_from_content(content: &str) -> String { normalize_id(trimmed) } -fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> { +/// Fix links to the correct location. +/// +/// This adjusts links, such as turning `.md` extensions to `.html`. +/// +/// `path` is the path to the page being rendered relative to the root of the +/// book. This is used for the `print.html` page so that links on the print +/// page go to the original location. Normal page rendering sets `path` to +/// None. Ideally, print page links would link to anchors on the print page, +/// but that is very difficult. +fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { lazy_static! 
{ static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap(); static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap(); } - fn fix<'a>(dest: CowStr<'a>, base: &str) -> CowStr<'a> { + fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { + if dest.starts_with('#') { + // Fragment-only link. + if let Some(path) = path { + let mut base = path.display().to_string(); + if base.ends_with(".md") { + base.replace_range(base.len() - 3.., ".html"); + } + return format!("{}{}", base, dest).into(); + } else { + return dest; + } + } // Don't modify links with schemes like `https`. if !SCHEME_LINK.is_match(&dest) { // This is a relative link, adjust it as necessary. let mut fixed_link = String::new(); - if !base.is_empty() { - fixed_link.push_str(base); - fixed_link.push_str("/"); + if let Some(path) = path { + let base = path + .parent() + .expect("path can't be empty") + .to_str() + .expect("utf-8 paths only"); + if !base.is_empty() { + write!(fixed_link, "{}/", base).unwrap(); + } } if let Some(caps) = MD_LINK.captures(&dest) { @@ -95,20 +124,45 @@ fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> { dest } + fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { + // This is a terrible hack, but should be reasonably reliable. Nobody + // should ever parse a tag with a regex. However, there isn't anything + // in Rust that I know of that is suitable for handling partial html + // fragments like those generated by pulldown_cmark. + // + // There are dozens of HTML tags/attributes that contain paths, so + // feel free to add more tags if desired; these are the only ones I + // care about right now. + lazy_static! 
{ + static ref HTML_LINK: Regex = + Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap(); + } + + HTML_LINK + .replace_all(&html, |caps: &regex::Captures<'_>| { + let fixed = fix(caps[2].into(), path); + format!("{}{}\"", &caps[1], fixed) + }) + .into_owned() + .into() + } + match event { Event::Start(Tag::Link(link_type, dest, title)) => { - Event::Start(Tag::Link(link_type, fix(dest, with_base), title)) + Event::Start(Tag::Link(link_type, fix(dest, path), title)) } Event::Start(Tag::Image(link_type, dest, title)) => { - Event::Start(Tag::Image(link_type, fix(dest, with_base), title)) + Event::Start(Tag::Image(link_type, fix(dest, path), title)) } + Event::Html(html) => Event::Html(fix_html(html, path)), + Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)), _ => event, } } /// Wrapper around the pulldown-cmark parser for rendering markdown to HTML. pub fn render_markdown(text: &str, curly_quotes: bool) -> String { - render_markdown_with_base(text, curly_quotes, "") + render_markdown_with_path(text, curly_quotes, None) } pub fn new_cmark_parser(text: &str) -> Parser<'_> { @@ -120,13 +174,13 @@ pub fn new_cmark_parser(text: &str) -> Parser<'_> { Parser::new_ext(text, opts) } -pub fn render_markdown_with_base(text: &str, curly_quotes: bool, base: &str) -> String { +pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String { let mut s = String::with_capacity(text.len() * 3 / 2); let p = new_cmark_parser(text); let mut converter = EventQuoteConverter::new(curly_quotes); let events = p .map(clean_codeblock_headers) - .map(|event| adjust_links(event, base)) + .map(|event| adjust_links(event, path)) .map(|event| converter.convert(event)); html::push_html(&mut s, events); diff --git a/tests/dummy_book/src/second/nested.md b/tests/dummy_book/src/second/nested.md index 2fa98bebc0..faf1187ff4 100644 --- a/tests/dummy_book/src/second/nested.md +++ b/tests/dummy_book/src/second/nested.md @@ -3,6 +3,14 @@ When we 
link to [the first section](../first/nested.md), it should work on both the print page and the non-print page. +A [fragment link](#some-section) should work. + Link [outside](../../std/foo/bar.html). ![Some image](../images/picture.png) + +HTML Link + +raw html + +## Some section diff --git a/tests/rendered_output.rs b/tests/rendered_output.rs index 8b83ef04d8..1a10b4e7a8 100644 --- a/tests/rendered_output.rs +++ b/tests/rendered_output.rs @@ -124,6 +124,9 @@ fn check_correct_relative_links_in_print_page() { r##"the first section,"##, r##"outside"##, r##"Some image"##, + r##"fragment link"##, + r##"HTML Link"##, + r##"raw html"##, ], ); } diff --git a/tests/searchindex_fixture.json b/tests/searchindex_fixture.json index 824c8473b3..9683167320 100644 --- a/tests/searchindex_fixture.json +++ b/tests/searchindex_fixture.json @@ -15,6 +15,7 @@ "first/markdown.html#tasklisks", "second.html#second-chapter", "second/nested.html#testing-relative-links-for-the-print-page", + "second/nested.html#some-section", "conclusion.html#conclusion" ], "index": { @@ -51,11 +52,16 @@ "title": 2 }, "14": { - "body": 13, + "body": 18, "breadcrumbs": 7, "title": 5 }, "15": { + "body": 0, + "breadcrumbs": 3, + "title": 1 + }, + "16": { "body": 3, "breadcrumbs": 1, "title": 1 @@ -139,15 +145,21 @@ "title": "Second Chapter" }, "14": { - "body": "When we link to the first section , it should work on both the print page and the non-print page. Link outside . Some image", + "body": "When we link to the first section , it should work on both the print page and the non-print page. A fragment link should work. Link outside . 
Some image HTML Link", "breadcrumbs": "Second Chapter » Testing relative links for the print page", "id": "14", "title": "Testing relative links for the print page" }, "15": { + "body": "", + "breadcrumbs": "Second Chapter » Some section", + "id": "15", + "title": "Some section" + }, + "16": { "body": "I put <HTML> in here!", "breadcrumbs": "Conclusion", - "id": "15", + "id": "16", "title": "Conclusion" }, "2": { @@ -199,7 +211,7 @@ "title": "Tables" } }, - "length": 16, + "length": 17, "save": true }, "fields": [ @@ -499,7 +511,7 @@ "s": { "df": 2, "docs": { - "15": { + "16": { "tf": 1.0 }, "7": { @@ -701,6 +713,38 @@ } } } + }, + "r": { + "a": { + "df": 0, + "docs": {}, + "g": { + "df": 0, + "docs": {}, + "m": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "n": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "14": { + "tf": 1.0 + } + } + } + } + } + } + } + }, + "df": 0, + "docs": {} } }, "g": { @@ -746,7 +790,7 @@ "0": { "tf": 1.0 }, - "15": { + "16": { "tf": 1.0 } } @@ -784,6 +828,22 @@ }, "df": 0, "docs": {} + }, + "t": { + "df": 0, + "docs": {}, + "m": { + "df": 0, + "docs": {}, + "l": { + "df": 1, + "docs": { + "14": { + "tf": 1.0 + } + } + } + } } }, "i": { @@ -968,7 +1028,7 @@ "df": 1, "docs": { "14": { - "tf": 1.7320508075688772 + "tf": 2.23606797749979 } } } @@ -1021,7 +1081,7 @@ "t": { "df": 1, "docs": { - "15": { + "16": { "tf": 1.0 } } @@ -1382,7 +1442,7 @@ "t": { "df": 1, "docs": { - "15": { + "16": { "tf": 1.0 } } @@ -1505,11 +1565,14 @@ "df": 0, "docs": {}, "n": { - "df": 3, + "df": 4, "docs": { "14": { "tf": 1.0 }, + "15": { + "tf": 1.0 + }, "3": { "tf": 1.0 }, @@ -1793,7 +1856,7 @@ "df": 1, "docs": { "14": { - "tf": 1.0 + "tf": 1.4142135623730951 } } }, @@ -2051,7 +2114,7 @@ "df": 0, "docs": {}, "r": { - "df": 12, + "df": 13, "docs": { "10": { "tf": 1.0 @@ -2068,6 +2131,9 @@ "14": { "tf": 1.0 }, + "15": { + "tf": 1.0 + }, "2": { "tf": 1.4142135623730951 }, @@ -2129,7 +2195,7 @@ "s": { "df": 2, "docs": { - "15": { + 
"16": { "tf": 1.4142135623730951 }, "7": { @@ -2355,6 +2421,38 @@ } } } + }, + "r": { + "a": { + "df": 0, + "docs": {}, + "g": { + "df": 0, + "docs": {}, + "m": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "n": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "14": { + "tf": 1.0 + } + } + } + } + } + } + } + }, + "df": 0, + "docs": {} } }, "g": { @@ -2400,7 +2498,7 @@ "0": { "tf": 1.0 }, - "15": { + "16": { "tf": 1.0 } } @@ -2438,6 +2536,22 @@ }, "df": 0, "docs": {} + }, + "t": { + "df": 0, + "docs": {}, + "m": { + "df": 0, + "docs": {}, + "l": { + "df": 1, + "docs": { + "14": { + "tf": 1.0 + } + } + } + } } }, "i": { @@ -2622,7 +2736,7 @@ "df": 1, "docs": { "14": { - "tf": 2.0 + "tf": 2.449489742783178 } } } @@ -2675,7 +2789,7 @@ "t": { "df": 1, "docs": { - "15": { + "16": { "tf": 1.0 } } @@ -3036,7 +3150,7 @@ "t": { "df": 1, "docs": { - "15": { + "16": { "tf": 1.0 } } @@ -3135,7 +3249,7 @@ "docs": {}, "n": { "d": { - "df": 3, + "df": 4, "docs": { "13": { "tf": 1.4142135623730951 @@ -3143,6 +3257,9 @@ "14": { "tf": 1.0 }, + "15": { + "tf": 1.0 + }, "7": { "tf": 1.0 } @@ -3162,11 +3279,14 @@ "df": 0, "docs": {}, "n": { - "df": 3, + "df": 4, "docs": { "14": { "tf": 1.0 }, + "15": { + "tf": 1.4142135623730951 + }, "3": { "tf": 1.4142135623730951 }, @@ -3450,7 +3570,7 @@ "df": 1, "docs": { "14": { - "tf": 1.0 + "tf": 1.4142135623730951 } } }, @@ -3546,7 +3666,7 @@ "s": { "df": 1, "docs": { - "15": { + "16": { "tf": 1.0 } } @@ -3862,8 +3982,11 @@ "df": 0, "docs": {}, "n": { - "df": 2, + "df": 3, "docs": { + "15": { + "tf": 1.0 + }, "3": { "tf": 1.0 },