Skip to content

Commit

Permalink
Fix even more print page links. (#963)
Browse files Browse the repository at this point in the history
  • Loading branch information
ehuss authored and Dylan-DPC committed Jul 1, 2019
1 parent 4b569ed commit 228e99b
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 36 deletions.
6 changes: 2 additions & 4 deletions src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,10 @@ impl HtmlHandlebars {
let content = ch.content.clone();
let content = utils::render_markdown(&content, ctx.html_config.curly_quotes);

let string_path = ch.path.parent().unwrap().display().to_string();

let fixed_content = utils::render_markdown_with_base(
let fixed_content = utils::render_markdown_with_path(
&ch.content,
ctx.html_config.curly_quotes,
&string_path,
Some(&ch.path),
);
print_content.push_str(&fixed_content);

Expand Down
74 changes: 64 additions & 10 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use regex::Regex;
use pulldown_cmark::{html, CowStr, Event, Options, Parser, Tag};

use std::borrow::Cow;
use std::fmt::Write;
use std::path::Path;

pub use self::string::take_lines;

Expand Down Expand Up @@ -65,20 +67,47 @@ pub fn id_from_content(content: &str) -> String {
normalize_id(trimmed)
}

fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> {
/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`.
///
/// `path` is the path to the page being rendered relative to the root of the
/// book. This is used for the `print.html` page so that links on the print
/// page go to the original location. Normal page rendering sets `path` to
/// `None`. Ideally, print page links would link to anchors on the print page,
/// but that is very difficult.
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
lazy_static! {
static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
}

fn fix<'a>(dest: CowStr<'a>, base: &str) -> CowStr<'a> {
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if let Some(path) = path {
let mut base = path.display().to_string();
if base.ends_with(".md") {
base.replace_range(base.len() - 3.., ".html");
}
return format!("{}{}", base, dest).into();
} else {
return dest;
}
}
// Don't modify links with schemes like `https`.
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
if !base.is_empty() {
fixed_link.push_str(base);
fixed_link.push_str("/");
if let Some(path) = path {
let base = path
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only");
if !base.is_empty() {
write!(fixed_link, "{}/", base).unwrap();
}
}

if let Some(caps) = MD_LINK.captures(&dest) {
Expand All @@ -95,20 +124,45 @@ fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> {
dest
}

fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
    // Rewrites the `src`/`href` targets found in `<a ...>` and `<img ...>`
    // tags of a raw HTML fragment by passing each one through `fix`.
    //
    // Picking tags apart with a regex is admittedly a hack and nobody should
    // really do it, but the fragments handed over by pulldown_cmark are
    // partial HTML and the author knew of no Rust tool suited to parsing
    // them. Many more tags and attributes can carry paths; only these two
    // tags are handled for now, so extend the pattern if more are needed.
    lazy_static! {
        // Capture 1: everything from the tag opening up to and including the
        // attribute's opening quote. Capture 2: the attribute value itself.
        static ref HTML_LINK: Regex =
            Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
    }

    let rewritten = HTML_LINK.replace_all(&html, |caps: &regex::Captures<'_>| {
        let prefix = &caps[1];
        let target = fix(caps[2].into(), path);
        // The closing quote is consumed by the match, so put it back.
        format!("{}{}\"", prefix, target)
    });

    // Always hand back an owned string, whether or not anything matched.
    let owned: String = rewritten.into_owned();
    owned.into()
}

match event {
Event::Start(Tag::Link(link_type, dest, title)) => {
Event::Start(Tag::Link(link_type, fix(dest, with_base), title))
Event::Start(Tag::Link(link_type, fix(dest, path), title))
}
Event::Start(Tag::Image(link_type, dest, title)) => {
Event::Start(Tag::Image(link_type, fix(dest, with_base), title))
Event::Start(Tag::Image(link_type, fix(dest, path), title))
}
Event::Html(html) => Event::Html(fix_html(html, path)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
_ => event,
}
}

/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
render_markdown_with_base(text, curly_quotes, "")
render_markdown_with_path(text, curly_quotes, None)
}

pub fn new_cmark_parser(text: &str) -> Parser<'_> {
Expand All @@ -120,13 +174,13 @@ pub fn new_cmark_parser(text: &str) -> Parser<'_> {
Parser::new_ext(text, opts)
}

pub fn render_markdown_with_base(text: &str, curly_quotes: bool, base: &str) -> String {
pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
let mut s = String::with_capacity(text.len() * 3 / 2);
let p = new_cmark_parser(text);
let mut converter = EventQuoteConverter::new(curly_quotes);
let events = p
.map(clean_codeblock_headers)
.map(|event| adjust_links(event, base))
.map(|event| adjust_links(event, path))
.map(|event| converter.convert(event));

html::push_html(&mut s, events);
Expand Down
8 changes: 8 additions & 0 deletions tests/dummy_book/src/second/nested.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
When we link to [the first section](../first/nested.md), it should work on
both the print page and the non-print page.

A [fragment link](#some-section) should work.

Link [outside](../../std/foo/bar.html).

![Some image](../images/picture.png)

<a href="../first/markdown.md">HTML Link</a>

<img src="../images/picture.png" alt="raw html">

## Some section
3 changes: 3 additions & 0 deletions tests/rendered_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ fn check_correct_relative_links_in_print_page() {
r##"<a href="second/../first/nested.html">the first section</a>,"##,
r##"<a href="second/../../std/foo/bar.html">outside</a>"##,
r##"<img src="second/../images/picture.png" alt="Some image" />"##,
r##"<a href="second/nested.html#some-section">fragment link</a>"##,
r##"<a href="second/../first/markdown.html">HTML Link</a>"##,
r##"<img src="second/../images/picture.png" alt="raw html">"##,
],
);
}
Expand Down
Loading

0 comments on commit 228e99b

Please sign in to comment.