Skip to content

Commit

Permalink
Ignore casing when processing markdown fragments + check for percent …
Browse files Browse the repository at this point in the history
…encoded anchors (#1535)

We must also check the fragment before it is percent-decoded, as required by the HTML standard.

Fixes #1467
  • Loading branch information
autoantwort authored Oct 28, 2024
1 parent bc0b05b commit 9801590
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 14 deletions.
13 changes: 9 additions & 4 deletions fixtures/fragments/file.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
<title>For Testing Fragments</title>
</head>
<body>
<section id="in-the-beginning">
<p>
<section id="in-the-beginning" style="height: 100vh;">
<p id="Upper-ÄÖö">
<div id="tangent%3A-kustomize"></div>
To start
<a href="file1.md#fragment-1">
let's run away.
Expand All @@ -15,8 +16,12 @@
</section>
<section>
<p id="a-word">Word</p>
<a href="#in-the-beginning">back we go</a>
<a href="#in-the-end">doesn't exist</a>
<a href="#in-the-beginning">back we go</a><br>
<a href="#in-THE-begiNNing">back we go upper does not work</a><br>
<a href="#tangent%3A-kustomize">id with percent encoding</a><br>
<a href="#Upper-ÄÖö">back to Upper-ÄÖö</a><br>
<a href="#Upper-%C3%84%C3%96%C3%B6">back to öüä encoded</a><br>
<a href="#in-the-end">doesn't exist</a><br>
</section>
</body>
</html>
10 changes: 10 additions & 0 deletions fixtures/fragments/file1.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,14 @@ Therefore we put the test into a code block for now to prevent false positives.

[Link to another file type](empty_file#fragment)

# Ignore casing

[Link with wrong casing](#IGNORE-CASING)

# Fünf süße Äpfel

[Link to umlauts](#fünf-süße-äpfel)
[Link to umlauts wrong case](#fünf-sÜße-Äpfel)
[Link to umlauts with percent encoding](#f%C3%BCnf-s%C3%BC%C3%9Fe-%C3%A4pfel)

##### Lets wear a hat: être
8 changes: 4 additions & 4 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1673,10 +1673,10 @@ mod cli {
.stderr(contains(
"fixtures/fragments/file1.md#kebab-case-fragment-1",
))
.stdout(contains("15 Total"))
.stdout(contains("12 OK"))
// 3 failures because of missing fragments
.stdout(contains("3 Errors"));
.stdout(contains("21 Total"))
.stdout(contains("17 OK"))
// 4 failures because of missing fragments
.stdout(contains("4 Errors"));
}

#[test]
Expand Down
5 changes: 3 additions & 2 deletions lychee-lib/src/extract/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,11 @@ impl HeadingIdGenerator {
/// Converts text into kebab case
#[must_use]
fn into_kebab_case(text: &str) -> String {
text.chars()
text.to_lowercase()
.chars()
.filter_map(|ch| {
if ch.is_alphanumeric() || ch == '_' || ch == '-' {
Some(ch.to_ascii_lowercase())
Some(ch)
} else if ch.is_whitespace() {
Some('-')
} else {
Expand Down
18 changes: 14 additions & 4 deletions lychee-lib/src/utils/fragment_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,31 @@ impl FragmentChecker {
let Some(fragment) = url.fragment() else {
return Ok(true);
};
let fragment = percent_decode_str(fragment).decode_utf8()?;
let mut fragment_decoded = percent_decode_str(fragment).decode_utf8()?;
let url_without_frag = Self::remove_fragment(url.clone());

let extractor = match FileType::from(path) {
let file_type = FileType::from(path);
let extractor = match file_type {
FileType::Markdown => extract_markdown_fragments,
FileType::Html => extract_html_fragments,
FileType::Plaintext => return Ok(true),
};
if file_type == FileType::Markdown {
fragment_decoded = fragment_decoded.to_lowercase().into();
}
match self.cache.lock().await.entry(url_without_frag) {
Entry::Vacant(entry) => {
let content = fs::read_to_string(path).await?;
let file_frags = extractor(&content);
Ok(entry.insert(file_frags).contains(&fragment as &str))
let contains_fragment =
file_frags.contains(fragment) || file_frags.contains(&fragment_decoded as &str);
entry.insert(file_frags);
Ok(contains_fragment)
}
Entry::Occupied(entry) => {
Ok(entry.get().contains(fragment)
|| entry.get().contains(&fragment_decoded as &str))
}
Entry::Occupied(entry) => Ok(entry.get().contains(&fragment as &str)),
}
}

Expand Down

0 comments on commit 9801590

Please sign in to comment.