Skip to content

Commit

Permalink
fix handling of script content, bump html5gum (#181)
Browse files Browse the repository at this point in the history
* scriptdata bad testcase

* bump html5gum
  • Loading branch information
untitaker authored Oct 29, 2024
1 parent df88a08 commit 055a110
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 3 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ rayon = "1.5.1"
pulldown-cmark = "0.12.2"
blake3 = "1.0.0"

-html5gum = "0.6.0"
+html5gum = "0.6.1"
jwalk = "0.8.1"
bumpalo = { version = "3.11.1", features = ["collections"] }
percent-encoding = "2.1.0"
Expand Down
23 changes: 23 additions & 0 deletions src/html/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -570,3 +570,26 @@ fn test_document_join_bare_html() {
Href("locations/oslo#grünerløkka")
);
}

/// Checks that no links are reported for a document consisting solely of a
/// `<script type="text/json">` element whose JSON payload embeds escaped
/// HTML anchors (`<a ... href=\"...\">`).
#[test]
fn test_json_script() {
    use crate::paragraph::ParagraphHasher;

    // Fixture: a single script element; every `href` in it lives inside the
    // JSON string value, not in real markup.
    let script_markup = r#"<script type="text/json" id="notable-traits-data">{"InfallibleTokenizer<R, E>":"<h3>Notable traits for <code><a class=\"struct\" href=\"struct.InfallibleTokenizer.html\" title=\"struct html5gum::InfallibleTokenizer\">InfallibleTokenizer</a>&lt;R, E&gt;</code></h3><pre><code><div class=\"where\">impl&lt;R: <a class=\"trait\" href=\"trait.Reader.html\" title=\"trait html5gum::Reader\">Reader</a>&lt;Error = <a class=\"enum\" href=\"https://doc.rust-lang.org/1.82.0/core/convert/enum.Infallible.html\" title=\"enum core::convert::Infallible\">Infallible</a>&gt;, E: <a class=\"trait\" href=\"emitters/trait.Emitter.html\" title=\"trait html5gum::emitters::Emitter\">Emitter</a>&gt; <a class=\"trait\" href=\"https://doc.rust-lang.org/1.82.0/core/iter/traits/iterator/trait.Iterator.html\" title=\"trait core::iter::traits::iterator::Iterator\">Iterator</a> for <a class=\"struct\" href=\"struct.InfallibleTokenizer.html\" title=\"struct html5gum::InfallibleTokenizer\">InfallibleTokenizer</a>&lt;R, E&gt;</div><div class=\"where\"> type <a href=\"https://doc.rust-lang.org/1.82.0/core/iter/traits/iterator/trait.Iterator.html#associatedtype.Item\" class=\"associatedtype\">Item</a> = E::<a class=\"associatedtype\" href=\"emitters/trait.Emitter.html#associatedtype.Token\" title=\"type html5gum::emitters::Emitter::Token\">Token</a>;</div>"}</script>"#;

    let root = Path::new("/");
    let page = Path::new("/html5gum/struct.Tokenizer.html");
    let document = Document::new(root, page);
    let mut buffers = DocumentBuffers::default();

    // NOTE(review): the meaning of the trailing `false` flag is not visible
    // from this test; it is passed through unchanged.
    let extracted: Vec<_> = document
        .links_from_read::<_, ParagraphHasher>(&mut buffers, script_markup.as_bytes(), false)
        .unwrap()
        .collect();

    // Nothing inside the script body may be reported as a link.
    assert_eq!(extracted, &[]);
}

0 comments on commit 055a110

Please sign in to comment.