Skip to content

Commit

Permalink
chore(test): fix cases
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Sep 23, 2024
1 parent 2ac4f09 commit 43b5d3b
Show file tree
Hide file tree
Showing 12 changed files with 58 additions and 160 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fast_html2md"
version = "0.0.12"
version = "0.0.14"
edition = "2021"
description = "A fast html2md crate for rust"
categories = ["development-tools", "parsing", "parser-implementations"]
Expand Down
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@ let md = parse_html("<p>JAMES</p>", false);
assert_eq!(md, "JAMES")
```

## Ignoring Tags

```rust
let mut tag_factory: HashMap<String, Box<dyn html2md::TagHandlerFactory>> =
HashMap::new();

let tag = Box::new(IgnoreTagFactory {});

tag_factory.insert(String::from("script"), tag.clone());
tag_factory.insert(String::from("style"), tag.clone());
tag_factory.insert(String::from("noscript"), tag.clone());
let html = html2md::parse_html_custom(&html, &tag_factory, false);
```

## Notes

This project is a practical rewrite from the original `html2md` with major bug fixes and performance improvements.
This project is a practical rewrite from the original `html2md` with major bug fixes and performance improvements.
2 changes: 1 addition & 1 deletion src/anchors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ impl TagHandler for AnchorHandler {
.find(|attr| attr.name.local.as_bytes() == b"href");

match href {
Some(link) => link.value.trim_ascii().into(),
Some(link) => link.value.trim().into(),
None => String::new(),
}
}
Expand Down
11 changes: 4 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,11 @@ fn walk(
NodeData::Document | NodeData::Doctype { .. } | NodeData::ProcessingInstruction { .. } => {}
NodeData::Text { ref contents } => {
let mut text = contents.borrow().to_string();

let inside_pre = result.parent_chain.iter().any(|t| t == "pre");
if inside_pre {
// this is preformatted text, insert as-is
result.append_str(&text);

} else if !(text.trim().len() == 0
&& (result.data.chars().last() == Some('\n')
|| result.data.chars().last() == Some(' ')))
Expand All @@ -147,7 +146,7 @@ fn walk(
text = escape_markdown(result, &text);
}
let minified_text = EXCESSIVE_WHITESPACE_PATTERN.replace_all(&text, " ");
result.append_str(&minified_text.trim_ascii());
result.append_str(&minified_text.trim());
}
}
NodeData::Comment { .. } => {} // ignore comments
Expand Down Expand Up @@ -202,8 +201,6 @@ fn walk(
}
}

let ignore_tags = tag_name == "style" || tag_name == "script";

// handle this tag, while it's not in parent chain
// and doesn't have child siblings
handler.handle(&input, result);
Expand All @@ -216,10 +213,10 @@ fn walk(
result.siblings.insert(current_depth, vec![]);

for child in input.children.borrow().iter() {
if handler.skip_descendants() || ignore_tags {
if handler.skip_descendants() {
continue;
}

walk(&child, result, custom, commonmark);

match child.data {
Expand Down
9 changes: 6 additions & 3 deletions tests/images.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,23 @@ fn test_image_native_without_title() {
#[test]
fn test_image_embedded_html() {
let md = parse_html("<img src=\"https://i.redd.it/un4h28uwtp711.png\" alt=\"comics about Mac and GNU/Linux\" title=\"Look at me, brother\" height=\"150\" width=\"150\" />", false);
assert_eq!(md, "<img alt=\"comics about Mac and GNU/Linux\" src=\"https://i.redd.it/un4h28uwtp711.png\" title=\"Look at me, brother\" height=\"150\" width=\"150\" />")
assert_eq!(md, "![comics about Mac and GNU/Linux](https://i.redd.it/un4h28uwtp711.png \"Look at me, brother\")")
}

#[test]
fn test_image_embedded_with_unsupported_html() {
// srcset is unsupported in Markdown
let md = parse_html("<img src=\"https://i.redd.it/07onlc10x5711.png\" alt=\"HACKERMAN\" title=\"When you reboot instead of exiting vim\" height=\"150\" width=\"150\" srcset=\"image1 image2\" align=\"center\" />", false);
assert_eq!(md, "<img alt=\"HACKERMAN\" src=\"https://i.redd.it/07onlc10x5711.png\" title=\"When you reboot instead of exiting vim\" height=\"150\" width=\"150\" align=\"center\" />")
assert_eq!(md, "![HACKERMAN](https://i.redd.it/07onlc10x5711.png \"When you reboot instead of exiting vim\")")
}

#[test]
fn test_image_src_issue() {
let md = parse_html("<img src=\"https://dybr.ru/img/43/1532265494_android-Kanedias\" width=\"auto\" height=\"500\" >", false);
assert_eq!(md, "<img src=\"https://dybr.ru/img/43/1532265494_android-Kanedias\" height=\"500\" width=\"auto\" />")
assert_eq!(
md,
"![](https://dybr.ru/img/43/1532265494_android-Kanedias)"
)
}

#[test]
Expand Down
10 changes: 3 additions & 7 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ fn test_real_world_ja() {
println!("{}", result);
}


#[test]
#[ignore]
fn test_cheatsheet() {
Expand Down Expand Up @@ -127,11 +126,8 @@ fn test_tables_crash2() {
.expect("File must be readable");
let table_with_vertical_header = parse_html(&html, false);

assert_that!(table_with_vertical_header).contains(indoc! {"
|Current Conditions:|Open all year. No reservations. No services.|
|-------------------|--------------------------------------------|
| Reservations: | No reservations. |
| Fees | No fee. |
| Water: | No water. |"
println!("{:?}", table_with_vertical_header);

assert_that!(table_with_vertical_header).contains(indoc! {"\n\n## At a Glance\n\n|Current Conditions:|Open all year. No reservations. No services.|\n|||\n| Reservations: | No reservations. |\n| Fees | No fee. |\n| Water: | No water. |\n\n"
});
}
70 changes: 6 additions & 64 deletions tests/lists.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@ fn test_list_simple() {
);
assert_eq!(
md,
"\
* Seven things has lady Lackless
* Keeps them underneath her black dress
* One a thing that's not for wearing"
"\n\n* Seven things has lady Lackless\n* Keeps them underneath her black dress\n* One a thing that's not for wearing\n\n"
)
}

Expand All @@ -38,16 +35,7 @@ fn test_list_formatted() {
);
assert_eq!(
md,
"\
* You should NEVER see this error
* Broken lines, broken strings
* Broken threads, broken springs
* Broken idols, broken heads
* People sleep in broken beds
* Ain't no use jiving
* Ain't no use joking
* EVERYTHING IS BROKEN"
"\n\n* You should NEVER see this error\n * Broken lines, broken strings\n * Broken threads, broken springs\n * Broken idols, broken heads\n * People sleep in broken beds\n \n* Ain't no use jiving\n* Ain't no use joking\n* EVERYTHING IS BROKEN"
)
}

Expand Down Expand Up @@ -87,22 +75,7 @@ fn test_list_stackedit() {
);
assert_eq!(
md,
"\
* You should NEVER see this error
* Broken lines, broken strings
* Broken threads, broken springs
* Broken idols, broken heads
* People sleep in broken beds
* Ain’t no use jiving
* Ain’t no use joking
* EVERYTHING IS BROKEN"
"* You should NEVER see this error\n \n * Broken lines, broken strings\n \n * Broken threads, broken springs\n \n * Broken idols, broken heads\n \n * People sleep in broken beds\n \n \n* Ain’t no use jiving\n \n* Ain’t no use joking\n \n* EVERYTHING IS BROKEN"
)
}

Expand Down Expand Up @@ -144,22 +117,7 @@ fn test_list_stackedit_add_brs() {
);
assert_eq!(
md,
"\
* You should NEVER see this error
* Broken lines, broken strings
* Broken threads, broken springs
* Broken idols, broken heads
* People sleep in broken beds
* Ain’t no use jiving
* Ain’t no use joking
* EVERYTHING IS BROKEN"
"* You should NEVER see this error\n \n * Broken lines, broken strings\n \n * Broken threads, broken springs\n \n * Broken idols, broken heads\n \n * People sleep in broken beds\n \n \n \n \n* Ain’t no use jiving\n \n* Ain’t no use joking\n \n* EVERYTHING IS BROKEN"
)
}

Expand All @@ -180,13 +138,7 @@ fn test_list_multiline() {
);
assert_eq!(
md,
"\
1. In the heat and the rains
With whips and chains
Just to see him fly
So many die!"
"1. In the heat and the rains\n \n With whips and chains\n \n Just to see him fly\n So many die!"
)
}

Expand Down Expand Up @@ -214,17 +166,7 @@ fn test_list_multiline_formatted() {
);
assert_eq!(
md,
"\
* You should NEVER see this error
* Broken lines, broken strings
* Broken threads, broken springs
* Broken idols, broken heads
* People sleep in broken beds
* Ain't no use jiving
Ain't no use joking
EVERYTHING IS BROKEN"
"\n\n* You should NEVER see this error\n * Broken lines, broken strings\n * Broken threads, broken springs\n * Broken idols, broken heads\n * People sleep in broken beds\n * Ain't no use jiving\n \n Ain't no use joking\n \n EVERYTHING IS BROKEN"
)
}

Expand Down
18 changes: 4 additions & 14 deletions tests/quotes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ fn test_quotes() {
);
assert_eq!(
md,
"\
> here's a quote next line of it
And some text after it"
"\n\n> here's a quote next line of it\nAnd some text after it"
)
}

Expand All @@ -22,11 +19,7 @@ fn test_quotes2() {
let md = parse_html("<p><blockquote>here's<blockquote>nested quote!</blockquote> a quote\n next line of it</blockquote></p>", false);
assert_eq!(
md,
"\
> here's
> > nested quote!
>
> a quote next line of it"
"\n\n> here's\n> > nested quote!\n> a quote next line of it\n\n"
)
}

Expand All @@ -38,10 +31,7 @@ fn test_blockquotes() {
);
assert_eq!(
md,
"\
> Quote at the start of the message
Should not crash the parser"
"> Quote at the start of the message\nShould not crash the parser"
)
}

Expand All @@ -54,7 +44,7 @@ fn test_details() {
</details>
"};
let md = parse_html(&html, false);
assert_eq!(md, "<details> <summary>There are more things in heaven and Earth, **Horatio**</summary>\n\nThan are dreamt of in your philosophy\n\n</details>")
assert_eq!(md, "There are more things in heaven and Earth,**Horatio**\nThan are dreamt of in your philosophy")
}

#[test]
Expand Down
11 changes: 2 additions & 9 deletions tests/styles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use pretty_assertions::assert_eq;
#[test]
fn test_styles_with_spaces() {
let md = parse_html(r#"It read:<s> Nobody will ever love you</s>"#, false);
assert_eq!(md, r#"It read: ~~Nobody will ever love you~~"#)
assert_eq!(md, r#"It read:~~Nobody will ever love you~~"#)
}

#[test]
Expand All @@ -24,13 +24,6 @@ They gathered for the feast<br/>
);
assert_eq!(
md,
"\
And she said:
~~We are all just prisoners here
Of our own device~~
And in the master's chambers
They gathered for the feast
*They stab it with their steely knives*
**But they just can't kill the beast**"
"And she said:\n~~We are all just prisoners here\nOf our own device~~\nAnd in the master's chambers\nThey gathered for the feast\n*They stab it with their steely knives*\n**But they just can't kill the beast**"
)
}
Loading

0 comments on commit 43b5d3b

Please sign in to comment.