Skip to content

Commit

Permalink
chore(tables): remove <br /> auto inject tables
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Oct 24, 2024
1 parent 3093a04 commit fe74819
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fast_html2md"
version = "0.0.15"
version = "0.0.16"
edition = "2021"
description = "A fast html2md crate for rust"
categories = ["development-tools", "parsing", "parser-implementations"]
Expand Down
58 changes: 46 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,35 +125,64 @@ fn walk(
let mut handler: Box<dyn TagHandler> = Box::new(DummyHandler::default());
let mut tag_name = String::default();

let mut inside_pre = false;
let mut inside_code = false;
let mut ignore_write = false;

let find_parent_tags = match &input.data {
NodeData::Element { .. } => true,
NodeData::Text { .. } => true,
_ => false,
};

if find_parent_tags {
for tag in result.parent_chain.iter() {
if tag == "code" {
inside_code = true;
break;
}
if tag == "pre" {
inside_pre = true;
break;
}
if tag_name == "script" || tag_name == "style" {
ignore_write = true;
break;
}
}
}

match input.data {
NodeData::Document | NodeData::Doctype { .. } | NodeData::ProcessingInstruction { .. } => {}
NodeData::Text { ref contents } => {
let mut text = contents.borrow().to_string();

let inside_pre = result.parent_chain.iter().any(|t| t == "pre");
if inside_pre {
// this is preformatted text, insert as-is
result.append_str(&text);
} else if !(text.trim().len() == 0
&& (result.data.chars().last() == Some('\n')
|| result.data.chars().last() == Some(' ')))
{
// in case it's not just a whitespace after the newline or another whitespace
if !ignore_write {
if !inside_code {
text = escape_markdown(result, &text);
}

// regular text, collapse whitespace and newlines in text
let inside_code = result.parent_chain.iter().any(|t| t == "code");
if !inside_code {
text = escape_markdown(result, &text);
let minified_text = EXCESSIVE_WHITESPACE_PATTERN.replace_all(&text, " ");
result.append_str(&minified_text.trim());
}
let minified_text = EXCESSIVE_WHITESPACE_PATTERN.replace_all(&text, " ");
result.append_str(&minified_text.trim());
}
}
NodeData::Comment { .. } => {} // ignore comments
NodeData::Element { ref name, .. } => {
let inside_pre = result.parent_chain.iter().any(|tag| tag == "pre");
tag_name = name.local.to_string();

// do not parse scripts or style tags
if tag_name == "script" || tag_name == "style" {
return;
}

if inside_pre {
// don't add any html tags inside the pre section
handler = Box::new(DummyHandler::default());
Expand Down Expand Up @@ -196,8 +225,6 @@ fn walk(
// supports only single tables as of now
"table" => Box::new(TableHandler::default()),
"iframe" => Box::new(IframeHandler::default()),
// other
"html" | "head" | "body" => Box::new(DummyHandler::default()),
_ => Box::new(DummyHandler::default()),
}
}
Expand Down Expand Up @@ -226,7 +253,14 @@ fn walk(

match child.data {
NodeData::Element { ref name, .. } => match result.siblings.get_mut(&current_depth) {
Some(el) => el.push(name.local.to_string()),
Some(el) => {
let eln = name.local.to_string();
let ignore_push = eln == "script" || eln == "style";

if !ignore_push {
el.push(eln)
}
}
_ => (),
},
_ => (),
Expand Down
4 changes: 1 addition & 3 deletions src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,6 @@ where
fn to_text(tag: &Handle, commonmark: bool) -> String {
let mut printer = StructuredPrinter::default();
walk(tag, &mut printer, &HashMap::default(), commonmark);

let result = clean_markdown(&printer.data);

result.replace("\n", "<br/>")
result.replace("\n", "")
}
2 changes: 2 additions & 0 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ fn test_tables_with_newlines() {
.filter(|line| !line.ends_with("|"))
.collect();

println!("{:?}", result);

assert_that(&invalid_table_lines).is_empty();
}

Expand Down

0 comments on commit fe74819

Please sign in to comment.