diff --git a/html2text/__init__.py b/html2text/__init__.py index 1a3c8e6..a937dc7 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -78,6 +78,7 @@ def __init__( self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli self.hide_strikethrough = False # covered in cli self.mark_code = config.MARK_CODE + self.backquote_code_style = config.BACKQUOTE_CODE_STYLE self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli self.wrap_links = config.WRAP_LINKS # covered in cli self.wrap_tables = config.WRAP_TABLES @@ -111,6 +112,8 @@ def __init__( self.blockquote = 0 self.pre = False self.startpre = False + self.pre_indent = "" + self.list_code_indent = "" self.code = False self.quote = False self.br_toggle = "" @@ -629,6 +632,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: self.lastWasList = False if tag == "li": + self.list_code_indent = "" self.pbr() if start: if self.list: @@ -644,15 +648,16 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: # TODO: line up
  1. s > 9 correctly. parent_list = None for list in self.list: - self.o( - " " if parent_list == "ol" and list.name == "ul" else " " - ) + self.list_code_indent += " " if parent_list == "ol" else " " parent_list = list.name + self.o(self.list_code_indent) if li.name == "ul": + self.list_code_indent += " " self.o(self.ul_item_mark + " ") elif li.name == "ol": li.num += 1 + self.list_code_indent += " " self.o(str(li.num) + ". ") self.start = True @@ -715,8 +720,11 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: if start: self.startpre = True self.pre = True + self.pre_indent = "" else: self.pre = False + if self.backquote_code_style: + self.out("\n" + self.pre_indent + "```") if self.mark_code: self.out("\n[/code]") self.p() @@ -786,17 +794,23 @@ def o( bq += " " if self.pre: - if not self.list: + if self.list: + bq += self.list_code_indent + + if not self.backquote_code_style: bq += " " - # else: list content is already partially indented - bq += " " * len(self.list) + data = data.replace("\n", "\n" + bq) + self.pre_indent = bq if self.startpre: self.startpre = False - if self.list: + if self.backquote_code_style: + self.out("\n" + self.pre_indent + "```") + self.p_p = 0 + elif self.list: # use existing initial indentation - data = data.lstrip("\n") + data = data.lstrip("\n" + self.pre_indent) if self.start: self.space = False @@ -952,8 +966,15 @@ def optwrap(self, text: str) -> str: # because of the presence of a link in it if not self.wrap_links: self.inline_links = False + start_code = False for para in text.split("\n"): - if len(para) > 0: + # If the text is between tri-backquote pairs, it's a code block; + # don't wrap + if self.backquote_code_style and para.lstrip().startswith("```"): + start_code = not start_code + if start_code: + result += para + "\n" + elif len(para) > 0: if not skipwrap( para, self.wrap_links, self.wrap_list_items, self.wrap_tables ): diff --git a/html2text/cli.py b/html2text/cli.py index 0153227..202a77f 100644 --- a/html2text/cli.py +++ b/html2text/cli.py @@ -238,6 +238,13 @@ class bcolors: default=config.MARK_CODE, help="Mark program code blocks with [code]...[/code]", ) + p.add_argument( + "--backquote-code-style", + action="store_true", + dest="backquote_code_style", + default=config.BACKQUOTE_CODE_STYLE, + help="Multi line code block using tri-backquote style", + ) p.add_argument( "--decode-errors", dest="decode_errors", @@ -318,6 +325,7 @@ class bcolors: h.skip_internal_links = args.skip_internal_links h.links_each_paragraph = args.links_each_paragraph h.mark_code = args.mark_code + h.backquote_code_style = args.backquote_code_style h.wrap_links = args.wrap_links h.wrap_list_items = args.wrap_list_items h.wrap_tables = args.wrap_tables diff --git a/html2text/config.py b/html2text/config.py index 4069740..ee316b4 100644 --- a/html2text/config.py +++ b/html2text/config.py @@ -48,6 +48,7 @@ IMAGES_WITH_SIZE = False IGNORE_EMPHASIS = False MARK_CODE = False +BACKQUOTE_CODE_STYLE = False DECODE_ERRORS = "strict" DEFAULT_IMAGE_ALT = "" PAD_TABLES = False diff --git a/test/backquote_code_style.html b/test/backquote_code_style.html new file mode 100644 index 0000000..68bb3e7 --- /dev/null +++ b/test/backquote_code_style.html @@ -0,0 +1,31 @@ +

    +def func(x):
    +  if x < 1:
    +    return 'a'
    +  return 'b'
    +

    + + diff --git a/test/backquote_code_style.md b/test/backquote_code_style.md new file mode 100644 index 0000000..3e696af --- /dev/null +++ b/test/backquote_code_style.md @@ -0,0 +1,32 @@ + +``` +def func(x): + if x < 1: + return 'a' + return 'b' + +``` + + * unordered + * ... + 1. ordered + 2. code: + ``` + a + b + c + ``` + + 3. ... + 1. ordered + 2. code: + ``` + d + e + f + ``` + + 3. ... + 4. end + * end + diff --git a/test/mixed_nested_lists.html b/test/mixed_nested_lists.html index c7ed28d..fc96752 100644 --- a/test/mixed_nested_lists.html +++ b/test/mixed_nested_lists.html @@ -18,3 +18,28 @@
  2. end
  3. + + diff --git a/test/mixed_nested_lists.md b/test/mixed_nested_lists.md index b131c19..7c3f5bc 100644 --- a/test/mixed_nested_lists.md +++ b/test/mixed_nested_lists.md @@ -9,3 +9,25 @@ 1. ordered 2. ... * end + + * unordered + * ... + 1. ordered + 2. code: + + a + b + c + + 3. ... + 1. ordered + 2. code: + + d + e + f + + 3. ... + 4. end + * end + diff --git a/test/normal.md b/test/normal.md index 2294930..5ec6743 100644 --- a/test/normal.md +++ b/test/normal.md @@ -12,8 +12,8 @@ text to separate lists 1. now with numbers 2. the prisoner - 1. not an _italic number_ - 2. a **bold human** being + 1. not an _italic number_ + 2. a **bold human** being 3. end **bold** diff --git a/test/normal_escape_snob.md b/test/normal_escape_snob.md index d6d3d9f..d4ca6cd 100644 --- a/test/normal_escape_snob.md +++ b/test/normal_escape_snob.md @@ -12,8 +12,8 @@ text to separate lists 1. now with numbers 2. the prisoner - 1. not an _italic number_ - 2. a **bold human** being + 1. not an _italic number_ + 2. a **bold human** being 3. end **bold** diff --git a/test/preformatted_in_list.md b/test/preformatted_in_list.md index 2b1124e..9b10e2e 100644 --- a/test/preformatted_in_list.md +++ b/test/preformatted_in_list.md @@ -1,5 +1,5 @@ * Run this command: - + ls -l *.html * ? diff --git a/test/test_html2text.py b/test/test_html2text.py index 5db8971..1f16b4f 100644 --- a/test/test_html2text.py +++ b/test/test_html2text.py @@ -117,6 +117,11 @@ def generate_testdata(): cmdline_args.append("--mark-code") func_args = skip + if base_fn.startswith("backquote_code_style"): + module_args["backquote_code_style"] = True + cmdline_args.append("--backquote-code-style") + func_args = skip + if base_fn.startswith("pad_table"): module_args["pad_tables"] = True cmdline_args.append("--pad-tables")