diff --git a/html2text/__init__.py b/html2text/__init__.py
index 1a3c8e6..a937dc7 100644
--- a/html2text/__init__.py
+++ b/html2text/__init__.py
@@ -78,6 +78,7 @@ def __init__(
self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli
self.hide_strikethrough = False # covered in cli
self.mark_code = config.MARK_CODE
+ self.backquote_code_style = config.BACKQUOTE_CODE_STYLE
self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli
self.wrap_links = config.WRAP_LINKS # covered in cli
self.wrap_tables = config.WRAP_TABLES
@@ -111,6 +112,8 @@ def __init__(
self.blockquote = 0
self.pre = False
self.startpre = False
+ self.pre_indent = ""
+ self.list_code_indent = ""
self.code = False
self.quote = False
self.br_toggle = ""
@@ -629,6 +632,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.lastWasList = False
if tag == "li":
+ self.list_code_indent = ""
self.pbr()
if start:
if self.list:
@@ -644,15 +648,16 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
# TODO: line up <ol><li>s > 9 correctly.
parent_list = None
for list in self.list:
- self.o(
- " " if parent_list == "ol" and list.name == "ul" else " "
- )
+ self.list_code_indent += " " if parent_list == "ol" else " "
parent_list = list.name
+ self.o(self.list_code_indent)
if li.name == "ul":
+ self.list_code_indent += " "
self.o(self.ul_item_mark + " ")
elif li.name == "ol":
li.num += 1
+ self.list_code_indent += " "
self.o(str(li.num) + ". ")
self.start = True
@@ -715,8 +720,11 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
if start:
self.startpre = True
self.pre = True
+ self.pre_indent = ""
else:
self.pre = False
+ if self.backquote_code_style:
+ self.out("\n" + self.pre_indent + "```")
if self.mark_code:
self.out("\n[/code]")
self.p()
@@ -786,17 +794,23 @@ def o(
bq += " "
if self.pre:
- if not self.list:
+ if self.list:
+ bq += self.list_code_indent
+
+ if not self.backquote_code_style:
bq += " "
- # else: list content is already partially indented
- bq += " " * len(self.list)
+
data = data.replace("\n", "\n" + bq)
+ self.pre_indent = bq
if self.startpre:
self.startpre = False
- if self.list:
+ if self.backquote_code_style:
+ self.out("\n" + self.pre_indent + "```")
+ self.p_p = 0
+ elif self.list:
# use existing initial indentation
- data = data.lstrip("\n")
+ data = data.lstrip("\n" + self.pre_indent)
if self.start:
self.space = False
@@ -952,8 +966,15 @@ def optwrap(self, text: str) -> str:
# because of the presence of a link in it
if not self.wrap_links:
self.inline_links = False
+ start_code = False
for para in text.split("\n"):
- if len(para) > 0:
+ # If the text is between tri-backquote pairs, it's a code block;
+ # don't wrap
+ if self.backquote_code_style and para.lstrip().startswith("```"):
+ start_code = not start_code
+ if start_code:
+ result += para + "\n"
+ elif len(para) > 0:
if not skipwrap(
para, self.wrap_links, self.wrap_list_items, self.wrap_tables
):
diff --git a/html2text/cli.py b/html2text/cli.py
index 0153227..202a77f 100644
--- a/html2text/cli.py
+++ b/html2text/cli.py
@@ -238,6 +238,13 @@ class bcolors:
default=config.MARK_CODE,
help="Mark program code blocks with [code]...[/code]",
)
+ p.add_argument(
+ "--backquote-code-style",
+ action="store_true",
+ dest="backquote_code_style",
+ default=config.BACKQUOTE_CODE_STYLE,
+ help="Multi line code block using tri-backquote style",
+ )
p.add_argument(
"--decode-errors",
dest="decode_errors",
@@ -318,6 +325,7 @@ class bcolors:
h.skip_internal_links = args.skip_internal_links
h.links_each_paragraph = args.links_each_paragraph
h.mark_code = args.mark_code
+ h.backquote_code_style = args.backquote_code_style
h.wrap_links = args.wrap_links
h.wrap_list_items = args.wrap_list_items
h.wrap_tables = args.wrap_tables
diff --git a/html2text/config.py b/html2text/config.py
index 4069740..ee316b4 100644
--- a/html2text/config.py
+++ b/html2text/config.py
@@ -48,6 +48,7 @@
IMAGES_WITH_SIZE = False
IGNORE_EMPHASIS = False
MARK_CODE = False
+BACKQUOTE_CODE_STYLE = False
DECODE_ERRORS = "strict"
DEFAULT_IMAGE_ALT = ""
PAD_TABLES = False
diff --git a/test/backquote_code_style.html b/test/backquote_code_style.html
new file mode 100644
index 0000000..68bb3e7
--- /dev/null
+++ b/test/backquote_code_style.html
@@ -0,0 +1,31 @@
+
+def func(x):
+ if x < 1:
+ return 'a'
+ return 'b'
+
+
+
+ - unordered
+ - ...
+
+ - ordered
+ - code:
+
a
+b
+c
+
+ - ...
+
+ - ordered
+ - code:
+
d
+e
+f
+
+ - ...
+
+ - end
+
+ - end
+
diff --git a/test/backquote_code_style.md b/test/backquote_code_style.md
new file mode 100644
index 0000000..3e696af
--- /dev/null
+++ b/test/backquote_code_style.md
@@ -0,0 +1,32 @@
+
+```
+def func(x):
+ if x < 1:
+ return 'a'
+ return 'b'
+
+```
+
+ * unordered
+ * ...
+ 1. ordered
+ 2. code:
+ ```
+ a
+ b
+ c
+ ```
+
+ 3. ...
+ 1. ordered
+ 2. code:
+ ```
+ d
+ e
+ f
+ ```
+
+ 3. ...
+ 4. end
+ * end
+
diff --git a/test/mixed_nested_lists.html b/test/mixed_nested_lists.html
index c7ed28d..fc96752 100644
--- a/test/mixed_nested_lists.html
+++ b/test/mixed_nested_lists.html
@@ -18,3 +18,28 @@
- end
+
+
+ - unordered
+ - ...
+
+ - ordered
+ - code:
+
a
+b
+c
+
+ - ...
+
+ - ordered
+ - code:
+
d
+e
+f
+
+ - ...
+
+ - end
+
+ - end
+
diff --git a/test/mixed_nested_lists.md b/test/mixed_nested_lists.md
index b131c19..7c3f5bc 100644
--- a/test/mixed_nested_lists.md
+++ b/test/mixed_nested_lists.md
@@ -9,3 +9,25 @@
1. ordered
2. ...
* end
+
+ * unordered
+ * ...
+ 1. ordered
+ 2. code:
+
+ a
+ b
+ c
+
+ 3. ...
+ 1. ordered
+ 2. code:
+
+ d
+ e
+ f
+
+ 3. ...
+ 4. end
+ * end
+
diff --git a/test/normal.md b/test/normal.md
index 2294930..5ec6743 100644
--- a/test/normal.md
+++ b/test/normal.md
@@ -12,8 +12,8 @@ text to separate lists
1. now with numbers
2. the prisoner
- 1. not an _italic number_
- 2. a **bold human** being
+ 1. not an _italic number_
+ 2. a **bold human** being
3. end
**bold**
diff --git a/test/normal_escape_snob.md b/test/normal_escape_snob.md
index d6d3d9f..d4ca6cd 100644
--- a/test/normal_escape_snob.md
+++ b/test/normal_escape_snob.md
@@ -12,8 +12,8 @@ text to separate lists
1. now with numbers
2. the prisoner
- 1. not an _italic number_
- 2. a **bold human** being
+ 1. not an _italic number_
+ 2. a **bold human** being
3. end
**bold**
diff --git a/test/preformatted_in_list.md b/test/preformatted_in_list.md
index 2b1124e..9b10e2e 100644
--- a/test/preformatted_in_list.md
+++ b/test/preformatted_in_list.md
@@ -1,5 +1,5 @@
* Run this command:
-
+
ls -l *.html
* ?
diff --git a/test/test_html2text.py b/test/test_html2text.py
index 5db8971..1f16b4f 100644
--- a/test/test_html2text.py
+++ b/test/test_html2text.py
@@ -117,6 +117,11 @@ def generate_testdata():
cmdline_args.append("--mark-code")
func_args = skip
+ if base_fn.startswith("backquote_code_style"):
+ module_args["backquote_code_style"] = True
+ cmdline_args.append("--backquote-code-style")
+ func_args = skip
+
if base_fn.startswith("pad_table"):
module_args["pad_tables"] = True
cmdline_args.append("--pad-tables")