Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support tri-backquote style code block and fix ordered list indent #431

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions html2text/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(
self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli
self.hide_strikethrough = False # covered in cli
self.mark_code = config.MARK_CODE
self.backquote_code_style = config.BACKQUOTE_CODE_STYLE
self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli
self.wrap_links = config.WRAP_LINKS # covered in cli
self.wrap_tables = config.WRAP_TABLES
Expand Down Expand Up @@ -111,6 +112,8 @@ def __init__(
self.blockquote = 0
self.pre = False
self.startpre = False
self.pre_indent = ""
self.list_code_indent = ""
self.code = False
self.quote = False
self.br_toggle = ""
Expand Down Expand Up @@ -629,6 +632,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.lastWasList = False

if tag == "li":
self.list_code_indent = ""
self.pbr()
if start:
if self.list:
Expand All @@ -644,15 +648,16 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
# TODO: line up <ol><li>s > 9 correctly.
parent_list = None
for list in self.list:
self.o(
" " if parent_list == "ol" and list.name == "ul" else " "
)
self.list_code_indent += " " if parent_list == "ol" else " "
parent_list = list.name
self.o(self.list_code_indent)

if li.name == "ul":
self.list_code_indent += " "
self.o(self.ul_item_mark + " ")
elif li.name == "ol":
li.num += 1
self.list_code_indent += " "
self.o(str(li.num) + ". ")
self.start = True

Expand Down Expand Up @@ -715,8 +720,11 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
if start:
self.startpre = True
self.pre = True
self.pre_indent = ""
else:
self.pre = False
if self.backquote_code_style:
self.out("\n" + self.pre_indent + "```")
if self.mark_code:
self.out("\n[/code]")
self.p()
Expand Down Expand Up @@ -786,17 +794,23 @@ def o(
bq += " "

if self.pre:
if not self.list:
if self.list:
bq += self.list_code_indent

if not self.backquote_code_style:
bq += " "
# else: list content is already partially indented
bq += " " * len(self.list)

data = data.replace("\n", "\n" + bq)
self.pre_indent = bq

if self.startpre:
self.startpre = False
if self.list:
if self.backquote_code_style:
self.out("\n" + self.pre_indent + "```")
self.p_p = 0
elif self.list:
# use existing initial indentation
data = data.lstrip("\n")
data = data.lstrip("\n" + self.pre_indent)

if self.start:
self.space = False
Expand Down Expand Up @@ -952,8 +966,15 @@ def optwrap(self, text: str) -> str:
# because of the presence of a link in it
if not self.wrap_links:
self.inline_links = False
start_code = False
for para in text.split("\n"):
if len(para) > 0:
# Text between a pair of triple-backquote (```) fence lines is a code
# block; pass it through unchanged instead of wrapping it
if self.backquote_code_style and para.lstrip().startswith("```"):
start_code = not start_code
if start_code:
result += para + "\n"
elif len(para) > 0:
if not skipwrap(
para, self.wrap_links, self.wrap_list_items, self.wrap_tables
):
Expand Down
8 changes: 8 additions & 0 deletions html2text/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,13 @@ class bcolors:
default=config.MARK_CODE,
help="Mark program code blocks with [code]...[/code]",
)
p.add_argument(
"--backquote-code-style",
action="store_true",
dest="backquote_code_style",
default=config.BACKQUOTE_CODE_STYLE,
help="Multi line code block using tri-backquote style",
)
p.add_argument(
"--decode-errors",
dest="decode_errors",
Expand Down Expand Up @@ -318,6 +325,7 @@ class bcolors:
h.skip_internal_links = args.skip_internal_links
h.links_each_paragraph = args.links_each_paragraph
h.mark_code = args.mark_code
h.backquote_code_style = args.backquote_code_style
h.wrap_links = args.wrap_links
h.wrap_list_items = args.wrap_list_items
h.wrap_tables = args.wrap_tables
Expand Down
1 change: 1 addition & 0 deletions html2text/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
IMAGES_WITH_SIZE = False
IGNORE_EMPHASIS = False
MARK_CODE = False
BACKQUOTE_CODE_STYLE = False
DECODE_ERRORS = "strict"
DEFAULT_IMAGE_ALT = ""
PAD_TABLES = False
Expand Down
31 changes: 31 additions & 0 deletions test/backquote_code_style.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<p><pre>
def func(x):
if x &lt; 1:
return 'a'
return 'b'
</pre></p>

<ul>
<li>unordered</li>
<li>...</li>
<ol>
<li>ordered</li>
<li>code:
<pre>a
b
c</pre>
</li>
<li>...</li>
<ol>
<li>ordered</li>
<li>code:
<pre>d
e
f</pre>
</li>
<li>...</li>
</ol>
<li>end</li>
</ol>
<li>end</li>
</ul>
32 changes: 32 additions & 0 deletions test/backquote_code_style.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

```
def func(x):
if x < 1:
return 'a'
return 'b'

```

* unordered
* ...
1. ordered
2. code:
```
a
b
c
```

3. ...
1. ordered
2. code:
```
d
e
f
```

3. ...
4. end
* end

25 changes: 25 additions & 0 deletions test/mixed_nested_lists.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,28 @@
<li>end</li>
</ul>


<ul>
<li>unordered</li>
<li>...</li>
<ol>
<li>ordered</li>
<li>code:
<pre>a
b
c</pre>
</li>
<li>...</li>
<ol>
<li>ordered</li>
<li>code:
<pre>d
e
f</pre>
</li>
<li>...</li>
</ol>
<li>end</li>
</ol>
<li>end</li>
</ul>
22 changes: 22 additions & 0 deletions test/mixed_nested_lists.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,25 @@
1. ordered
2. ...
* end

* unordered
* ...
1. ordered
2. code:

a
b
c

3. ...
1. ordered
2. code:

d
e
f

3. ...
4. end
* end

4 changes: 2 additions & 2 deletions test/normal.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ text to separate lists

1. now with numbers
2. the prisoner
1. not an _italic number_
2. a **bold human** being
1. not an _italic number_
2. a **bold human** being
3. end

**bold**
Expand Down
4 changes: 2 additions & 2 deletions test/normal_escape_snob.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ text to separate lists

1. now with numbers
2. the prisoner
1. not an _italic number_
2. a **bold human** being
1. not an _italic number_
2. a **bold human** being
3. end

**bold**
Expand Down
2 changes: 1 addition & 1 deletion test/preformatted_in_list.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
* Run this command:

ls -l *.html

* ?
Expand Down
5 changes: 5 additions & 0 deletions test/test_html2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ def generate_testdata():
cmdline_args.append("--mark-code")
func_args = skip

if base_fn.startswith("backquote_code_style"):
module_args["backquote_code_style"] = True
cmdline_args.append("--backquote-code-style")
func_args = skip

if base_fn.startswith("pad_table"):
module_args["pad_tables"] = True
cmdline_args.append("--pad-tables")
Expand Down
Loading