Skip to content

Commit

Permalink
fix in clean_url(): remove whitespace (#77)
Browse files Browse the repository at this point in the history
  • Loading branch information
adbar authored Jan 11, 2024
1 parent 8d3e12e commit 80fdf0a
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
4 changes: 2 additions & 2 deletions courlan/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def scrub_url(url: str) -> str:
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \r\n"
)
# clean the input string
url = url.replace("[ \t]+", "")
# strip space in input string
url = "".join(url.split())
# <![CDATA[http://www.urbanlife.de/item/260-bmw-i8-hybrid-revolution-unter-den-sportwagen.html]]>
if url.startswith("<![CDATA["):
url = url.replace("<![CDATA[", "") # url = re.sub(r'^<!\[CDATA\[', '', url)
Expand Down
1 change: 1 addition & 0 deletions tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def test_scrub():
assert clean_url(5) is None
assert clean_url("ø\xaa") == "%C3%B8%C2%AA"
assert clean_url("https://example.org/?p=100") == "https://example.org/?p=100"
assert clean_url("https://example.org/\t?p=100") == "https://example.org/?p=100"
assert (
clean_url("https://example.org:443/file.html?p=100&abc=1#frag")
== "https://example.org/file.html?abc=1&p=100#frag"
Expand Down

0 comments on commit 80fdf0a

Please sign in to comment.