Skip to content

Commit

Permalink
Rework use_directory_url=True validation to better catch incorrect li…
Browse files Browse the repository at this point in the history
…nks (#80)
  • Loading branch information
ben-foxmoore authored Mar 26, 2024
1 parent ac22c24 commit d9fd2ce
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 13 deletions.
24 changes: 13 additions & 11 deletions htmlproofer/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,18 +194,20 @@ def is_url_target_valid(url: str, src_path: str, files: Dict[str, File]) -> bool
return True

url_target, _, optional_anchor = match.groups()
_, extension = os.path.splitext(url_target)
if extension == ".html":
# URL is a link to another local Markdown file that may include an anchor.
target_markdown = HtmlProoferPlugin.find_target_markdown(url_target, src_path, files)
if target_markdown is None:
# The corresponding Markdown page was not found.
return False
if optional_anchor and not HtmlProoferPlugin.contains_anchor(target_markdown, optional_anchor):
# The corresponding Markdown header for this anchor was not found.
return False
elif HtmlProoferPlugin.find_source_file(url_target, src_path, files) is None:
source_file = HtmlProoferPlugin.find_source_file(url_target, src_path, files)
if source_file is None:
return False

# If there's an anchor (fragment) on the link, we try to find it in the source_file
if optional_anchor:
_, extension = os.path.splitext(source_file.src_uri)
# Currently only Markdown-based pages are supported, but conceptually others could be added below
if extension == ".md":
if source_file.page is None or source_file.page.markdown is None:
return False
if not HtmlProoferPlugin.contains_anchor(source_file.page.markdown, optional_anchor):
return False

return True

@staticmethod
Expand Down
7 changes: 5 additions & 2 deletions tests/integration/docs/nested/page1.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ This plugin can detect invalid anchor links to another page, such as
[Acknowledgement](../index.md#BAD_ANCHOR)
or to a nested page
[Invalid Anchor](./page2.md#BAD_ANCHOR).
It should also detect links to pages in the same directory without './'
[Invalid Anchor](page2.md#BAD_ANCHOR).

But allows valid anchors such as
[Main Page](../index.md#mkdocs-htmlproofer-plugin),
[Table of Contents](../index.md#table-of-contents), and
[Emoji Anchor](./page2.md#title-with-emojis).
[Table of Contents](../index.md#table-of-contents),
[Emoji Anchor](./page2.md#title-with-emojis), and
[Good Heading](page2.md#good-heading).

## Image Link absolute/relative

Expand Down
2 changes: 2 additions & 0 deletions tests/integration/docs/nested/page2.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Second Nested Test Page

## :smile_cat: Title with Emojis :material-star: :octicons-apps-16:

## Good Heading
2 changes: 2 additions & 0 deletions tests/integration/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ plugins:
'#acknowledge',
'../index.html#BAD_ANCHOR',
'page2.html#BAD_ANCHOR',
'../../#BAD_ANCHOR', # if use_directory_urls=True
'../page2/#BAD_ANCHOR', # if use_directory_urls=True
'../../../tests',
]
skip_downloads: True
51 changes: 51 additions & 0 deletions tests/unit/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,51 @@ def test_get_url_status__local_page(plugin):
set(), files) == 0


def test_get_url_status__local_page_with_directory_urls(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
page1_page = Mock(spec=Page, markdown='# Page One\n## Sub Heading\nContent')
special_char_page = Mock(spec=Page, markdown='# Heading éèà\n## Sub Heading éèà\nContent')
mock_files = Files([
Mock(spec=File, src_path='index.md', dest_path='index/index.html',
dest_uri='index/index.html', url='index/', src_uri='index.md',
page=index_page),
Mock(spec=File, src_path='page1.md', dest_path='page1/index.html',
dest_uri='page1/index.html', url='page1/', src_uri='page1.md',
page=page1_page),
Mock(spec=File, src_path='Dir éèà/éèà.md', dest_path='Dir éèà/éèà/index.html',
dest_uri='Dir éèà/éèà/index.html',
url='Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0/',
src_uri='Dir éèà/éèà.md', page=special_char_page),
Mock(spec=File, src_path='Dir éèà/page1.md', dest_path='Dir éèà/page1/index.html',
dest_uri='Dir éèà/page1/index.html',
url='Dir%20%C3%A9%C3%A8%C3%A0/page1/',
src_uri='Dir%20%C3%A9%C3%A8%C3%A0/page1.md',
page=special_char_page),
])
files = {}
files.update({os.path.normpath(file.url): file for file in mock_files})
files.update({file.src_uri: file for file in mock_files})

assert plugin.get_url_status('../index/', 'page1.md', set(), files) == 0
assert plugin.get_url_status('../index/#heading', 'page1.md', set(), files) == 0
assert plugin.get_url_status('../index/#bad-heading', 'page1.md', set(), files) == 404

assert plugin.get_url_status('../page1/', 'page1.md', set(), files) == 0
assert plugin.get_url_status('../page1/#sub-heading', 'page1.md', set(), files) == 0
assert plugin.get_url_status('../page1/#heading', 'page1.md', set(), files) == 404

assert plugin.get_url_status('../page2/', 'page1.md', set(), files) == 404
assert plugin.get_url_status('../page2/#heading', 'page1.md', set(), files) == 404

assert plugin.get_url_status(
'../Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0/#sub-heading-eea',
'page1.md', set(), files) == 0
assert plugin.get_url_status(
'../%C3%A9%C3%A8%C3%A0/#sub-heading-eea',
'Dir%20%C3%A9%C3%A8%C3%A0/page1.md',
set(), files) == 0


def test_get_url_status__non_markdown_page(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
mock_files = Files([
Expand All @@ -293,6 +338,9 @@ def test_get_url_status__non_markdown_page(plugin):
Mock(spec=File, src_path='drawing.svg', dest_path='drawing.svg',
dest_uri='index.html', url='drawing.svg', src_uri='drawing.svg',
page=None),
Mock(spec=File, src_path='page.html', dest_path='page.html',
dest_uri='page.html', url='page.html', src_uri='page.html',
page=None),
])
files = {}
files.update({os.path.normpath(file.url): file for file in mock_files})
Expand All @@ -302,6 +350,9 @@ def test_get_url_status__non_markdown_page(plugin):
assert plugin.get_url_status('/drawing.svg', 'index.md', set(), files) == 0
assert plugin.get_url_status('not-existing.svg', 'index.md', set(), files) == 404

assert plugin.get_url_status('page.html', 'index.md', set(), files) == 0
assert plugin.get_url_status('page.html#heading', 'index.md', set(), files) == 0 # no validation for non-markdown pages


def test_get_url_status__local_page_nested(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
Expand Down

0 comments on commit d9fd2ce

Please sign in to comment.