Skip to content

Commit

Permalink
fix: generate flat file names differently
Browse files Browse the repository at this point in the history
Fixes a few unusual issues with reports:

- #580: HTML report generation fails on too long path
- #584: File collisions in coverage report html
- #1167: Remove leading underscore in coverage html
  • Loading branch information
nedbat committed Jul 15, 2021
1 parent 0ff5a1c commit 4d05dde
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 25 deletions.
13 changes: 11 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,20 @@ Unreleased

- TOML parsing now uses the `tomli`_ library.

- Use a modern hash algorithm when fingerprinting to speed HTML reports
(`issue 1189`_).
- Some minor changes to usually invisible details of the HTML report:

  - Use a modern hash algorithm when fingerprinting, for high-security
    environments (`issue 1189`_).

  - Change how report file names are generated, to avoid leading underscores
    (`issue 1167`_), to avoid rare file name collisions (`issue 584`_), and to
    avoid file names becoming too long (`issue 580`_).

.. _Django coverage plugin: https://pypi.org/project/django-coverage-plugin/
.. _issue 580: https://github.com/nedbat/coveragepy/issues/580
.. _issue 584: https://github.com/nedbat/coveragepy/issues/584
.. _issue 1150: https://github.com/nedbat/coveragepy/issues/1150
.. _issue 1167: https://github.com/nedbat/coveragepy/issues/1167
.. _issue 1168: https://github.com/nedbat/coveragepy/issues/1168
.. _issue 1189: https://github.com/nedbat/coveragepy/issues/1189
.. _tomli: https://pypi.org/project/tomli/
Expand Down
17 changes: 9 additions & 8 deletions coverage/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def canonical_filename(filename):
return CANONICAL_FILENAME_CACHE[filename]


MAX_FLAT = 200
MAX_FLAT = 100

@contract(filename='unicode', returns='unicode')
def flat_rootname(filename):
Expand All @@ -87,15 +87,16 @@ def flat_rootname(filename):
the same directory, but need to differentiate same-named files from
different directories.
For example, the file a/b/c.py will return 'a_b_c_py'
For example, the file a/b/c.py will return 'd_86bbcbe134d28fd2_c_py'
"""
name = ntpath.splitdrive(filename)[1]
name = re.sub(r"[\\/.:]", "_", name)
if len(name) > MAX_FLAT:
h = hashlib.sha1(name.encode('UTF-8')).hexdigest()
name = name[-(MAX_FLAT-len(h)-1):] + '_' + h
return name
dirname, basename = ntpath.split(filename)
if dirname:
fp = hashlib.new("sha3_256", dirname.encode("UTF-8")).hexdigest()[:16]
prefix = f"d_{fp}_"
else:
prefix = ""
return prefix + basename.replace(".", "_")


if env.WINDOWS:
Expand Down
File renamed without changes.
File renamed without changes.
25 changes: 15 additions & 10 deletions tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,23 @@ def test_canonical_filename_ensure_cache_hit(self):


@pytest.mark.parametrize("original, flat", [
("a/b/c.py", "a_b_c_py"),
(r"c:\foo\bar.html", "_foo_bar_html"),
("Montréal/☺/conf.py", "Montréal_☺_conf_py"),
("abc.py", "abc_py"),
("hellothere", "hellothere"),
("a/b/c.py", "d_86bbcbe134d28fd2_c_py"),
("a/b/defghi.py", "d_86bbcbe134d28fd2_defghi_py"),
("/a/b/c.py", "d_bb25e0ada04227c6_c_py"),
("/a/b/defghi.py", "d_bb25e0ada04227c6_defghi_py"),
(r"c:\foo\bar.html", "d_e7c107482373f299_bar_html"),
(r"d:\foo\bar.html", "d_584a05dcebc67b46_bar_html"),
("Montréal/☺/conf.py", "d_c840497a2c647ce0_conf_py"),
( # original:
r"c:\lorem\ipsum\quia\dolor\sit\amet\consectetur\adipisci\velit\sed\quia\non"
r"\numquam\eius\modi\tempora\incidunt\ut\labore\et\dolore\magnam\aliquam"
r"\quaerat\voluptatem\ut\enim\ad\minima\veniam\quis\nostrum\exercitationem"
r"\ullam\corporis\suscipit\laboriosam\Montréal\☺\my_program.py",
r"c:\lorem\ipsum\quia\dolor\sit\amet\consectetur\adipisci\velit\sed" +
r"\quia\non\numquam\eius\modi\tempora\incidunt\ut\labore\et\dolore" +
r"\magnam\aliquam\quaerat\voluptatem\ut\enim\ad\minima\veniam\quis" +
r"\nostrum\exercitationem\ullam\corporis\suscipit\laboriosam" +
r"\Montréal\☺\my_program.py",
# flat:
"re_et_dolore_magnam_aliquam_quaerat_voluptatem_ut_enim_ad_minima_veniam_quis_"
"nostrum_exercitationem_ullam_corporis_suscipit_laboriosam_Montréal_☺_my_program_py_"
"97eaca41b860faaa1a21349b1f3009bb061cf0a8"
"d_e597dfacb73a23d5_my_program_py"
),
])
def test_flat_rootname(original, flat):
Expand Down
13 changes: 10 additions & 3 deletions tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def run_coverage(self, covargs=None, htmlargs=None):

def get_html_report_content(self, module):
"""Return the content of the HTML report for `module`."""
filename = module.replace(".", "_").replace("/", "_") + ".html"
filename = flat_rootname(module) + ".html"
filename = os.path.join("htmlcov", filename)
with open(filename) as f:
return f.read()
Expand Down Expand Up @@ -617,7 +617,7 @@ def filepath_to_regex(path):
return regex


def compare_html(expected, actual):
def compare_html(expected, actual, extra_scrubs=None):
"""Specialized compare function for our HTML files."""
scrubs = [
(r'/coverage.readthedocs.io/?[-.\w/]*', '/coverage.readthedocs.io/VER'),
Expand All @@ -640,6 +640,8 @@ def compare_html(expected, actual):
if env.WINDOWS:
# For file paths...
scrubs += [(r"\\", "/")]
if extra_scrubs:
scrubs += extra_scrubs
compare(expected, actual, file_pattern="*.html", scrubs=scrubs)


Expand Down Expand Up @@ -897,7 +899,12 @@ def test_other(self):
for p in glob.glob("out/other/*_other_py.html"):
os.rename(p, "out/other/blah_blah_other_py.html")

compare_html(gold_path("html/other"), "out/other")
compare_html(
gold_path("html/other"), "out/other",
extra_scrubs=[
(r'href="d_[0-9a-z]{16}_', 'href="_TEST_TMPDIR_othersrc_'),
],
)
contains(
"out/other/index.html",
'<a href="here_py.html">here.py</a>',
Expand Down
5 changes: 3 additions & 2 deletions tests/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1332,10 +1332,11 @@ def test_accented_directory(self):
# The HTML report uses ascii-encoded HTML entities.
out = self.run_command("coverage html")
assert out == ""
self.assert_exists("htmlcov/\xe2_accented_py.html")
self.assert_exists("htmlcov/d_5786906b6f0ffeb4_accented_py.html")
with open("htmlcov/index.html") as indexf:
index = indexf.read()
assert '<a href="&#226;_accented_py.html">&#226;%saccented.py</a>' % os.sep in index
expected = '<a href="d_5786906b6f0ffeb4_accented_py.html">&#226;%saccented.py</a>'
assert expected % os.sep in index

# The XML report is always UTF8-encoded.
out = self.run_command("coverage xml")
Expand Down

0 comments on commit 4d05dde

Please sign in to comment.