From a40efab857b7019c732ef3fcc2d689fc6260a30a Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Wed, 14 Jul 2021 06:59:30 -0400 Subject: [PATCH] fix: generate flat file names differently Fixes a few unusual issues with reports: - #580: HTML report generation fails on too long path - #584: File collisions in coverage report html - #1167: Remove leading underscore in coverage html --- CHANGES.rst | 13 ++++++++-- coverage/files.py | 17 +++++++------ ...r => d_80084bf2fba02475___init__.py,cover} | 0 ...py,cover => d_80084bf2fba02475_a.py,cover} | 0 ...r => d_b039179a8a4ce2c2___init__.py,cover} | 0 ...py,cover => d_b039179a8a4ce2c2_b.py,cover} | 0 tests/goldtest.py | 16 +++++++++++- tests/test_files.py | 25 +++++++++++-------- tests/test_html.py | 13 +++++++--- tests/test_process.py | 5 ++-- 10 files changed, 63 insertions(+), 26 deletions(-) rename tests/gold/annotate/anno_dir/{a___init__.py,cover => d_80084bf2fba02475___init__.py,cover} (100%) rename tests/gold/annotate/anno_dir/{a_a.py,cover => d_80084bf2fba02475_a.py,cover} (100%) rename tests/gold/annotate/anno_dir/{b___init__.py,cover => d_b039179a8a4ce2c2___init__.py,cover} (100%) rename tests/gold/annotate/anno_dir/{b_b.py,cover => d_b039179a8a4ce2c2_b.py,cover} (100%) diff --git a/CHANGES.rst b/CHANGES.rst index 850c30ecf..d2c4ae06d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -46,11 +46,20 @@ Unreleased - TOML parsing now uses the `tomli`_ library. -- Use a modern hash algorithm when fingerprinting to speed HTML reports - (`issue 1189`_). +- Some minor changes to usually invisible details of the HTML report: + + - Use a modern hash algorithm when fingerprinting, for high-security + environments (`issue 1189`_). + + - Change how report file names are generated, to avoid leading underscores + (`issue 1167`_), to avoid rare file name collisions (`issue 584`_), and to + avoid file names becoming too long (`issue 580`_). .. _Django coverage plugin: https://pypi.org/project/django-coverage-plugin/ +.. _issue 580: https://github.com/nedbat/coveragepy/issues/580 +.. _issue 584: https://github.com/nedbat/coveragepy/issues/584 .. _issue 1150: https://github.com/nedbat/coveragepy/issues/1150 +.. _issue 1167: https://github.com/nedbat/coveragepy/issues/1167 .. _issue 1168: https://github.com/nedbat/coveragepy/issues/1168 .. _issue 1189: https://github.com/nedbat/coveragepy/issues/1189 .. _tomli: https://pypi.org/project/tomli/ diff --git a/coverage/files.py b/coverage/files.py index 8de2ec676..252e42ec5 100644 --- a/coverage/files.py +++ b/coverage/files.py @@ -77,7 +77,7 @@ def canonical_filename(filename): return CANONICAL_FILENAME_CACHE[filename] -MAX_FLAT = 200 +MAX_FLAT = 100 @contract(filename='unicode', returns='unicode') def flat_rootname(filename): @@ -87,15 +87,16 @@ def flat_rootname(filename): the same directory, but need to differentiate same-named files from different directories. - For example, the file a/b/c.py will return 'a_b_c_py' + For example, the file a/b/c.py will return 'd_86bbcbe134d28fd2_c_py' """ - name = ntpath.splitdrive(filename)[1] - name = re.sub(r"[\\/.:]", "_", name) - if len(name) > MAX_FLAT: - h = hashlib.sha1(name.encode('UTF-8')).hexdigest() - name = name[-(MAX_FLAT-len(h)-1):] + '_' + h - return name + dirname, basename = ntpath.split(filename) + if dirname: + fp = hashlib.new("sha3_256", dirname.encode("UTF-8")).hexdigest()[:16] + prefix = f"d_{fp}_" + else: + prefix = "" + return prefix + basename.replace(".", "_") if env.WINDOWS: diff --git a/tests/gold/annotate/anno_dir/a___init__.py,cover b/tests/gold/annotate/anno_dir/d_80084bf2fba02475___init__.py,cover similarity index 100% rename from tests/gold/annotate/anno_dir/a___init__.py,cover rename to tests/gold/annotate/anno_dir/d_80084bf2fba02475___init__.py,cover diff --git a/tests/gold/annotate/anno_dir/a_a.py,cover b/tests/gold/annotate/anno_dir/d_80084bf2fba02475_a.py,cover similarity index 100% rename from tests/gold/annotate/anno_dir/a_a.py,cover rename to tests/gold/annotate/anno_dir/d_80084bf2fba02475_a.py,cover diff --git a/tests/gold/annotate/anno_dir/b___init__.py,cover b/tests/gold/annotate/anno_dir/d_b039179a8a4ce2c2___init__.py,cover similarity index 100% rename from tests/gold/annotate/anno_dir/b___init__.py,cover rename to tests/gold/annotate/anno_dir/d_b039179a8a4ce2c2___init__.py,cover diff --git a/tests/gold/annotate/anno_dir/b_b.py,cover b/tests/gold/annotate/anno_dir/d_b039179a8a4ce2c2_b.py,cover similarity index 100% rename from tests/gold/annotate/anno_dir/b_b.py,cover rename to tests/gold/annotate/anno_dir/d_b039179a8a4ce2c2_b.py,cover diff --git a/tests/goldtest.py b/tests/goldtest.py index b9d59217c..bafb6f552 100644 --- a/tests/goldtest.py +++ b/tests/goldtest.py @@ -81,7 +81,7 @@ def compare( actual_file = os.path.join(actual_dir, f) with open(actual_file) as fobj: - actual = fobj.read() + actual = actual_original = fobj.read() if actual_file.endswith(".xml"): actual = canonicalize_xml(actual) @@ -95,6 +95,20 @@ def compare( print(f":::: diff {expected_file!r} and {actual_file!r}") print("\n".join(difflib.Differ().compare(expected, actual))) print(f":::: end diff {expected_file!r} and {actual_file!r}") + + save_path = expected_dir.replace("/gold/", "/actual/") + os.makedirs(save_path, exist_ok=True) + with open(os.path.join(save_path, f), "w") as savef: + savef.write(actual_original) + + if not actual_extra: + for f in actual_only: + save_path = expected_dir.replace("/gold/", "/actual/") + os.makedirs(save_path, exist_ok=True) + with open(os.path.join(save_path, f), "w") as savef: + with open(os.path.join(actual_dir, f)) as readf: + savef.write(readf.read()) + assert not text_diff, "Files differ: %s" % '\n'.join(text_diff) assert not expected_only, f"Files in {expected_dir} only: {expected_only}" diff --git a/tests/test_files.py b/tests/test_files.py index 98ece632f..39a51d8c2 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -69,18 +69,23 @@ def test_canonical_filename_ensure_cache_hit(self): @pytest.mark.parametrize("original, flat", [ - ("a/b/c.py", "a_b_c_py"), - (r"c:\foo\bar.html", "_foo_bar_html"), - ("Montréal/☺/conf.py", "Montréal_☺_conf_py"), + ("abc.py", "abc_py"), + ("hellothere", "hellothere"), + ("a/b/c.py", "d_86bbcbe134d28fd2_c_py"), + ("a/b/defghi.py", "d_86bbcbe134d28fd2_defghi_py"), + ("/a/b/c.py", "d_bb25e0ada04227c6_c_py"), + ("/a/b/defghi.py", "d_bb25e0ada04227c6_defghi_py"), + (r"c:\foo\bar.html", "d_e7c107482373f299_bar_html"), + (r"d:\foo\bar.html", "d_584a05dcebc67b46_bar_html"), + ("Montréal/☺/conf.py", "d_c840497a2c647ce0_conf_py"), ( # original: - r"c:\lorem\ipsum\quia\dolor\sit\amet\consectetur\adipisci\velit\sed\quia\non" - r"\numquam\eius\modi\tempora\incidunt\ut\labore\et\dolore\magnam\aliquam" - r"\quaerat\voluptatem\ut\enim\ad\minima\veniam\quis\nostrum\exercitationem" - r"\ullam\corporis\suscipit\laboriosam\Montréal\☺\my_program.py", + r"c:\lorem\ipsum\quia\dolor\sit\amet\consectetur\adipisci\velit\sed" + + r"\quia\non\numquam\eius\modi\tempora\incidunt\ut\labore\et\dolore" + + r"\magnam\aliquam\quaerat\voluptatem\ut\enim\ad\minima\veniam\quis" + + r"\nostrum\exercitationem\ullam\corporis\suscipit\laboriosam" + + r"\Montréal\☺\my_program.py", # flat: - "re_et_dolore_magnam_aliquam_quaerat_voluptatem_ut_enim_ad_minima_veniam_quis_" - "nostrum_exercitationem_ullam_corporis_suscipit_laboriosam_Montréal_☺_my_program_py_" - "97eaca41b860faaa1a21349b1f3009bb061cf0a8" + "d_e597dfacb73a23d5_my_program_py" ), ]) def test_flat_rootname(original, flat): diff --git a/tests/test_html.py b/tests/test_html.py index 56519a641..a0ab2d4a0 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -56,7 +56,7 @@ def run_coverage(self, covargs=None, htmlargs=None): def get_html_report_content(self, module): """Return the content of the HTML report for `module`.""" - filename = module.replace(".", "_").replace("/", "_") + ".html" + filename = flat_rootname(module) + ".html" filename = os.path.join("htmlcov", filename) with open(filename) as f: return f.read() @@ -617,7 +617,7 @@ def filepath_to_regex(path): return regex -def compare_html(expected, actual): +def compare_html(expected, actual, extra_scrubs=None): """Specialized compare function for our HTML files.""" scrubs = [ (r'/coverage.readthedocs.io/?[-.\w/]*', '/coverage.readthedocs.io/VER'), @@ -640,6 +640,8 @@ def compare_html(expected, actual): if env.WINDOWS: # For file paths... scrubs += [(r"\\", "/")] + if extra_scrubs: + scrubs += extra_scrubs compare(expected, actual, file_pattern="*.html", scrubs=scrubs) @@ -897,7 +899,12 @@ def test_other(self): for p in glob.glob("out/other/*_other_py.html"): os.rename(p, "out/other/blah_blah_other_py.html") - compare_html(gold_path("html/other"), "out/other") + compare_html( + gold_path("html/other"), "out/other", + extra_scrubs=[ + (r'href="d_[0-9a-z]{16}_', 'href="_TEST_TMPDIR_othersrc_'), + ], + ) contains( "out/other/index.html", 'here.py', diff --git a/tests/test_process.py b/tests/test_process.py index 04a8a2d53..58915b876 100644 --- a/tests/test_process.py +++ b/tests/test_process.py @@ -1332,10 +1332,11 @@ def test_accented_directory(self): # The HTML report uses ascii-encoded HTML entities. out = self.run_command("coverage html") assert out == "" - self.assert_exists("htmlcov/\xe2_accented_py.html") + self.assert_exists("htmlcov/d_5786906b6f0ffeb4_accented_py.html") with open("htmlcov/index.html") as indexf: index = indexf.read() - assert 'â%saccented.py' % os.sep in index + expected = 'â%saccented.py' + assert expected % os.sep in index # The XML report is always UTF8-encoded. out = self.run_command("coverage xml")