Skip to content

Commit

Permalink
fixup! perf: hash data files during combining to avoid unneeded work. #…
Browse files: browse the repository at this point in the history
  • Loading branch information
nedbat committed Nov 8, 2022
1 parent 7c30cb6 commit ab8aad6
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 24 deletions.
53 changes: 29 additions & 24 deletions coverage/data.py
Original file line number Diff line number Diff line change
@@ -112,7 +112,8 @@ def combine_parallel_data(
raise NoDataError("No data to combine")

file_hashes = set()
files_combined = 0
combined_any = False

for f in files_to_combine:
if f == data.data_filename():
# Sometimes we are combining into a file which is one of the
@@ -133,33 +134,37 @@ def combine_parallel_data(
hasher = hashlib.new("sha3_256")
hasher.update(fobj.read())
sha = hasher.digest()
if sha in file_hashes:
if message:
message(f"Skipping duplicate data {rel_file_name}")
continue
file_hashes.add(sha)
combine_this_one = sha not in file_hashes

if data._debug.should('dataio'):
data._debug.write(f"Combining data file {f!r}")
try:
new_data = CoverageData(f, debug=data._debug)
new_data.read()
except CoverageException as exc:
if data._warn:
# The CoverageException has the file name in it, so just
# use the message as the warning.
data._warn(str(exc))
delete_this_one = not keep
if combine_this_one:
if data._debug.should('dataio'):
data._debug.write(f"Combining data file {f!r}")
file_hashes.add(sha)
try:
new_data = CoverageData(f, debug=data._debug)
new_data.read()
except CoverageException as exc:
if data._warn:
# The CoverageException has the file name in it, so just
# use the message as the warning.
data._warn(str(exc))
delete_this_one = False
else:
data.update(new_data, aliases=aliases)
combined_any = True
if message:
message(f"Combined data file {rel_file_name}")
else:
data.update(new_data, aliases=aliases)
files_combined += 1
if message:
message(f"Combined data file {rel_file_name}")
if not keep:
if data._debug.should('dataio'):
data._debug.write(f"Deleting combined data file {f!r}")
file_be_gone(f)
message(f"Skipping duplicate data {rel_file_name}")

if delete_this_one:
if data._debug.should('dataio'):
data._debug.write(f"Deleting data file {f!r}")
file_be_gone(f)

if strict and not files_combined:
if strict and not combined_any:
raise NoDataError("No usable data files")


Expand Down
1 change: 1 addition & 0 deletions tests/test_concurrency.py
Original file line number Diff line number Diff line change
@@ -490,6 +490,7 @@ def try_multiprocessing_code(
)
for line in out_lines
)
assert len(glob.glob(".coverage.*")) == 0
out = self.run_command("coverage report -m")

last_line = self.squeezed_lines(out)[-1]
Expand Down

0 comments on commit ab8aad6

Please sign in to comment.