Commit 1b4b828
[benchmarks] Fix processing of PyTorch profiler events. (#7930)
ysiraichi authored Aug 30, 2024
1 parent beeb40b commit 1b4b828
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions benchmarks/experiment_runner.py
@@ -637,11 +637,11 @@ def _collect_cuda_cpu_metrics(self, pytorch_profile: Optional[profile],
       if evt.device_type == DeviceType.CPU:
         # In legacy profiler, kernel info is stored in cpu events
         if evt.is_legacy:
-          total_cuda_time += evt.self_cuda_time_total
+          total_cuda_time += evt.self_device_time_total
       elif evt.device_type == DeviceType.CUDA:
         # In kineto profiler, there're events with the correct device type
         # (e.g. CUDA)
-        total_cuda_time += evt.self_cuda_time_total
+        total_cuda_time += evt.self_device_time_total
 
     metrics["total_cpu_time_s"] = us_to_s(total_cpu_time)
     metrics["total_cuda_time_s"] = us_to_s(total_cuda_time)
@@ -661,9 +661,9 @@ def is_aten_op(op_name):
 
     extract_prof_info = lambda event: {
         "self_cpu_time_s": us_to_s(event.self_cpu_time_total),
-        "self_cuda_time_s": us_to_s(event.self_cuda_time_total),
+        "self_cuda_time_s": us_to_s(event.self_device_time_total),
         "total_cpu_time_s": us_to_s(event.cpu_time_total),
-        "total_cuda_time_s": us_to_s(event.cuda_time_total),
+        "total_cuda_time_s": us_to_s(event.device_time_total),
         "num_of_calls": event.count
     }
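For context, recent PyTorch releases rename the CUDA-specific timing attributes on profiler events to device-generic ones (self_cuda_time_total becomes self_device_time_total, cuda_time_total becomes device_time_total), which is the rename this commit adopts in the benchmark runner. Below is a minimal, hypothetical sketch of reading those attributes; it is not the benchmark runner's own code, and it assumes a PyTorch version new enough to expose the renamed attributes. The matmul workload, the us_to_s helper, and the per_op dict are illustrative only.

import torch
from torch.profiler import profile, ProfilerActivity

# Profile a small workload; fall back to CPU when no CUDA device is present.
device = "cuda" if torch.cuda.is_available() else "cpu"
activities = [ProfilerActivity.CPU]
if device == "cuda":
    activities.append(ProfilerActivity.CUDA)

with profile(activities=activities) as prof:
    x = torch.randn(512, 512, device=device)
    y = (x @ x).sum()

us_to_s = lambda us: us / 1e6  # profiler event times are reported in microseconds

# self_device_time_total / device_time_total are the device-generic names
# this commit switches to (assumed available in the installed PyTorch).
total_device_time = sum(
    evt.self_device_time_total for evt in prof.key_averages())

per_op = {
    evt.key: {
        "self_device_time_s": us_to_s(evt.self_device_time_total),
        "total_device_time_s": us_to_s(evt.device_time_total),
        "num_of_calls": evt.count,
    } for evt in prof.key_averages()
}

print(f"total device time: {us_to_s(total_device_time):.6f} s")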
