From 7b5d49e8738468fb5ae816628c58a556fdd78491 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Thu, 28 Dec 2023 10:50:51 -0700 Subject: [PATCH 1/2] fix flops --- composer/callbacks/speed_monitor.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/composer/callbacks/speed_monitor.py b/composer/callbacks/speed_monitor.py index 2b0eeedc80..03a6a59e88 100644 --- a/composer/callbacks/speed_monitor.py +++ b/composer/callbacks/speed_monitor.py @@ -107,20 +107,18 @@ def get_gpu_flops_available(state: State): device_name = 'v100-pcie' elif 't4' in device_name: device_name = 't4' - else: - device_name = None - if device_name is not None: - try: - gpu_flops_available = int(GPU_AVAILABLE_FLOPS[device_name][state.precision.value]) - except: - gpu_flops_available = None + if device_name in GPU_AVAILABLE_FLOPS and state.precision.value in GPU_AVAILABLE_FLOPS[device_name]: + gpu_flops_available = int(GPU_AVAILABLE_FLOPS[device_name][state.precision.value]) + else: + gpu_flops_available = None if gpu_flops_available is None: warnings.warn( - f'gpu_flop count not found for {device_name} with precision: {state.precision.value}; ' +\ - f'MFU cannot be calculated and reported. gpu_flops_available can be manually' +\ - f'overridden by setting gpu_flops_available in SpeedMonitor.' + f'gpu_flop count not found for {device_name} with precision={state.precision.value} ' +\ + f'so MFU cannot be calculated and reported. gpu_flops_available can be manually ' +\ + f'overridden by setting gpu_flops_available in SpeedMonitor or {device_name} can ' +\ + f'be added to GPU_AVAILABLE_FLOPS in composer/callbacks/speed_monitor.py' ) # Setting to 0 will disable MFU computation and prevent # the speed monitor from running this helper every batch From 512f2d2ea295568393abe3848bf2d1ade819c384 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Tue, 2 Jan 2024 12:14:34 -0700 Subject: [PATCH 2/2] stacklevel --- composer/callbacks/speed_monitor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/composer/callbacks/speed_monitor.py b/composer/callbacks/speed_monitor.py index 03a6a59e88..e574b8e713 100644 --- a/composer/callbacks/speed_monitor.py +++ b/composer/callbacks/speed_monitor.py @@ -118,7 +118,8 @@ def get_gpu_flops_available(state: State): f'gpu_flop count not found for {device_name} with precision={state.precision.value} ' +\ f'so MFU cannot be calculated and reported. gpu_flops_available can be manually ' +\ f'overridden by setting gpu_flops_available in SpeedMonitor or {device_name} can ' +\ - f'be added to GPU_AVAILABLE_FLOPS in composer/callbacks/speed_monitor.py' + f'be added to GPU_AVAILABLE_FLOPS in composer/callbacks/speed_monitor.py', + stacklevel=2, ) # Setting to 0 will disable MFU computation and prevent # the speed monitor from running this helper every batch