From 637e3ce6866fb7de34b0de127e2a251592016e1b Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 19 Sep 2023 12:47:17 +0100
Subject: [PATCH 1/2] Fix reported power efficiency for inferred results,
 #1494

---
 tools/submission/submission_checker.py | 40 ++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index 2d06e01b7..296c4d955 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -1643,7 +1643,43 @@ def get_performance_metric(
         if scenario == "MultiStream" and config.uses_legacy_multistream()
         else scenario
     )
+    samples_per_query = mlperf_log["effective_samples_per_query"]
+    if scenario == "SingleStream":
+        # qps_wo_loadgen_overhead is only used for inferring Offline from SingleStream; only for old submissions
+        qps_wo_loadgen_overhead = mlperf_log["result_qps_without_loadgen_overhead"]
+
     res = float(mlperf_log[RESULT_FIELD_NEW[config.version][scenario_for_res]])

+    inferred = False
+    # special case for results inferred from different scenario
+    if scenario_fixed in ["Offline"] and scenario in ["SingleStream"]:
+        inferred = True
+        res = qps_wo_loadgen_overhead
+
+    if (
+        scenario_fixed in ["Offline"] and not config.uses_legacy_multistream()
+    ) and scenario in ["MultiStream"]:
+        inferred = True
+        res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS)
+
+    if (
+        scenario_fixed in ["MultiStream"] and not config.uses_legacy_multistream()
+    ) and scenario in ["SingleStream"]:
+        inferred = True
+        # samples_per_query does not match with the one reported in the logs
+        # when inferring MultiStream from SingleStream
+        samples_per_query = 8
+        if uses_early_stopping:
+            early_stopping_latency_ms = mlperf_log["early_stopping_latency_ms"]
+            if early_stopping_latency_ms == 0:
+                log.error(
+                    "Not enough samples were processed for early stopping to make an estimate"
+                )
+                is_valid = False
+            res = (early_stopping_latency_ms * samples_per_query) / MS_TO_NS
+        else:
+            res = (latency_99_percentile * samples_per_query) / MS_TO_NS
+
     return res


 def check_performance_dir(
@@ -2718,8 +2754,8 @@ def log_result(
                 ranging_path,
                 perf_path,
                 scenario_fixed,
-                r,
                 ranging_r,
+                r,
                 config,
             )
             if not power_is_valid:
@@ -2741,7 +2777,7 @@ def log_result(
                     "{:f} "
                     "with "
                     "power_metric"
-                    " = {:f} and power_efficiency (inf/J) = {:f}"
+                    " = {:f} and power_efficiency (samples/J) = {:f}"
                 ).format(r, power_metric, power_efficiency)
             )
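Note on the arithmetic behind the inferred results whose power efficiency this
patch fixes: the checker converts a run from the measured scenario into the
reporting scenario's metric before pairing it with the power reading. The
sketch below is a minimal, self-contained restatement of the two conversions
in the hunk above; the function names are hypothetical, and the constants are
assumed to follow the checker's conventions (S_TO_MS = 1000,
MS_TO_NS = 1000 * 1000, detail-log latencies in nanoseconds):

    S_TO_MS = 1000          # seconds -> milliseconds
    MS_TO_NS = 1000 * 1000  # milliseconds -> nanoseconds

    def infer_offline_from_multistream(latency_mean_ns, samples_per_query):
        # Offline metric is samples/second: samples per query divided by
        # the mean query latency converted from nanoseconds to seconds.
        return samples_per_query * S_TO_MS / (latency_mean_ns / MS_TO_NS)

    def infer_multistream_from_singlestream(latency_99_ns, samples_per_query=8):
        # MultiStream metric is the 99th-percentile latency in ms of an
        # 8-sample query, so scale the per-sample latency and convert to ms.
        return (latency_99_ns * samples_per_query) / MS_TO_NS

    print(infer_offline_from_multistream(2_000_000, 8))   # 4000.0 samples/s
    print(infer_multistream_from_singlestream(500_000))   # 4.0 ms

The argument swap in log_result then pairs each performance number
(ranging_r for the ranging run, r for the testing run) with the matching
power measurement, which is presumably what makes the reported samples/J
figure come out right for inferred results.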
From 6028c982735ee020b7491185a058e627aaff121c Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Wed, 20 Sep 2023 15:12:24 +0100
Subject: [PATCH 2/2] Refactor get_inferred_result code

---
 tools/submission/submission_checker.py | 89 +++++++++++---------------
 1 file changed, 38 insertions(+), 51 deletions(-)

diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index 296c4d955..6c2265916 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -1643,42 +1643,12 @@ def get_performance_metric(
         if scenario == "MultiStream" and config.uses_legacy_multistream()
         else scenario
     )
-    samples_per_query = mlperf_log["effective_samples_per_query"]
-    if scenario == "SingleStream":
-        # qps_wo_loadgen_overhead is only used for inferring Offline from SingleStream; only for old submissions
-        qps_wo_loadgen_overhead = mlperf_log["result_qps_without_loadgen_overhead"]
     res = float(mlperf_log[RESULT_FIELD_NEW[config.version][scenario_for_res]])

     inferred = False
-    # special case for results inferred from different scenario
-    if scenario_fixed in ["Offline"] and scenario in ["SingleStream"]:
-        inferred = True
-        res = qps_wo_loadgen_overhead
-
-    if (
-        scenario_fixed in ["Offline"] and not config.uses_legacy_multistream()
-    ) and scenario in ["MultiStream"]:
-        inferred = True
-        res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS)
-
-    if (
-        scenario_fixed in ["MultiStream"] and not config.uses_legacy_multistream()
-    ) and scenario in ["SingleStream"]:
-        inferred = True
-        # samples_per_query does not match with the one reported in the logs
-        # when inferring MultiStream from SingleStream
-        samples_per_query = 8
-        if uses_early_stopping:
-            early_stopping_latency_ms = mlperf_log["early_stopping_latency_ms"]
-            if early_stopping_latency_ms == 0:
-                log.error(
-                    "Not enough samples were processed for early stopping to make an estimate"
-                )
-                is_valid = False
-            res = (early_stopping_latency_ms * samples_per_query) / MS_TO_NS
-        else:
-            res = (latency_99_percentile * samples_per_query) / MS_TO_NS
+    if scenario_fixed != scenario:
+        inferred, res = get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, False)

     return res

@@ -1718,9 +1688,6 @@ def check_performance_dir(
         min_query_count = mlperf_log["effective_min_query_count"]
         samples_per_query = mlperf_log["effective_samples_per_query"]
         min_duration = mlperf_log["effective_min_duration_ms"]
-        if scenario == "SingleStream":
-            # qps_wo_loadgen_overhead is only used for inferring Offline from SingleStream; only for old submissions
-            qps_wo_loadgen_overhead = mlperf_log["result_qps_without_loadgen_overhead"]
         sut_name = mlperf_log["sut_name"]
     else:
         fname = os.path.join(path, "mlperf_log_summary.txt")
@@ -1887,6 +1854,40 @@ def check_performance_dir(
     )

     inferred = False
+    if scenario_fixed != scenario:
+        inferred, res = get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, True)
+
+    is_network_system, is_network_mode_valid = is_system_over_network(
+        division, system_json, path
+    )
+    is_valid &= is_network_mode_valid
+    if is_network_system:
+        # for network mode verify the SUT name is valid, according to the rules (must include "Network SUT" in name)
+        if NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME not in sut_name:
+            log.error(
+                f"{fname} invalid sut name for network mode. expecting the substring '{NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME}' got '{sut_name}'"
+            )
+            is_valid = False
+
+    return is_valid, res, inferred
+
+def get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, log_error=False):
+
+    inferred = False
+    # Check if current scenario (and version) uses early stopping
+    uses_early_stopping = config.uses_early_stopping(scenario)
+
+    latency_mean = mlperf_log["result_mean_latency_ns"]
+    if scenario in ["MultiStream"]:
+        latency_99_percentile = mlperf_log[
+            "result_99.00_percentile_per_query_latency_ns"
+        ]
+        latency_mean = mlperf_log["result_mean_query_latency_ns"]
+    samples_per_query = mlperf_log["effective_samples_per_query"]
+    if scenario == "SingleStream":
+        # qps_wo_loadgen_overhead is only used for inferring Offline from SingleStream; only for old submissions
+        qps_wo_loadgen_overhead = mlperf_log["result_qps_without_loadgen_overhead"]
+
     # special case for results inferred from different scenario
     if scenario_fixed in ["Offline"] and scenario in ["SingleStream"]:
         inferred = True
@@ -1907,7 +1908,7 @@ def check_performance_dir(
         samples_per_query = 8
         if uses_early_stopping:
             early_stopping_latency_ms = mlperf_log["early_stopping_latency_ms"]
-            if early_stopping_latency_ms == 0:
+            if early_stopping_latency_ms == 0 and log_error:
                 log.error(
                     "Not enough samples were processed for early stopping to make an estimate"
                 )
                 is_valid = False
             res = (early_stopping_latency_ms * samples_per_query) / MS_TO_NS
         else:
             res = (latency_99_percentile * samples_per_query) / MS_TO_NS
-
-    is_network_system, is_network_mode_valid = is_system_over_network(
-        division, system_json, path
-    )
-    is_valid &= is_network_mode_valid
-    if is_network_system:
-        # for network mode verify the SUT name is valid, accodring to the rules (must include "Network SUT" in name)
-        if NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME not in sut_name:
-            log.error(
-                f"{fname} invalid sut name for network mode. expecting the substring '{NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME}' got '{sut_name}'"
-            )
-            is_valid = False
-
-    return is_valid, res, inferred
-
+    return inferred, res

 def get_power_metric(config, scenario_fixed, log_path, is_valid, res):
     # parse the power logs
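After this refactor, get_performance_metric() and check_performance_dir()
share one inference code path and differ only in the log_error flag, so a
too-short early-stopping run is flagged from a single place. Below is a
trimmed-down, self-contained sketch of the shared helper for sanity-checking:
the config handling is stubbed out, only the MultiStream-from-SingleStream
branch with early stopping is shown, and fake_log is a hypothetical stand-in
for the detail log:

    MS_TO_NS = 1000 * 1000

    def get_inferred_result_sketch(scenario_fixed, scenario, res, mlperf_log,
                                   log_error=False):
        inferred = False
        if scenario_fixed == "MultiStream" and scenario == "SingleStream":
            inferred = True
            samples_per_query = 8  # fixed by convention when inferring MultiStream
            # despite the key name, the detail-log value is treated as
            # nanoseconds here, matching the division by MS_TO_NS in the checker
            early_stopping_latency = mlperf_log["early_stopping_latency_ms"]
            if early_stopping_latency == 0 and log_error:
                print("Not enough samples were processed for early stopping "
                      "to make an estimate")
            res = (early_stopping_latency * samples_per_query) / MS_TO_NS
        return inferred, res

    fake_log = {"early_stopping_latency_ms": 750_000}  # 750,000 ns per sample
    print(get_inferred_result_sketch("MultiStream", "SingleStream", 0.0, fake_log))
    # -> (True, 6.0): 0.75 ms per sample * 8 samples = 6 ms per query

Passing log_error=False from get_performance_metric() keeps the helper silent
there, while check_performance_dir() passes True and remains the one caller
that reports the early-stopping condition.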