From 2ed57c62cc17fcbfc0fd25e5bbf340f19626ff9a Mon Sep 17 00:00:00 2001 From: Oleg K <84438358+OlegGolfer7@users.noreply.github.com> Date: Mon, 11 Dec 2023 18:59:55 +0200 Subject: [PATCH] Added analysis command and json output (#76) new command for json output --- README.md | 16 +++ deepview_profile/__main__.py | 2 + deepview_profile/commands/analysis.py | 177 ++++++++++++++++++++++++++ deepview_profile/nvml.py | 9 ++ deepview_profile/utils.py | 43 ++++++- 5 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 deepview_profile/commands/analysis.py diff --git a/README.md b/README.md index 9c0c242..eb0f308 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,22 @@ You just need to use `deepview memory` instead of `deepview time`. python3 -m deepview_profile memory entry_point.py --output my_output_file.sqlite ``` +To export the available analyses to a JSON file, run the `deepview analysis --all` command with an entry point and an output file. This JSON file is required to later view the analysis in the web viewer. + +It is also possible to run only some of the analyses. The following options are available: `--measure-breakdown`, `--measure-throughput`, `--habitat-predict`, `--measure-utilization`, `--energy-compute`, `--exclude-source` + +```zsh +python3 -m deepview_profile analysis entry_point.py --all --exclude-source --output=complete_analysis.json +``` + +The `--exclude-source` option omits the `encodedFiles` section, which is otherwise produced by the `--measure-breakdown` analysis, from the output. + +You can also combine any of the optional analyses: + +```zsh +python3 -m deepview_profile analysis entry_point.py --measure-breakdown --measure-throughput --habitat-predict --measure-utilization --energy-compute --output=various_analysis.json +``` +

Development Environment Setup

From the project root, do diff --git a/deepview_profile/__main__.py b/deepview_profile/__main__.py index 52382e8..205026c 100644 --- a/deepview_profile/__main__.py +++ b/deepview_profile/__main__.py @@ -8,6 +8,7 @@ import deepview_profile.commands.interactive import deepview_profile.commands.memory import deepview_profile.commands.time +import deepview_profile.commands.analysis def main(): @@ -25,6 +26,7 @@ def main(): deepview_profile.commands.interactive.register_command(subparsers) deepview_profile.commands.memory.register_command(subparsers) deepview_profile.commands.time.register_command(subparsers) + deepview_profile.commands.analysis.register_command(subparsers) args = parser.parse_args() if args.version: diff --git a/deepview_profile/commands/analysis.py b/deepview_profile/commands/analysis.py new file mode 100644 index 0000000..704c5a2 --- /dev/null +++ b/deepview_profile/commands/analysis.py @@ -0,0 +1,177 @@ +import logging +import os +import sys +import json +import platform + +from deepview_profile.analysis.runner import analyze_project +from deepview_profile.nvml import NVML +from deepview_profile.utils import release_memory, next_message_to_dict, files_encoded_unique + +from deepview_profile.initialization import ( + check_skyline_preconditions, + initialize_skyline, +) +from deepview_profile.error_printing import print_analysis_error + +logger = logging.getLogger(__name__) + +def register_command(subparsers): + parser = subparsers.add_parser( + "analysis", + help="Generate usage report for various analysis.", + ) + parser.add_argument( + "entry_point", + help="The entry point file in this project that contains the DeepView " + "provider functions." 
+ ) + parser.add_argument( + "--all", + action="store_true", + help="The complete analysis of all methods" + ) + parser.add_argument( + "-breakdown", "--measure-breakdown", + action="store_true", + help="Adds breakdown data to results" + ) + parser.add_argument( + "-throughput", "--measure-throughput", + action="store_true", + help="Adds throughput data to results" + ) + parser.add_argument( + "-predict", "--habitat-predict", + action="store_true", + help="Adds habitat data prediction to results" + ) + parser.add_argument( + "-utilization", "--measure-utilization", + action="store_true", + help="Adds utilization data to results" + ) + parser.add_argument( + "-energy", "--energy-compute", + action="store_true", + help="Adds energy use to results" + ) + parser.add_argument( + "-o", "--output", + help="The location where the complete report should be stored", + required=True + ) + parser.add_argument( + "--log-file", + help="The location of the log file", + ) + parser.add_argument( + "--exclude-source", + action="store_true", + help="Allows not adding encodedFiles section" + ) + parser.add_argument("--debug", action="store_true", help="Log debug messages.") + parser.set_defaults(func=main) + +def measure_breakdown(session, nvml): + print("analysis: running measure_breakdown()") + yield session.measure_breakdown(nvml) + release_memory() + +def measure_throughput(session): + print("analysis: running measure_throughput()") + yield session.measure_throughput() + release_memory() + +def habitat_predict(session): + print("analysis: running deepview_predict()") + yield session.habitat_predict() + release_memory() + +def measure_utilization(session): + print("analysis: running measure_utilization()") + yield session.measure_utilization() + release_memory() + +def energy_compute(session): + print("analysis: running energy_compute()") + yield session.energy_compute() + release_memory() + +def hardware_information(nvml): + + hardware_info = { + 'hostname': platform.node(), + 
'os': " ".join(list(platform.uname())), + 'gpus': nvml.get_device_names() + } + return hardware_info + +def actual_main(args): + from deepview_profile.analysis.session import AnalysisSession + from deepview_profile.exceptions import AnalysisError + + if os.path.exists(args.output): + print( + "ERROR: The specified output file already exists.", + file=sys.stderr, + ) + sys.exit(1) + + try: + project_root = os.getcwd() + data = { + "analysisState": { + "message_type": "analysis", + "project_root": project_root, + "project_entry_point": args.entry_point, + "hardware_info": {}, + "throughput": {}, + "breakdown": {}, + "habitat": {}, + "additionalProviders": "", + "energy": {}, + "utilization": {} + }, + "epochs": 50, + "iterPerEpoch": 1000, + "encodedFiles": [] + } + + session = AnalysisSession.new_from(project_root, args.entry_point) + release_memory() + + is_return_all = args.all + + with NVML() as nvml: + data['analysisState']['hardware_info'] = hardware_information(nvml) + if args.measure_breakdown or is_return_all: + data['analysisState']['breakdown'] = next_message_to_dict(measure_breakdown(session, nvml)) + + operation_tree = data['analysisState']['breakdown']['operationTree'] + if not args.exclude_source and operation_tree is not None: + data['encodedFiles'] = files_encoded_unique(operation_tree) + + if args.measure_throughput or is_return_all: + data['analysisState']['throughput'] = next_message_to_dict(measure_throughput(session)) + + if args.habitat_predict or is_return_all: + data['analysisState']['habitat'] = next_message_to_dict(habitat_predict(session)) + + if args.measure_utilization or is_return_all: + data['analysisState']['utilization'] = next_message_to_dict(measure_utilization(session)) + + if args.energy_compute or is_return_all: + data['analysisState']['energy'] = next_message_to_dict(energy_compute(session)) + + with open(args.output, "w") as json_file: + json.dump(data, json_file, indent=4) + + except AnalysisError as ex: + 
print_analysis_error(ex) + sys.exit(1) + +def main(args): + check_skyline_preconditions(args) + initialize_skyline(args) + actual_main(args) \ No newline at end of file diff --git a/deepview_profile/nvml.py b/deepview_profile/nvml.py index 5497fa4..7903098 100644 --- a/deepview_profile/nvml.py +++ b/deepview_profile/nvml.py @@ -19,3 +19,12 @@ def get_memory_capacity(self): # TODO: Support multiple devices handle = pynvml.nvmlDeviceGetHandleByIndex(0) return pynvml.nvmlDeviceGetMemoryInfo(handle) + + def get_device_names(self): + device_names = [] + for i in range(pynvml.nvmlDeviceGetCount()): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + device_name = pynvml.nvmlDeviceGetName(handle).decode("utf-8") + device_names.append(device_name) + return device_names + diff --git a/deepview_profile/utils.py b/deepview_profile/utils.py index 4f43bc6..f26af8b 100644 --- a/deepview_profile/utils.py +++ b/deepview_profile/utils.py @@ -1,10 +1,51 @@ import torch import logging import gc +import os +import base64 + +from google.protobuf.json_format import MessageToDict logger = logging.getLogger(__name__) def release_memory(): logger.debug("Emptying cache") gc.collect() - torch.cuda.empty_cache() \ No newline at end of file + torch.cuda.empty_cache() + +def next_message_to_dict(object): + message = next(object) + return MessageToDict(message) + +def files_encoded_unique(operation_tree): + encoded_files = [] + + for analysis in operation_tree: + context_info_map = analysis['operation'].get('contextInfoMap', None) + if context_info_map is not None and len(context_info_map) > 0: + filename = list(context_info_map[0]['context']['filePath']['components']).pop() + + already_in_list = next((item for item in encoded_files if item['name'] == filename), None) + if not already_in_list: + file_path = os.path.join("", *list(context_info_map[0]['context']['filePath']['components'])) + + encoded_file = encode_file("", file_path) + encoded_files.append(encoded_file) + + return encoded_files + 
def encode_file(root, file):
    """Return a base64-encoded snapshot of a Python source file.

    Args:
        root: Directory that ``file`` is resolved against (the two are
            combined with ``os.path.join``).
        file: Path of the file relative to ``root``. It is stored verbatim
            as the ``"name"`` of the result.

    Returns:
        A dict ``{"name": file, "content": <base64 text>}`` whose content is
        the file's text encoded as UTF-8 and then base64, or ``None`` when
        ``file`` does not have a ``.py`` extension or is exactly
        ``"entry_point.py"``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Only ``.py`` files are exported; a file named exactly
    # "entry_point.py" is skipped. Returning early keeps the "nothing to
    # encode" path from ever touching the result dict.
    if os.path.splitext(file)[1] != ".py" or file == "entry_point.py":
        return None

    # Decode explicitly as UTF-8 (Python's default source encoding) instead
    # of the platform locale encoding, so the bytes that get base64-encoded
    # below match the file regardless of the machine's locale settings.
    with open(os.path.join(root, file), "r", encoding="utf-8") as source:
        text = source.read()

    return {
        "name": file,
        "content": base64.b64encode(text.encode("utf-8")).decode("utf-8"),
    }