From 2ed57c62cc17fcbfc0fd25e5bbf340f19626ff9a Mon Sep 17 00:00:00 2001
From: Oleg K <84438358+OlegGolfer7@users.noreply.github.com>
Date: Mon, 11 Dec 2023 18:59:55 +0200
Subject: [PATCH] Added analysis command and json output (#76)

new command for json output
---
 README.md                             |  16 +++
 deepview_profile/__main__.py          |   2 +
 deepview_profile/commands/analysis.py | 177 ++++++++++++++++++++++++++
 deepview_profile/nvml.py              |   9 ++
 deepview_profile/utils.py             |  43 ++++++-
 5 files changed, 246 insertions(+), 1 deletion(-)
 create mode 100644 deepview_profile/commands/analysis.py
diff --git a/README.md b/README.md
index 9c0c242..eb0f308 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,22 @@ You just need to use `deepview memory` instead of `deepview time`.
 python3 -m deepview_profile memory entry_point.py --output my_output_file.sqlite
 ```
 
+To export the available analyses to a JSON file, use the `deepview analysis --all` command with a specific entry point and output file. This JSON file is required to later view the analysis in the web viewer.
+
+It is also possible to run only selected analyses. The following analysis flags are available: `--measure-breakdown`, `--measure-throughput`, `--habitat-predict`, `--measure-utilization`, `--energy-compute`. The `--exclude-source` option can be added to any of these runs.
+
+```zsh
+python3 -m deepview_profile analysis entry_point.py --all --exclude-source --output=complete_analysis.json 
+```
+
+The `--exclude-source` option omits the `encodedFiles` section from the output; that section is otherwise filled in by the `--measure-breakdown` analysis.
+
+You can also run any combination of the optional analyses:
+
+```zsh
+python3 -m deepview_profile analysis entry_point.py --measure-breakdown --measure-throughput --habitat-predict --measure-utilization --energy-compute --output=various_analysis.json
+```
+
 <h2 id="dev-setup">Development Environment Setup</h2>
 
 From the project root, do
diff --git a/deepview_profile/__main__.py b/deepview_profile/__main__.py
index 52382e8..205026c 100644
--- a/deepview_profile/__main__.py
+++ b/deepview_profile/__main__.py
@@ -8,6 +8,7 @@
 import deepview_profile.commands.interactive
 import deepview_profile.commands.memory
 import deepview_profile.commands.time
+import deepview_profile.commands.analysis
 
 
 def main():
@@ -25,6 +26,7 @@ def main():
     deepview_profile.commands.interactive.register_command(subparsers)
     deepview_profile.commands.memory.register_command(subparsers)
     deepview_profile.commands.time.register_command(subparsers)
+    deepview_profile.commands.analysis.register_command(subparsers)
     args = parser.parse_args()
 
     if args.version:
diff --git a/deepview_profile/commands/analysis.py b/deepview_profile/commands/analysis.py
new file mode 100644
index 0000000..704c5a2
--- /dev/null
+++ b/deepview_profile/commands/analysis.py
@@ -0,0 +1,177 @@
+import logging 
+import os
+import sys
+import json
+import platform
+
+from deepview_profile.analysis.runner import analyze_project
+from deepview_profile.nvml import NVML
+from deepview_profile.utils import release_memory, next_message_to_dict, files_encoded_unique
+
+from deepview_profile.initialization import (
+    check_skyline_preconditions,
+    initialize_skyline,
+)
+from deepview_profile.error_printing import print_analysis_error
+
+logger = logging.getLogger(__name__)
+
+def register_command(subparsers):
+    """Attach the ``analysis`` subcommand and all of its options to ``subparsers``.
+
+    Args:
+        subparsers: The object on which ``add_parser`` is called to create the
+            ``analysis`` parser.
+
+    The positional ``entry_point`` and the ``-o/--output`` option are required;
+    all other options are optional. The created parser dispatches to ``main``
+    through ``set_defaults(func=main)``.
+    """
+    analysis_parser = subparsers.add_parser(
+        "analysis",
+        help="Generate usage report for various analysis.",
+    )
+    add = analysis_parser.add_argument
+
+    add(
+        "entry_point",
+        help="The entry point file in this project that contains the DeepView "
+             "provider functions.",
+    )
+
+    # Boolean switches selecting which analyses run. Registration order is the
+    # order argparse uses when listing options in --help.
+    analysis_switches = (
+        (("--all",), "The complete analysis of all methods"),
+        (("-breakdown", "--measure-breakdown"), "Adds breakdown data to results"),
+        (("-throughput", "--measure-throughput"), "Adds throughput data to results"),
+        (("-predict", "--habitat-predict"), "Adds habitat data prediction to results"),
+        (("-utilization", "--measure-utilization"), "Adds utilization data to results"),
+        (("-energy", "--energy-compute"), "Adds energy use to results"),
+    )
+    for flags, description in analysis_switches:
+        add(*flags, action="store_true", help=description)
+
+    # The report destination is mandatory; the log file is not.
+    add(
+        "-o", "--output",
+        required=True,
+        help="The location where the complete report should be stored",
+    )
+    add(
+        "--log-file",
+        help="The location of the log file",
+    )
+
+    # Flags that do not select an analysis.
+    add(
+        "--exclude-source",
+        action="store_true",
+        help="Allows not adding encodedFiles section",
+    )
+    add("--debug", action="store_true", help="Log debug messages.")
+
+    analysis_parser.set_defaults(func=main)
+
+def _yield_then_release(banner, analysis, *args):
+    """Print ``banner``, yield ``analysis(*args)`` once, then release memory."""
+    print(banner)
+    try:
+        yield analysis(*args)
+    finally:
+        # Also runs when the consumer takes one value and drops the generator.
+        release_memory()
+
+# Each wrapper below returns a one-shot generator over one session analysis.
+def measure_breakdown(session, nvml):
+    return _yield_then_release("analysis: running measure_breakdown()", session.measure_breakdown, nvml)
+
+def measure_throughput(session):
+    return _yield_then_release("analysis: running measure_throughput()", session.measure_throughput)
+
+def habitat_predict(session):
+    return _yield_then_release("analysis: running deepview_predict()", session.habitat_predict)
+
+def measure_utilization(session):
+    return _yield_then_release("analysis: running measure_utilization()", session.measure_utilization)
+
+def energy_compute(session):
+    return _yield_then_release("analysis: running energy_compute()", session.energy_compute)
+
+def hardware_information(nvml):
+    """Collect host details: node name, space-joined uname fields, GPU names."""
+    uname_fields = list(platform.uname())
+    return {
+        'hostname': platform.node(),
+        'os': " ".join(uname_fields),
+        'gpus': nvml.get_device_names(),
+    }
+
+def actual_main(args):
+    """Run the requested analyses and write the combined report as JSON.
+
+    Exits with status 1 when ``args.output`` already exists or when an
+    ``AnalysisError`` is raised. ``--all`` enables every analysis.
+    """
+    from deepview_profile.analysis.session import AnalysisSession
+    from deepview_profile.exceptions import AnalysisError
+
+    if os.path.exists(args.output):
+        print(
+            "ERROR: The specified output file already exists.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    try:
+        project_root = os.getcwd()
+        data = {
+            "analysisState": {
+                "message_type": "analysis",
+                "project_root": project_root,
+                "project_entry_point": args.entry_point,
+                "hardware_info": {},
+                "throughput": {},
+                "breakdown": {},
+                "habitat": {},
+                "additionalProviders": "",
+                "energy": {},
+                "utilization": {}
+            },
+            "epochs": 50,
+            "iterPerEpoch": 1000,
+            "encodedFiles": []
+        }
+        state = data['analysisState']
+        session = AnalysisSession.new_from(project_root, args.entry_point)
+        release_memory()
+
+        is_return_all = args.all
+        with NVML() as nvml:
+            state['hardware_info'] = hardware_information(nvml)
+            if args.measure_breakdown or is_return_all:
+                state['breakdown'] = next_message_to_dict(measure_breakdown(session, nvml))
+                # MessageToDict leaves out empty fields, so the key may be absent.
+                operation_tree = state['breakdown'].get('operationTree')
+                if operation_tree and not args.exclude_source:
+                    data['encodedFiles'] = files_encoded_unique(operation_tree)
+
+        if args.measure_throughput or is_return_all:
+            state['throughput'] = next_message_to_dict(measure_throughput(session))
+        if args.habitat_predict or is_return_all:
+            state['habitat'] = next_message_to_dict(habitat_predict(session))
+        if args.measure_utilization or is_return_all:
+            state['utilization'] = next_message_to_dict(measure_utilization(session))
+        if args.energy_compute or is_return_all:
+            state['energy'] = next_message_to_dict(energy_compute(session))
+
+        with open(args.output, "w") as json_file:
+            json.dump(data, json_file, indent=4)
+    except AnalysisError as ex:
+        print_analysis_error(ex)
+        sys.exit(1)
+
+def main(args):
+    """Call the precondition check, the initializer and ``actual_main`` on ``args``, in order."""
+    for step in (check_skyline_preconditions, initialize_skyline, actual_main):
+        step(args)
\ No newline at end of file
diff --git a/deepview_profile/nvml.py b/deepview_profile/nvml.py
index 5497fa4..7903098 100644
--- a/deepview_profile/nvml.py
+++ b/deepview_profile/nvml.py
@@ -19,3 +19,12 @@ def get_memory_capacity(self):
         # TODO: Support multiple devices
         handle = pynvml.nvmlDeviceGetHandleByIndex(0)
         return pynvml.nvmlDeviceGetMemoryInfo(handle)
+
+    def get_device_names(self):
+        """Return the name of every device reported by NVML, as ``str``."""
+        device_names = []
+        for i in range(pynvml.nvmlDeviceGetCount()):
+            name = pynvml.nvmlDeviceGetName(pynvml.nvmlDeviceGetHandleByIndex(i))
+            # Older pynvml releases return bytes here, newer ones return str.
+            device_names.append(name.decode("utf-8") if isinstance(name, bytes) else name)
+        return device_names
diff --git a/deepview_profile/utils.py b/deepview_profile/utils.py
index 4f43bc6..f26af8b 100644
--- a/deepview_profile/utils.py
+++ b/deepview_profile/utils.py
@@ -1,10 +1,51 @@
 import torch 
 import logging
 import gc
+import os
+import base64
+
+from google.protobuf.json_format import MessageToDict
 
 logger = logging.getLogger(__name__)
 
 def release_memory():
     logger.debug("Emptying cache")
     gc.collect()
-    torch.cuda.empty_cache()
\ No newline at end of file
+    torch.cuda.empty_cache()
+
+def next_message_to_dict(object):
+    """Advance ``object`` once with ``next`` and convert that message via ``MessageToDict``."""
+    return MessageToDict(next(object))
+
+def files_encoded_unique(operation_tree):
+    """Return one ``encode_file`` entry per distinct file path in ``operation_tree``."""
+    encoded_files = []
+    seen_paths = set()  # paths already handled, so each file is read once
+    for analysis in operation_tree:
+        context_info_map = analysis['operation'].get('contextInfoMap')
+        if not context_info_map:
+            continue
+        file_path = os.path.join("", *context_info_map[0]['context']['filePath']['components'])
+        if file_path in seen_paths:
+            continue
+        seen_paths.add(file_path)
+        encoded_file = encode_file("", file_path)
+        if encoded_file is not None:  # None means encode_file skipped the file
+            encoded_files.append(encoded_file)
+    return encoded_files
+
+def encode_file(root, file):
+    """Return ``{"name", "content"}`` for a ``.py`` file, or ``None`` if skipped.
+
+    ``content`` is the base64 text of the file read from ``os.path.join(root, file)``.
+    Names without a ``.py`` suffix, and ``"entry_point.py"`` itself, yield ``None``.
+    """
+    if os.path.splitext(file)[1] != ".py" or file == "entry_point.py":
+        return None
+    # Python sources default to UTF-8 (PEP 3120); do not depend on the locale.
+    with open(os.path.join(root, file), "r", encoding="utf-8") as f:
+        file_content = f.read()
+    return {
+        "name": file,
+        "content": base64.b64encode(file_content.encode("utf-8")).decode("utf-8"),
+    }