Remove arguments related to cost-savings #1230

Merged (5 commits) on Jul 26, 2024
256 changes: 29 additions & 227 deletions user_tools/src/spark_rapids_pytools/rapids/qualification.py

Large diffs are not rendered by default.

@@ -42,6 +42,7 @@ toolOutput:
- 'Strongly Recommended'
- 'Recommended'
groupColumns:
enabled: false
keys:
- Vendor
- Driver Host
6 changes: 1 addition & 5 deletions user_tools/src/spark_rapids_pytools/wrapper.py
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -19,19 +19,15 @@
from spark_rapids_pytools.wrappers.databricks_aws_wrapper import DBAWSWrapper
from spark_rapids_pytools.wrappers.databricks_azure_wrapper import DBAzureWrapper
from spark_rapids_pytools.wrappers.dataproc_wrapper import DataprocWrapper
from spark_rapids_pytools.wrappers.dataproc_gke_wrapper import DataprocGKEWrapper
from spark_rapids_pytools.wrappers.emr_wrapper import EMRWrapper
from spark_rapids_pytools.wrappers.onprem_wrapper import OnPremWrapper


def main():
fire.Fire({
'emr': EMRWrapper,
'dataproc': DataprocWrapper,
'dataproc-gke': DataprocGKEWrapper,
'databricks-aws': DBAWSWrapper,
'databricks-azure': DBAzureWrapper,
'onprem': OnPremWrapper
})
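
For context, `fire.Fire` turns this dictionary into the command-line dispatch table: the first CLI token selects a wrapper class and the next token selects one of its methods, so removing the 'dataproc-gke' entry drops that platform from the CLI. Below is a minimal, self-contained sketch of the same dispatch pattern; the DemoWrapper class and the 'demo' key are hypothetical and not part of this repository.

import fire


class DemoWrapper:  # hypothetical stand-in for a platform wrapper such as EMRWrapper
    def qualification(self, eventlogs: str = None) -> None:
        # A real wrapper would download dependencies and launch the tool here.
        print(f'qualification requested for eventlogs={eventlogs}')


if __name__ == '__main__':
    # `python demo_cli.py demo qualification --eventlogs=/tmp/logs` resolves to
    # DemoWrapper().qualification(eventlogs='/tmp/logs').
    fire.Fire({'demo': DemoWrapper})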


232 changes: 3 additions & 229 deletions user_tools/src/spark_rapids_pytools/wrappers/databricks_aws_wrapper.py

Large diffs are not rendered by default.

@@ -14,233 +14,17 @@


"""Wrapper class to run tools associated with RAPIDS Accelerator for Apache Spark plugin on DATABRICKS_AZURE."""
from spark_rapids_pytools.cloud_api.sp_types import DeployMode
from spark_rapids_pytools.common.utilities import Utils, ToolLogging

from spark_rapids_pytools.common.utilities import ToolLogging
from spark_rapids_pytools.rapids.diagnostic import Diagnostic
from spark_rapids_pytools.rapids.profiling import ProfilingAsLocal
from spark_rapids_pytools.rapids.qualification import QualFilterApp, QualificationAsLocal, QualGpuClusterReshapeType
from spark_rapids_tools import CspEnv
from spark_rapids_tools.utils import Utilities


class CliDBAzureLocalMode: # pylint: disable=too-few-public-methods
"""
A wrapper that runs the RAPIDS Accelerator tools locally on the dev machine for DATABRICKS_AZURE.
"""

@staticmethod
def qualification(cpu_cluster: str = None,
eventlogs: str = None,
profile: str = None,
local_folder: str = None,
remote_folder: str = None,
gpu_cluster: str = None,
tools_jar: str = None,
credentials_file: str = None,
filter_apps: str = QualFilterApp.tostring(QualFilterApp.get_default()),
gpu_cluster_recommendation: str = QualGpuClusterReshapeType.tostring(
QualGpuClusterReshapeType.get_default()),
estimation_model: str = None,
jvm_heap_size: int = None,
verbose: bool = None,
cpu_discount: int = None,
gpu_discount: int = None,
global_discount: int = None,
**rapids_options) -> None:
"""
The Qualification tool analyzes Spark events generated from CPU-based Spark applications to
help quantify the expected acceleration and cost savings of migrating a Spark application
or query to GPU. The wrapper downloads dependencies and executes the analysis on the local
dev machine.
:param cpu_cluster: The Databricks-cluster on which the Spark applications were executed. The argument
can be a Databricks-cluster ID or a valid path to the cluster's properties file (json format)
generated by the databricks-CLI.
:param eventlogs: Event log filenames or ABFS (Azure Blob File System) storage directories
containing event logs (comma separated). If missing, the wrapper reads the Spark's
property `spark.eventLog.dir` defined in `cpu_cluster`. This property should be included
in the output of `databricks clusters get CLUSTER_ID [flags]`.
Note that the wrapper will raise an exception if the property is not set.
:param profile: A named Databricks profile to get the settings/credentials of the Databricks CLI.
:param local_folder: Local work-directory path to store the output and to be used as root
directory for temporary folders/files. The final output will go into a subdirectory called
${local_folder}/qual-${EXEC_ID} where exec_id is an auto-generated unique identifier of the
execution. If the argument is NONE, the default value is the env variable
RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY if any; or the current working directory.
:param remote_folder: An ABFS (Azure Blob File System) folder where the output is uploaded at the end
of execution. If no value is provided, the output will be only available on local disk.
:param gpu_cluster: The Databricks-cluster on which the Spark applications are planned to be migrated.
The argument can be a Databricks-cluster ID or a valid path to the cluster's properties file
(json format) generated by the databricks-CLI. If missing, the wrapper maps the databricks machine
instances of the original cluster into databricks instances that support GPU acceleration.
:param tools_jar: Path to a bundled jar that includes the RAPIDS tools. The path can be a local filesystem path
or a remote ABFS url. If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar
from maven repo.
:param credentials_file: The local path of JSON file that contains the application credentials.
If missing, the wrapper looks for "DATABRICKS_CONFIG_FILE" environment variable
to provide the location of a credential file. The default credentials file exists as
"~/.databrickscfg" on Unix, Linux, or macOS
:param filter_apps: Filtering criterion for the applications listed in the final STDOUT table;
one of the following (all, speedups, savings, top_candidates).
Note that this filter does not affect the CSV report.
"all" applies no filter. "speedups" lists all the apps that are either
'Recommended' or 'Strongly Recommended' based on speedups. "savings"
lists all the apps that have positive estimated GPU savings, except for the apps that
are "Not Applicable". "top_candidates" lists all apps whose unsupported-operator
stage duration is less than 25% of the app duration and whose speedup is greater than 1.3x.
:param gpu_cluster_recommendation: The type of GPU cluster recommendation to generate.
It accepts one of the following ("CLUSTER", "JOB" and the default value "MATCH").
"MATCH": keep GPU cluster same number of nodes as CPU cluster;
"CLUSTER": recommend optimal GPU cluster by cost for entire cluster;
"JOB": recommend optimal GPU cluster by cost per job.
:param estimation_model: Model used to calculate the estimated GPU duration and cost savings.
It accepts one of the following:
"xgboost": an XGBoost model for GPU duration estimation
"speedups": set by default. It uses a simple static estimated speedup per operator.
:param jvm_heap_size: The maximum heap size of the JVM in gigabytes.
:param verbose: True or False to enable verbosity to the wrapper script.
:param cpu_discount: A percent discount for the cpu cluster cost in the form of an integer value
(e.g. 30 for 30% discount).
:param gpu_discount: A percent discount for the gpu cluster cost in the form of an integer value
(e.g. 30 for 30% discount).
:param global_discount: A percent discount for both the cpu and gpu cluster costs in the form of an
integer value (e.g. 30 for 30% discount).
:param rapids_options: A list of valid Qualification tool options.
Note that the wrapper ignores ["output-directory", "platform"] flags, and it does not support
multiple "spark-property" arguments.
For more details on Qualification tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/qualification/jar-usage.html#running-the-qualification-tool-standalone-on-spark-event-logs
"""
verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j',
Utilities.get_system_memory_in_gb())
eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
filter_apps = Utils.get_value_or_pop(filter_apps, rapids_options, 'f')
tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# When verbose is enabled, turn on debug logging via the environment.
ToolLogging.enable_debug_mode()
wrapper_qual_options = {
'platformOpts': {
# the databricks profile
'profile': profile,
'credentialFile': credentials_file,
'deployMode': DeployMode.LOCAL,
},
'migrationClustersProps': {
'cpuCluster': cpu_cluster,
'gpuCluster': gpu_cluster
},
'jobSubmissionProps': {
'remoteFolder': remote_folder,
'platformArgs': {
'jvmMaxHeapSize': jvm_heap_size
}
},
'eventlogs': eventlogs,
'filterApps': filter_apps,
'toolsJar': tools_jar,
'gpuClusterRecommendation': gpu_cluster_recommendation,
'cpuDiscount': cpu_discount,
'gpuDiscount': gpu_discount,
'globalDiscount': global_discount,
'estimationModel': estimation_model
}
QualificationAsLocal(platform_type=CspEnv.DATABRICKS_AZURE,
cluster=None,
output_folder=local_folder,
wrapper_options=wrapper_qual_options,
rapids_options=rapids_options).launch()
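
# Illustrative note, not part of the original file: the cpu_discount, gpu_discount and
# global_discount flags above are plain integer percentages, so a value of 30 is
# presumably folded into the estimated cluster cost roughly as
#     discounted_cost = estimated_cost * (1 - discount / 100.0)
# before the tool compares CPU and GPU costs.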

@staticmethod
def profiling(gpu_cluster: str = None,
worker_info: str = None,
eventlogs: str = None,
profile: str = None,
local_folder: str = None,
remote_folder: str = None,
tools_jar: str = None,
credentials_file: str = None,
jvm_heap_size: int = None,
verbose: bool = None,
**rapids_options) -> None:
"""
The Profiling tool analyzes both CPU- and GPU-generated event logs and generates information
which can be used for debugging and profiling Apache Spark applications.
:param gpu_cluster: The Databricks-cluster on which the Spark applications were executed. The argument
can be a Databricks-cluster ID or a valid path to the cluster's properties file (json format)
generated by the databricks-CLI. If missing, then the argument worker_info has to be provided.
:param worker_info: A path pointing to a yaml file containing the system information of a
worker node. It is assumed that all workers are homogeneous.
If missing, the wrapper pulls the worker info from the "gpu_cluster".
:param eventlogs: Event log filenames or ABFS (Azure Blob File System) storage directories
containing event logs (comma separated). If missing, the wrapper reads the Spark's
property `spark.eventLog.dir` defined in `gpu_cluster`. This property should be included
in the output of `databricks clusters get CLUSTER_ID [flags]`.
Note that the wrapper will raise an exception if the property is not set.
:param profile: A named Databricks profile to get the settings/credentials of the Databricks CLI.
:param local_folder: Local work-directory path to store the output and to be used as root
directory for temporary folders/files. The final output will go into a subdirectory called
${local_folder}/prof-${EXEC_ID} where exec_id is an auto-generated unique identifier of the
execution. If the argument is NONE, the default value is the env variable
RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY if any; or the current working directory.
:param remote_folder: An ABFS (Azure Blob File System) folder where the output is uploaded at the end
of execution. If no value is provided, the output will be only available on local disk.
:param tools_jar: Path to a bundled jar that includes the RAPIDS tools. The path can be a local filesystem path
or a remote ABFS url. If missing, the wrapper downloads the latest rapids-4-spark-tools_*.jar
from maven repo.
:param credentials_file: The local path of JSON file that contains the application credentials.
If missing, the wrapper looks for "DATABRICKS_CONFIG_FILE" environment variable
to provide the location of a credential file. The default credentials file exists as
"~/.databrickscfg" on Unix, Linux, or macOS.
:param verbose: True or False to enable verbosity to the wrapper script.
:param jvm_heap_size: The maximum heap size of the JVM in gigabytes.
:param rapids_options: A list of valid Profiling tool options.
Note that the wrapper ignores ["output-directory", "worker-info"] flags, and it does not support
multiple "spark-property" arguments.
For more details on Profiling tool options, please visit
https://docs.nvidia.com/spark-rapids/user-guide/latest/profiling/jar-usage.html#prof-tool-title-options
"""
verbose = Utils.get_value_or_pop(verbose, rapids_options, 'v', False)
profile = Utils.get_value_or_pop(profile, rapids_options, 'p')
credentials_file = Utils.get_value_or_pop(credentials_file, rapids_options, 'c')
gpu_cluster = Utils.get_value_or_pop(gpu_cluster, rapids_options, 'g')
remote_folder = Utils.get_value_or_pop(remote_folder, rapids_options, 'r')
jvm_heap_size = Utils.get_value_or_pop(jvm_heap_size, rapids_options, 'j', Utilities.get_system_memory_in_gb())
eventlogs = Utils.get_value_or_pop(eventlogs, rapids_options, 'e')
tools_jar = Utils.get_value_or_pop(tools_jar, rapids_options, 't')
worker_info = Utils.get_value_or_pop(worker_info, rapids_options, 'w')
local_folder = Utils.get_value_or_pop(local_folder, rapids_options, 'l')
if verbose:
# When verbose is enabled, turn on debug logging via the environment.
ToolLogging.enable_debug_mode()
wrapper_prof_options = {
'platformOpts': {
# the databricks profile
'profile': profile,
'credentialFile': credentials_file,
'deployMode': DeployMode.LOCAL,
},
'migrationClustersProps': {
'gpuCluster': gpu_cluster
},
'jobSubmissionProps': {
'remoteFolder': remote_folder,
'platformArgs': {
'jvmMaxHeapSize': jvm_heap_size
}
},
'eventlogs': eventlogs,
'toolsJar': tools_jar,
'autoTunerFileInput': worker_info
}
ProfilingAsLocal(platform_type=CspEnv.DATABRICKS_AZURE,
output_folder=local_folder,
wrapper_options=wrapper_prof_options,
rapids_options=rapids_options).launch()
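
# Illustrative sketch, not part of the original file: both methods above resolve their
# arguments through Utils.get_value_or_pop, which appears to prefer an explicitly passed
# value, then a short-flag entry popped from rapids_options, then a default, roughly:
#     def get_value_or_pop(provided_value, options, short_flag, default=None):
#         if provided_value is not None:
#             return provided_value
#         return options.pop(short_flag, default)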

@staticmethod
def diagnostic(cluster: str,
profile: str = None,
@@ -294,10 +78,8 @@ def diagnostic(cluster: str,

class DBAzureWrapper: # pylint: disable=too-few-public-methods
"""
A wrapper script to run RAPIDS Accelerator tools (Qualification, Profiling, and Diagnostic) on Databricks_Azure.
A wrapper script to run the RAPIDS Accelerator Diagnostic tool on Databricks_Azure.
"""

def __init__(self):
self.qualification = CliDBAzureLocalMode.qualification
self.profiling = CliDBAzureLocalMode.profiling
self.diagnostic = CliDBAzureLocalMode.diagnostic
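
With the qualification and profiling entry points removed from this wrapper, only the diagnostic command appears to remain reachable. A hypothetical programmatic use after this change, where the cluster ID and profile name are placeholders:

from spark_rapids_pytools.wrappers.databricks_azure_wrapper import DBAzureWrapper

# Only the diagnostic tool is still exposed by the Azure wrapper.
wrapper = DBAzureWrapper()
wrapper.diagnostic(cluster='my-databricks-cluster-id', profile='DEFAULT')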