Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create nightly job for torch.compile benchmarks #2835

Merged
merged 19 commits into from
Dec 9, 2023
Merged
43 changes: 43 additions & 0 deletions .github/workflows/benchmark_torch_compile_nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Nightly benchmark of torch.compile models on a self-hosted GPU runner.
name: Benchmark torch.compile models nightly

on:
  # run every day at 9:15pm (GitHub Actions cron schedules are evaluated in UTC)
  schedule:
    - cron: '15 21 * * *'

jobs:
  nightly:
    strategy:
      fail-fast: false
    runs-on: [self-hosted, gpu]
    # generous 22-hour ceiling; the full model sweep is long-running
    timeout-minutes: 1320
    steps:
      - name: Clean up previous run
        run: |
          echo "Cleaning up previous run"
          cd $RUNNER_WORKSPACE
          pwd
          cd ..
          pwd
          rm -rf _tool
      - name: Setup Python 3.8
        uses: actions/setup-python@v4
        with:
          python-version: 3.8
          architecture: x64
      - name: Setup Java 17
        uses: actions/setup-java@v3
        with:
          distribution: 'zulu'
          java-version: '17'
      - name: Checkout TorchServe
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Install dependencies
        run: |
          sudo apt-get update -y
          # apache2-utils provides `ab` (Apache Bench), used by benchmark-ab.py
          sudo apt-get install -y apache2-utils
          pip install -r benchmarks/requirements-ab.txt
      - name: Benchmark gpu nightly
        # --nightly True makes auto_benchmark.py install nightly torch packages
        run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_torch_compile_gpu.yaml --skip false --nightly True
24 changes: 18 additions & 6 deletions benchmarks/auto_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,16 @@ def load_benchmark_config(bm_config_path, skip_ts_install, skip_upload):
return benchmark_config.bm_config


def benchmark_env_setup(bm_config, skip_ts_install, nightly):
    """Prepare the environment for a benchmark run.

    Installs TorchServe (unless skipped), stages the benchmark working
    directory, materializes the per-model JSON config, and enables the
    launcher's logical-core option where the hardware calls for it.

    :param bm_config: parsed benchmark configuration dict; this function reads
        the "hardware", "version", "model_config_path" and "models" keys.
    :param skip_ts_install: truthy to skip (re)installing TorchServe.
    :param nightly: truthy to install the nightly torch packages
        (forwarded to install_torchserve, which appends --nightly_torch).
    """
    install_torchserve(
        skip_ts_install, bm_config["hardware"], bm_config["version"], nightly
    )
    setup_benchmark_path(bm_config["model_config_path"])
    build_model_json_config(bm_config["models"])
    enable_launcher_with_logical_core(bm_config["hardware"])


def install_torchserve(skip_ts_install, hw, ts_version):
def install_torchserve(skip_ts_install, hw, ts_version, nightly):
if skip_ts_install:
return

Expand All @@ -154,6 +156,8 @@ def install_torchserve(skip_ts_install, hw, ts_version):
cmd = "python ts_scripts/install_dependencies.py --environment dev --neuronx"
else:
cmd = "python ts_scripts/install_dependencies.py --environment dev"
if nightly:
cmd += " --nightly_torch"
execute(cmd, wait=True)
print("successfully install install_dependencies.py")

Expand Down Expand Up @@ -290,9 +294,12 @@ def main():
)
parser.add_argument(
"--skip_upload",
help="true: skip uploading commands . default: false",
help="true: skip uploading commands. default: false",
)
parser.add_argument(
"--nightly",
help="true: install nightly version of torch package. default: false",
)

arguments = parser.parse_args()
skip_ts_config = (
False
Expand All @@ -304,8 +311,13 @@ def main():
if arguments.skip_upload is not None and arguments.skip_upload.lower() == "true"
else False
)
nightly = (
True
if arguments.nightly is not None and arguments.nightly.lower() == "true"
else False
)
bm_config = load_benchmark_config(arguments.input, skip_ts_config, skip_upload)
benchmark_env_setup(bm_config, skip_ts_config)
benchmark_env_setup(bm_config, skip_ts_config, nightly)
run_benchmark(bm_config)
clean_up_benchmark_env(bm_config)
print("benchmark_serving.sh finished successfully.")
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmark-ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def warm_up():
click.secho("\n\nExecuting warm-up ...", fg="green")

ab_cmd = (
f"ab -c {execution_params['concurrency']} -n {execution_params['requests']/10} -k -p "
f"ab -c {execution_params['concurrency']} -s 300 -n {execution_params['requests']/10} -k -p "
f"{execution_params['tmp_dir']}/benchmark/input -T {execution_params['content_type']} "
f"{execution_params['inference_url']}/{execution_params['inference_model_url']} > "
f"{execution_params['result_file']}"
Expand All @@ -247,7 +247,7 @@ def run_benchmark():

click.secho("\n\nExecuting inference performance tests ...", fg="green")
ab_cmd = (
f"ab -c {execution_params['concurrency']} -n {execution_params['requests']} -k -p "
f"ab -c {execution_params['concurrency']} -s 300 -n {execution_params['requests']} -k -p "
f"{execution_params['tmp_dir']}/benchmark/input -T {execution_params['content_type']} "
f"{execution_params['inference_url']}/{execution_params['inference_model_url']} > "
f"{execution_params['result_file']}"
Expand Down
48 changes: 48 additions & 0 deletions benchmarks/benchmark_config_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Torchserve version to be installed. It can be one of the options
# - branch : "master"
# - nightly: "2022.3.16"
# - release: "0.5.3"
# Nightly build will be installed if "ts_version" is not specified
#ts_version:
#  branch: &ts_version "master"

# a list of model config yaml files defined in benchmarks/models_config
# or a list of model config yaml files with full path
models:
  - "bert_torch_compile_gpu.yaml"
  - "resnet50_torch_compile_gpu.yaml"
  - "vgg16_torch_compile_gpu.yaml"

# benchmark on "cpu" or "gpu".
# "cpu" is set if "hardware" is not specified
hardware: &hardware "gpu"

# load prometheus metrics report to remote storage or local different path if "metrics_cmd" is set.
# the command line to load prometheus metrics report to remote system.
# Here is an example of AWS cloudwatch command:
# Note:
#   - keep the values in the same order as the command definition.
#   - set up the command before enabling `metrics_cmd`.
#     For example, aws client and AWS credentials need to be set up before trying this example.
metrics_cmd:
  - "cmd": "aws cloudwatch put-metric-data"
  - "--namespace": ["torchserve_benchmark_nightly_torch_compile_", *hardware]
  - "--region": "us-east-2"
  - "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'

# load report to remote storage or local different path if "report_cmd" is set.
# the command line to load report to remote storage.
# Here is an example of AWS cloudwatch command:
# Note:
#   - keep the values in the same order as the command.
#   - set up the command before enabling `report_cmd`.
#     For example, aws client, AWS credentials and S3 bucket
#     need to be set up before trying this example.
#   - "today()" is a keyword to apply the current date in the path
#     For example, the dest path in the following example is
#     s3://torchserve-model-serving/benchmark/2022-03-18/gpu
report_cmd:
  - "cmd": "aws s3 cp --recursive"
  - "source": '/tmp/ts_benchmark/'
  - "dest": ['s3://torchserve-benchmark/torch-compile-nightly', "today()", *hardware]
42 changes: 42 additions & 0 deletions benchmarks/models_config/bert_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Benchmark matrix for BERT: TorchScript-ed model vs. torch.compile default
# mode, both swept over batch sizes 1-16 with ab at concurrency 100.
bert:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/bert-scripted.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text_captum_input.txt"
    requests: 50000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
  torch_compile_default_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/bert-default.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text_captum_input.txt"
    requests: 50000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
42 changes: 42 additions & 0 deletions benchmarks/models_config/resnet50_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Benchmark matrix for ResNet-50: TorchScript-ed model vs. torch.compile
# default mode, both swept over batch sizes 1-16 with ab at concurrency 100.
resnet50:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/resnet-50-scripted.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/image_classifier/kitten.jpg"
    requests: 10000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
  torch_compile_default_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/resnet-50-default.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/image_classifier/kitten.jpg"
    requests: 10000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
42 changes: 42 additions & 0 deletions benchmarks/models_config/vgg16_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Benchmark matrix for VGG-16: TorchScript-ed model vs. torch.compile default
# mode, both swept over batch sizes 1-16 with ab at concurrency 100.
vgg16:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/vgg-16-scripted.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/image_classifier/kitten.jpg"
    requests: 10000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
  torch_compile_default_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/vgg-16-default.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/image_classifier/kitten.jpg"
    requests: 10000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
Loading