diff --git a/benchmarks/auto_benchmark.py b/benchmarks/auto_benchmark.py index d7bf07f062..0973f8ef8c 100644 --- a/benchmarks/auto_benchmark.py +++ b/benchmarks/auto_benchmark.py @@ -150,6 +150,8 @@ def install_torchserve(skip_ts_install, hw, ts_version): # install_dependencies.py if hw == "gpu": cmd = "python ts_scripts/install_dependencies.py --environment dev --cuda cu117" + elif hw == "neuronx": + cmd = "python ts_scripts/install_dependencies.py --environment dev --neuronx" else: cmd = "python ts_scripts/install_dependencies.py --environment dev" execute(cmd, wait=True) diff --git a/benchmarks/benchmark_config_neuronx.yaml b/benchmarks/benchmark_config_neuronx.yaml index b8cb3ecf68..8f8876dc24 100644 --- a/benchmarks/benchmark_config_neuronx.yaml +++ b/benchmarks/benchmark_config_neuronx.yaml @@ -10,6 +10,7 @@ # or a list of model configure yaml files with full path models: - "bert_neuronx.yaml" + - "opt_6.7b_neuronx.yaml" # benchmark on "cpu", "gpu", "neuron" or "neuronx". # "cpu" is set if "hardware" is not specified diff --git a/benchmarks/models_config/opt_6.7b_neuronx.yaml b/benchmarks/models_config/opt_6.7b_neuronx.yaml new file mode 100644 index 0000000000..99df9cc78b --- /dev/null +++ b/benchmarks/models_config/opt_6.7b_neuronx.yaml @@ -0,0 +1,68 @@ +--- +opt_6.7b_neuronx_batch_1: + scripted_mode: + benchmark_engine: "ab" + url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_1.tar.gz + workers: + - 1 + batch_delay: 100 + batch_size: + - 1 + input: "./examples/large_models/inferentia2/sample_text.txt" + requests: 2000 + concurrency: 10 + backend_profiling: False + exec_env: "local" + processors: + - "neuronx" + +opt_6.7b_neuronx_batch_2: + scripted_mode: + benchmark_engine: "ab" + url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_2.tar.gz + workers: + - 1 + batch_delay: 100 + batch_size: + - 2 + input: "./examples/large_models/inferentia2/sample_text.txt" + requests: 2000 + concurrency: 10 + backend_profiling: False + exec_env: "local" + processors: + - "neuronx" + +opt_6.7b_neuronx_batch_4: + scripted_mode: + benchmark_engine: "ab" + url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_4.tar.gz + workers: + - 1 + batch_delay: 100 + batch_size: + - 4 + input: "./examples/large_models/inferentia2/sample_text.txt" + requests: 2000 + concurrency: 10 + backend_profiling: False + exec_env: "local" + processors: + - "neuronx" + +opt_6.7b_neuronx_batch_8: + scripted_mode: + benchmark_engine: "ab" + url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_8.tar.gz + workers: + - 1 + batch_delay: 100 + batch_size: + - 8 + input: "./examples/large_models/inferentia2/sample_text.txt" + requests: 2000 + concurrency: 10 + backend_profiling: False + exec_env: "local" + processors: + - "neuronx" diff --git a/requirements/neuronx.txt b/requirements/neuronx.txt new file mode 100644 index 0000000000..929da067a1 --- /dev/null +++ b/requirements/neuronx.txt @@ -0,0 +1,7 @@ +--extra-index-url https://pip.repos.neuron.amazonaws.com +numpy==1.21.6 +protobuf==3.20.3 +grpcio-tools==1.48.2 +neuronx-cc +torch-neuronx +transformers-neuronx diff --git a/requirements/torch_neuronx_linux.txt b/requirements/torch_neuronx_linux.txt new file mode 100644 index 0000000000..adb26c1450 --- /dev/null +++ b/requirements/torch_neuronx_linux.txt @@ -0,0 +1,7 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +-r torch_common.txt +torch==1.13.1+cpu +torchvision==0.14.1+cpu +torchtext==0.14.1 +torchaudio==0.13.1+cpu +torchdata==0.5.1 diff --git a/ts_scripts/install_dependencies.py b/ts_scripts/install_dependencies.py index b5b9b4b0a7..6aef56db73 100644 --- a/ts_scripts/install_dependencies.py +++ b/ts_scripts/install_dependencies.py @@ -38,6 +38,13 @@ def install_torch_packages(self, cuda_version): os.system( f"{sys.executable} -m pip install -U -r requirements/torch_{cuda_version}_{platform.system().lower()}.txt" ) + elif args.neuronx: + torch_neuronx_requirements_file = os.path.join( + "requirements", "torch_neuronx_linux.txt" + ) + os.system( + f"{sys.executable} -m pip install -U -r {torch_neuronx_requirements_file}" + ) else: os.system( f"{sys.executable} -m pip install -U -r requirements/torch_{platform.system().lower()}.txt" @@ -67,6 +74,13 @@ def install_python_packages(self, cuda_version, requirements_file_path, nightly) gpu_requirements_file = os.path.join("requirements", "common_gpu.txt") os.system(f"{sys.executable} -m pip install -U -r {gpu_requirements_file}") + # Install dependencies for Inferentia2 + if args.neuronx: + neuronx_requirements_file = os.path.join("requirements", "neuronx.txt") + os.system( + f"{sys.executable} -m pip install -U -r {neuronx_requirements_file}" + ) + def install_node_packages(self): os.system( f"{self.sudo_cmd}npm install -g newman newman-reporter-htmlextra markdown-link-check" @@ -193,6 +207,11 @@ def get_brew_version(): choices=["cu92", "cu101", "cu102", "cu111", "cu113", "cu116", "cu117", "cu118"], help="CUDA version for torch", ) + parser.add_argument( + "--neuronx", + action="store_true", + help="Install dependencies for inferentia2 support", + ) parser.add_argument( "--environment", default="prod",