pytorch · namannandan · Jun 29, 2023 · Jun 8, 2023 · Jun 19, 2023 · Jun 20, 2023
diff --git a/benchmarks/auto_benchmark.py b/benchmarks/auto_benchmark.py
@@ -150,6 +150,8 @@ def install_torchserve(skip_ts_install, hw, ts_version):
     # install_dependencies.py
     if hw == "gpu":
         cmd = "python ts_scripts/install_dependencies.py --environment dev --cuda cu117"
+    elif hw == "neuronx":
+        cmd = "python ts_scripts/install_dependencies.py --environment dev --neuronx"
     else:
         cmd = "python ts_scripts/install_dependencies.py --environment dev"
     execute(cmd, wait=True)

diff --git a/benchmarks/benchmark_config_neuronx.yaml b/benchmarks/benchmark_config_neuronx.yaml
@@ -10,6 +10,7 @@
 # or a list of model configure yaml files with full path
 models:
   - "bert_neuronx.yaml"
+  - "opt_6.7b_neuronx.yaml"
 
 # benchmark on "cpu", "gpu", "neuron" or "neuronx".
 # "cpu" is set if "hardware" is not specified

diff --git a/benchmarks/models_config/opt_6.7b_neuronx.yaml b/benchmarks/models_config/opt_6.7b_neuronx.yaml
@@ -0,0 +1,68 @@
+---
+opt_6.7b_neuronx_batch_1:  # benchmark entry for the batch_size=1 model archive
+  scripted_mode:
+    benchmark_engine: "ab"  # NOTE(review): presumably Apache Bench; confirm
+    url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_1.tar.gz
+    workers:
+      - 1
+    batch_delay: 100  # NOTE(review): presumably milliseconds; confirm
+    batch_size:
+      - 1  # same batch size as in the archive name in `url`
+    input: "./examples/large_models/inferentia2/sample_text.txt"
+    requests: 2000
+    concurrency: 10
+    backend_profiling: False
+    exec_env: "local"
+    processors:
+      - "neuronx"
+
+opt_6.7b_neuronx_batch_2:  # benchmark entry for the batch_size=2 model archive
+  scripted_mode:
+    benchmark_engine: "ab"  # NOTE(review): presumably Apache Bench; confirm
+    url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_2.tar.gz
+    workers:
+      - 1
+    batch_delay: 100  # NOTE(review): presumably milliseconds; confirm
+    batch_size:
+      - 2  # same batch size as in the archive name in `url`
+    input: "./examples/large_models/inferentia2/sample_text.txt"
+    requests: 2000
+    concurrency: 10
+    backend_profiling: False
+    exec_env: "local"
+    processors:
+      - "neuronx"
+
+opt_6.7b_neuronx_batch_4:  # benchmark entry for the batch_size=4 model archive
+  scripted_mode:
+    benchmark_engine: "ab"  # NOTE(review): presumably Apache Bench; confirm
+    url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_4.tar.gz
+    workers:
+      - 1
+    batch_delay: 100  # NOTE(review): presumably milliseconds; confirm
+    batch_size:
+      - 4  # same batch size as in the archive name in `url`
+    input: "./examples/large_models/inferentia2/sample_text.txt"
+    requests: 2000
+    concurrency: 10
+    backend_profiling: False
+    exec_env: "local"
+    processors:
+      - "neuronx"
+
+opt_6.7b_neuronx_batch_8:  # benchmark entry for the batch_size=8 model archive
+  scripted_mode:
+    benchmark_engine: "ab"  # NOTE(review): presumably Apache Bench; confirm
+    url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_8.tar.gz
+    workers:
+      - 1
+    batch_delay: 100  # NOTE(review): presumably milliseconds; confirm
+    batch_size:
+      - 8  # same batch size as in the archive name in `url`
+    input: "./examples/large_models/inferentia2/sample_text.txt"
+    requests: 2000
+    concurrency: 10
+    backend_profiling: False
+    exec_env: "local"
+    processors:
+      - "neuronx"
diff --git a/requirements/neuronx.txt b/requirements/neuronx.txt
@@ -0,0 +1,7 @@
+--extra-index-url https://pip.repos.neuron.amazonaws.com
+numpy==1.21.6
+protobuf==3.20.3
+grpcio-tools==1.48.2
+neuronx-cc
+torch-neuronx
+transformers-neuronx
diff --git a/requirements/torch_neuronx_linux.txt b/requirements/torch_neuronx_linux.txt
@@ -0,0 +1,7 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
+-r torch_common.txt
+torch==1.13.1+cpu
+torchvision==0.14.1+cpu
+torchtext==0.14.1
+torchaudio==0.13.1+cpu
+torchdata==0.5.1
diff --git a/ts_scripts/install_dependencies.py b/ts_scripts/install_dependencies.py
@@ -38,6 +38,13 @@ def install_torch_packages(self, cuda_version):
                 os.system(
                     f"{sys.executable} -m pip install -U -r requirements/torch_{cuda_version}_{platform.system().lower()}.txt"
                 )
+        elif args.neuronx:
+            torch_neuronx_requirements_file = os.path.join(
+                "requirements", "torch_neuronx_linux.txt"
+            )
+            os.system(
+                f"{sys.executable} -m pip install -U -r {torch_neuronx_requirements_file}"
+            )
         else:
             os.system(
                 f"{sys.executable} -m pip install -U -r requirements/torch_{platform.system().lower()}.txt"
@@ -67,6 +74,13 @@ def install_python_packages(self, cuda_version, requirements_file_path, nightly)
             gpu_requirements_file = os.path.join("requirements", "common_gpu.txt")
             os.system(f"{sys.executable} -m pip install -U -r {gpu_requirements_file}")
 
+        # Install the Inferentia2 (neuronx) packages listed in requirements/neuronx.txt
+        if args.neuronx:
+            neuronx_requirements_file = os.path.join("requirements", "neuronx.txt")
+            os.system(
+                f"{sys.executable} -m pip install -U -r {neuronx_requirements_file}"
+            )
+
     def install_node_packages(self):
         os.system(
             f"{self.sudo_cmd}npm install -g newman newman-reporter-htmlextra markdown-link-check"
@@ -193,6 +207,11 @@ def get_brew_version():
         choices=["cu92", "cu101", "cu102", "cu111", "cu113", "cu116", "cu117", "cu118"],
         help="CUDA version for torch",
     )
+    parser.add_argument(
+        "--neuronx",
+        action="store_true",
+        help="Install dependencies for inferentia2 support",
+    )
     parser.add_argument(
         "--environment",
         default="prod",