diff --git a/.github/workflows/benchmark_torch_compile_nightly.yml b/.github/workflows/benchmark_torch_compile_nightly.yml
index 827977ef05..310a7a5d00 100644
--- a/.github/workflows/benchmark_torch_compile_nightly.yml
+++ b/.github/workflows/benchmark_torch_compile_nightly.yml
@@ -20,10 +20,10 @@ jobs:
           cd ..
           pwd
           rm -rf _tool
-      - name: Setup Python 3.8
+      - name: Setup Python 3.10
        uses: actions/setup-python@v4
         with:
-          python-version: 3.8
+          python-version: "3.10"
           architecture: x64
       - name: Setup Java 17
         uses: actions/setup-java@v3
@@ -39,5 +39,9 @@ jobs:
           sudo apt-get update -y
           sudo apt-get install -y apache2-utils
           pip install -r benchmarks/requirements-ab.txt
+      - name: Install model-specific dependencies
+        run: |
+          chmod +x benchmarks/benchmark_model_dependencies.sh
+          source benchmarks/benchmark_model_dependencies.sh
       - name: Benchmark gpu nightly
         run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_torch_compile_gpu.yaml --skip false --nightly True
diff --git a/benchmarks/benchmark_config_torch_compile_gpu.yaml b/benchmarks/benchmark_config_torch_compile_gpu.yaml
index edc66899ea..e4d1a95673 100644
--- a/benchmarks/benchmark_config_torch_compile_gpu.yaml
+++ b/benchmarks/benchmark_config_torch_compile_gpu.yaml
@@ -11,6 +11,7 @@
 models:
   - "bert_torch_compile_gpu.yaml"
   - "resnet50_torch_compile_gpu.yaml"
+  - "sam_fast_torch_compile_gpu.yaml"
 
 # benchmark on "cpu" or "gpu".
 # "cpu" is set if "hardware" is not specified
diff --git a/benchmarks/benchmark_model_dependencies.sh b/benchmarks/benchmark_model_dependencies.sh
new file mode 100644
index 0000000000..ff70c91974
--- /dev/null
+++ b/benchmarks/benchmark_model_dependencies.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# Install dependencies and set environment variables for SAM Fast
+
+# Install dependencies
+pip install chardet
+chmod +x examples/large_models/segment_anything_fast/install_segment_anything_fast.sh
+source examples/large_models/segment_anything_fast/install_segment_anything_fast.sh
+
+# Turn off A100G optimization
+export SEGMENT_ANYTHING_FAST_USE_FLASH_4=0
+
+echo "Installed dependencies and set environment variables for SAM Fast"
+
diff --git a/benchmarks/models_config/sam_fast_torch_compile_gpu.yaml b/benchmarks/models_config/sam_fast_torch_compile_gpu.yaml
new file mode 100644
index 0000000000..d2c4b8b03e
--- /dev/null
+++ b/benchmarks/models_config/sam_fast_torch_compile_gpu.yaml
@@ -0,0 +1,18 @@
+---
+sam_fast_vit_b:
+  process_batch_size_4:
+    benchmark_engine: "ab"
+    url: https://torchserve.pytorch.org/mar_files/sam-fast-vit-b-process-batch-size-4.tar.gz
+    workers:
+      - 4
+    batch_delay: 100
+    batch_size:
+      - 1
+    input: "./examples/large_models/segment_anything_fast/kitten.jpg"
+    requests: 1000
+    concurrency: 4
+    backend_profiling: False
+    exec_env: "local"
+    processors:
+      - "cpu"
+      - "gpus": "all"
\ No newline at end of file
diff --git a/examples/large_models/segment_anything_fast/custom_handler.py b/examples/large_models/segment_anything_fast/custom_handler.py
index 97b2b3f1f3..eed9365856 100644
--- a/examples/large_models/segment_anything_fast/custom_handler.py
+++ b/examples/large_models/segment_anything_fast/custom_handler.py
@@ -29,6 +29,7 @@ def initialize(self, ctx):
         self.device = torch.device(
             self.map_location + ":" + str(properties.get("gpu_id"))
         )
+        torch.cuda.set_device(self.device)
 
         model_type = ctx.model_yaml_config["handler"]["model_type"]
         sam_checkpoint = ctx.model_yaml_config["handler"]["sam_checkpoint"]
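
Note on the custom_handler.py change above: torch.cuda.set_device() pins the process's default CUDA device to the GPU that TorchServe assigns the worker (properties.get("gpu_id")), so anything that allocates on the "current" device (for example kernels launched under torch.compile) does not silently fall back to cuda:0. The sketch below is illustrative only, not part of the handler; pin_worker_device is a hypothetical helper that mirrors the device setup in initialize().

import torch

def pin_worker_device(gpu_id: int) -> torch.device:
    """Hypothetical helper mirroring the handler's device setup."""
    if torch.cuda.is_available():
        device = torch.device(f"cuda:{gpu_id}")
        # Make this GPU the process-wide default so current-device
        # allocations and kernel launches target the worker's assigned
        # GPU instead of cuda:0.
        torch.cuda.set_device(device)
    else:
        device = torch.device("cpu")
    return device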