Skip Open Llama 3B from nightly due to DRAM limitations. (#1063)
Keeping Llama 3.2 1B as ref.
nvukobratTT authored Jan 17, 2025
1 parent 4386d0d commit 37649ab
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions forge/test/mlir/llama/tests/test_llama_prefil.py
@@ -56,6 +56,9 @@ def test_llama_prefil_on_device_decode_on_cpu(model_path):
     - The first part is the prefilling of the model on the device.
     - The second part is the decoding of the model on the CPU without KV cache.
     """
+    if model_path == "openlm-research/open_llama_3b":
+        pytest.skip("Insufficient host DRAM to run this model (requires a bit more than 32 GB during compile time)")
+
     # Load Llama model and tokenizer
     model, tokenizer = load_model(model_path, return_dict=True)

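The added guard hard-codes a single model path inside the test body. If more memory-heavy models need skipping later, the check could be factored into a small lookup helper. A minimal sketch, assuming nothing beyond the commit itself (`HIGH_DRAM_MODELS` and `dram_skip_reason` are hypothetical names, not part of the repository):

```python
# Hypothetical helper that maps model paths to a skip reason, mirroring the
# guard added in this commit. Returns None for models that can run.
HIGH_DRAM_MODELS = {
    # Compiling open_llama_3b needs a bit more than 32 GB of host DRAM.
    "openlm-research/open_llama_3b": (
        "Insufficient host DRAM to run this model "
        "(requires a bit more than 32 GB during compile time)"
    ),
}


def dram_skip_reason(model_path):
    """Return the skip reason for model_path, or None if it fits in DRAM."""
    return HIGH_DRAM_MODELS.get(model_path)


# Usage inside the test body (pytest assumed available in the test suite):
#     reason = dram_skip_reason(model_path)
#     if reason:
#         pytest.skip(reason)
```

This keeps the per-model DRAM knowledge in one place, so re-enabling Open Llama 3B later is a one-line change.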
