diff --git a/reduce-llms-for-testing b/reduce-llms-for-testing
new file mode 160000
index 00000000000000..234efef10f329f
--- /dev/null
+++ b/reduce-llms-for-testing
@@ -0,0 +1 @@
+Subproject commit 234efef10f329fe31d2a2d5f1f07ef3d541479ef
diff --git a/tests/test_lora_conversion_and_inference.sh b/tests/test_lora_conversion_and_inference.sh
index a99d1831c7c8f7..b92f4facffcde8 100755
--- a/tests/test_lora_conversion_and_inference.sh
+++ b/tests/test_lora_conversion_and_inference.sh
@@ -1,12 +1,13 @@
 #!/bin/bash
 set -e
 
-MODELS_REPO=https://huggingface.co/ltoniazzi/reduce-llms-for-testing
+MODELS_REPO=reduce-llms-for-testing
+MODELS_REPO_URL=https://huggingface.co/ltoniazzi/$MODELS_REPO
 
 # Clone the Hugging Face repository if the directory does not exist
-if [ ! -d "reduce-llms-for-testing" ]; then
+if [ ! -d "$MODELS_REPO" ]; then
     echo "Cloning the Hugging Face repository..."
-    git clone $MODELS_REPO
+    git clone $MODELS_REPO_URL
 else
     echo "Repository already exists. Skipping clone."
 fi
@@ -17,54 +18,55 @@ results=()
 run_conversion_and_inference_lora() {
     local model_name=$1
     local size_matrix=$2
-    local model_size_mb=$3
+    local bos_token=$3
 
     # Convert safetensors to gguf
     echo "Running convert_hf_to_gguf.py for $model_name with size $size_matrix..."
-    python convert_hf_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/base \
+    python convert_hf_to_gguf.py $MODELS_REPO/$model_name/size=$size_matrix/base \
+        --outfile $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
         --outtype f32
 
    echo "Running convert_lora_to_gguf.py for $model_name with size $size_matrix..."
-    python3 convert_lora_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/lora \
-        --base reduce-llms-for-testing/$model_name/size=$size_matrix/base \
+    python3 convert_lora_to_gguf.py $MODELS_REPO/$model_name/size=$size_matrix/lora \
+        --base $MODELS_REPO/$model_name/size=$size_matrix/base \
         --outtype f32
 
-    echo "Running llama-export-lora with lora for $model_name with size $size_matrix and model size $model_size_mb..."
+    echo "Running llama-export-lora with lora for $model_name with size $size_matrix..."
     llama-export-lora \
-        -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
+        -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        -o $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32-lora-merged.gguf \
+        --lora $MODELS_REPO/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
 
     # Run inference
-    echo "Running llama-cli without lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_BASE=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -p "When forty winters shall besiege" -n 50 --seed 42)
+    echo "Running llama-cli without lora for $model_name with size $size_matrix..."
+    OUTPUT_BASE=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        -p "When forty winters shall besiege" -n 50 --seed 42)
 
-    echo "Running llama-cli with lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_LORA_HOT=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
-        -p "I see a little silhouetto" -n 50 --seed 42)
+    echo "Running llama-cli with lora for $model_name with size $size_matrix..."
+    OUTPUT_LORA_HOT=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        --lora $MODELS_REPO/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
+        -p "I see a little silhouetto" -n 50 --seed 42)
 
-    echo "Running llama-cli with exported lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_LORA_MERGED=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        -p "I see a little silhouetto" -n 50 --seed 42)
+    echo "Running llama-cli with exported lora for $model_name with size $size_matrix..."
+    OUTPUT_LORA_MERGED=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32-lora-merged.gguf \
+        -p "I see a little silhouetto" -n 50 --seed 42)
 
     # Store the results in the regular array
     results+=("
-     \n\n\033[1mResults for $model_name with size $size_matrix and model size $model_size_mb:\033[0m
+     \n\033[1mResults for $model_name with size $size_matrix:\033[0m
      \n • \033[32mBase:\n$OUTPUT_BASE
      \n • \033[34mLora hot:\n$OUTPUT_LORA_HOT
      \n • \033[36mLora merged:\n$OUTPUT_LORA_MERGED
-     \n\n \033[0m
+     \n \033[0m
     ")
 
-    echo "All steps completed for $model_name with size $size_matrix and model size $model_size_mb!"
+    echo "All steps completed for $model_name with size $size_matrix!"
 }
 
 # Array of parameters to iterate over
 declare -a params=(
-    "Gemma2ForCausalLM 64 19M"
-    # "AnotherModel 128 25M"
+    "Gemma2ForCausalLM 64 "
+    "LlamaForCausalLM 64 <|begin_of_text|>"
 )
 
 # Loop through each set of parameters
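The diff is cut off just after the `params` declaration, before the loop that consumes it. As a reading aid only, here is a minimal sketch of how each entry would typically be split into the function's positional arguments; the loop body below is an assumption, not part of the shown change:

```bash
# Assumed consumer of params (the diff above is truncated before this point).
# Unquoted $param relies on word splitting, so each space-separated entry
# expands into the function's three positional arguments:
#   $1 = model_name, $2 = size_matrix, $3 = bos_token
for param in "${params[@]}"; do
    run_conversion_and_inference_lora $param
done
```

Note that the `Gemma2ForCausalLM` entry leaves the third field empty (trailing space), so `bos_token` expands to nothing for that model, while the `LlamaForCausalLM` entry passes `<|begin_of_text|>`.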