diff --git a/reduce-llms-for-testing b/reduce-llms-for-testing
new file mode 160000
index 00000000000000..234efef10f329f
--- /dev/null
+++ b/reduce-llms-for-testing
@@ -0,0 +1 @@
+Subproject commit 234efef10f329fe31d2a2d5f1f07ef3d541479ef
diff --git a/tests/test_lora_conversion_and_inference.sh b/tests/test_lora_conversion_and_inference.sh
index a99d1831c7c8f7..b92f4facffcde8 100755
--- a/tests/test_lora_conversion_and_inference.sh
+++ b/tests/test_lora_conversion_and_inference.sh
@@ -1,12 +1,13 @@
 #!/bin/bash
 set -e
 
-MODELS_REPO=https://huggingface.co/ltoniazzi/reduce-llms-for-testing
+MODELS_REPO=reduce-llms-for-testing
+MODELS_REPO_URL=https://huggingface.co/ltoniazzi/$MODELS_REPO
 
 # Clone the Hugging Face repository if the directory does not exist
-if [ ! -d "reduce-llms-for-testing" ]; then
+if [ ! -d "$MODELS_REPO" ]; then
     echo "Cloning the Hugging Face repository..."
-    git clone $MODELS_REPO
+    git clone $MODELS_REPO_URL
 else
     echo "Repository already exists. Skipping clone."
 fi
@@ -17,54 +18,55 @@ results=()
 run_conversion_and_inference_lora() {
     local model_name=$1
     local size_matrix=$2
-    local model_size_mb=$3
+    local bos_token=$3
 
     # Convert safetensors to gguf
     echo "Running convert_hf_to_gguf.py for $model_name with size $size_matrix..."
-    python convert_hf_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/base \
+    python convert_hf_to_gguf.py $MODELS_REPO/$model_name/size=$size_matrix/base \
+        --outfile $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
         --outtype f32
 
    echo "Running convert_lora_to_gguf.py for $model_name with size $size_matrix..."
-    python3 convert_lora_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/lora \
-        --base reduce-llms-for-testing/$model_name/size=$size_matrix/base \
+    python3 convert_lora_to_gguf.py $MODELS_REPO/$model_name/size=$size_matrix/lora \
+        --base $MODELS_REPO/$model_name/size=$size_matrix/base \
         --outtype f32
 
-    echo "Running llama-export-lora with lora for $model_name with size $size_matrix and model size $model_size_mb..."
+    echo "Running llama-export-lora with lora for $model_name with size $size_matrix..."
     llama-export-lora \
-        -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
+        -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        -o $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32-lora-merged.gguf \
+        --lora $MODELS_REPO/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
 
     # Run inference
-    echo "Running llama-cli without lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_BASE=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -p "When forty winters shall besiege" -n 50 --seed 42)
+    echo "Running llama-cli without lora for $model_name with size $size_matrix..."
+    OUTPUT_BASE=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        -p "When forty winters shall besiege" -n 50 --seed 42)
 
-    echo "Running llama-cli with lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_LORA_HOT=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
-        -p "I see a little silhouetto" -n 50 --seed 42)
+    echo "Running llama-cli with lora for $model_name with size $size_matrix..."
+    OUTPUT_LORA_HOT=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        --lora $MODELS_REPO/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
+        -p "I see a little silhouetto" -n 50 --seed 42)
 
-    echo "Running llama-cli with exported lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_LORA_MERGED=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        -p "I see a little silhouetto" -n 50 --seed 42)
+    echo "Running llama-cli with exported lora for $model_name with size $size_matrix..."
+    OUTPUT_LORA_MERGED=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32-lora-merged.gguf \
+        -p "I see a little silhouetto" -n 50 --seed 42)
 
     # Store the results in the regular array
     results+=("
-     \n\n\033[1mResults for $model_name with size $size_matrix and model size $model_size_mb:\033[0m
+     \n\033[1mResults for $model_name with size $size_matrix:\033[0m
      \n • \033[32mBase:\n$OUTPUT_BASE
      \n • \033[34mLora hot:\n$OUTPUT_LORA_HOT
      \n • \033[36mLora merged:\n$OUTPUT_LORA_MERGED
-     \n\n \033[0m
+     \n \033[0m
     ")
 
-    echo "All steps completed for $model_name with size $size_matrix and model size $model_size_mb!"
+    echo "All steps completed for $model_name with size $size_matrix!"
 }
 
 # Array of parameters to iterate over
 declare -a params=(
-    "Gemma2ForCausalLM 64 19M"
-    # "AnotherModel 128 25M"
+    "Gemma2ForCausalLM 64 "
+    "LlamaForCausalLM 64 <|begin_of_text|>"
 )
 
 # Loop through each set of parameters
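The diff is cut off just after the `params` declaration, before the loop that consumes it. As a reading aid only, here is a minimal sketch of how each entry would typically be split into the function's positional arguments; the loop body below is an assumption, not part of the shown change:

```bash
# Assumed consumer of params (the diff above is truncated before this point).
# Unquoted $param relies on word splitting, so each space-separated entry
# expands into the function's three positional arguments:
#   $1 = model_name, $2 = size_matrix, $3 = bos_token
for param in "${params[@]}"; do
    run_conversion_and_inference_lora $param
done
```

Note that the `Gemma2ForCausalLM` entry leaves the third field empty (trailing space), so `bos_token` expands to nothing for that model, while the `LlamaForCausalLM` entry passes `<|begin_of_text|>`.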