Switch to llama.cpp for gguf conversion #1625

Merged · 3 commits · Feb 8, 2025
1 change: 1 addition & 0 deletions ci/Dockerfile
@@ -156,6 +156,7 @@ RUN set -e; \
&& curl -L --output /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/latest/download/bazelisk-linux-${GOBIN_VERSION} \
&& chmod +x /usr/local/bin/bazel \
&& useradd -ms /bin/bash cyclonedx \
&& npm install --global corepack@latest \
&& npm install --unsafe-perm -g node-gyp @microsoft/rush --omit=dev \
&& npx node-gyp install \
&& pecl channel-update pecl.php.net \
1 change: 1 addition & 0 deletions ci/Dockerfile-secure
@@ -166,6 +166,7 @@ RUN set -e; \
&& useradd -ms /bin/bash cyclonedx \
&& mv /root/.bashrc /home/cyclonedx/.bashrc \
&& chown -R cyclonedx:cyclonedx /home/cyclonedx/.bashrc \
&& npm install --global corepack@latest \
&& npm install --unsafe-perm -g node-gyp @microsoft/rush --omit=dev \
&& npx node-gyp install \
&& pecl channel-update pecl.php.net \
1 change: 1 addition & 0 deletions ci/base-images/debian/install.sh
@@ -35,4 +35,5 @@ if [ x"${SKIP_NODEJS}" != "xyes" ]; then
chmod +x /root/.nvm/nvm.sh
source /root/.nvm/nvm.sh
nvm install ${NODE_VERSION}
npm install --global corepack@latest
fi
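
All three build files now install corepack globally alongside Node.js. Below is a minimal sketch of how the installed corepack might then be used to activate a pinned package manager; the pnpm version is an assumption for illustration, not something this PR pins.

```bash
# Hypothetical follow-up to the corepack install above; pnpm@9 is an assumed
# version, not part of this PR.
corepack enable
corepack prepare pnpm@9 --activate
pnpm --version   # confirm the corepack shim resolves
```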
4 changes: 2 additions & 2 deletions contrib/fine-tuning/Modelfile
@@ -1,11 +1,11 @@
FROM CycloneDX/cdx1-gguf
FROM ./cdx1-gguf-q8_0.gguf

PARAMETER num_ctx 16384
PARAMETER temperature 0.05
PARAMETER top_k 10
PARAMETER top_p 0.5

SYSTEM """You are cdxgen, a CycloneDX and an xBOM expert."""
SYSTEM """You are cdxgen, an expert in CycloneDX and xBOM."""

LICENSE """
apache-2.0
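
The Modelfile now points at the local q8_0 gguf produced by convert-gguf.sh (added below) instead of the CycloneDX/cdx1-gguf Hugging Face reference. A short sketch of exercising it with ollama, mirroring the commented test steps in that script; the directory is the script's assumed output location.

```bash
# Assumes convert-gguf.sh has already placed cdx1-gguf-q8_0.gguf and this
# Modelfile in the output directory below.
cd CycloneDX/cdx1-gguf-Q8_0-GGUF
ollama create cdx1-gguf -f Modelfile
ollama show cdx1-gguf
ollama run cdx1-gguf "Tell me about cdxgen"
```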
4 changes: 2 additions & 2 deletions contrib/fine-tuning/README.md
@@ -32,7 +32,7 @@ lms load CycloneDX/cdx1-mlx --exact --gpu max --identifier cdx1-test --context-l
System prompt:

```text
You are cdxgen, an xBOM and CycloneDX expert.
You are cdxgen, an expert in CycloneDX and xBOM.
```

### gguf testing with ollama
@@ -60,7 +60,7 @@ ollama show cdx1-gguf
top_p 0.5

System
You are cdxgen, a CycloneDX and an xBOM expert.
You are cdxgen, an expert in CycloneDX and xBOM.

License
apache-2.0
34 changes: 34 additions & 0 deletions contrib/fine-tuning/convert-gguf.sh
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
set -e
# Path to the latest llama.cpp compiled with all packages installed under conda
# git clone https://github.com/ggerganov/llama.cpp.git
# cd llama.cpp
# conda create --name llama.cpp python=3.12
# conda activate llama.cpp
# python -m pip install -r requirements.txt
# cmake .
TUNING_TOOL=mlx
HF_ORG=CycloneDX
TOOL_BASE_MODEL=cdx1
LLAMA_CPP_PATH=/Volumes/Work/sandbox/llama.cpp
cd $LLAMA_CPP_PATH
CDXGEN_FT_PATH=/Volumes/Work/CycloneDX/cdxgen/contrib/fine-tuning
GGUF_MODEL_Q8_0=${HF_ORG}/${TOOL_BASE_MODEL}-gguf-Q8_0-GGUF
FUSED_MODEL=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}

rm -rf ${GGUF_MODEL_Q8_0}
mkdir -p ${GGUF_MODEL_Q8_0}
python convert_hf_to_gguf.py --outtype q8_0 --outfile ${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-gguf-Q8_0-GGUF/${TOOL_BASE_MODEL}-gguf-q8_0.gguf --model-name ${GGUF_MODEL_Q8_0} ${FUSED_MODEL}

cp ${CDXGEN_FT_PATH}/Modelfile ${GGUF_MODEL_Q8_0}/
# cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${GGUF_MODEL_Q8_0}/

### Testing with ollama
# cd ${GGUF_MODEL_Q8_0}
# ollama create cdx1-gguf -f Modelfile
# ollama show cdx1-gguf
# ollama run cdx1-gguf

export HF_HUB_ENABLE_HF_TRANSFER=0
huggingface-cli whoami
huggingface-cli upload --quiet --repo-type model ${GGUF_MODEL_Q8_0} ./${GGUF_MODEL_Q8_0} .
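
A hedged sketch for smoke-testing the exported gguf with llama.cpp itself before uploading. The llama-cli binary location depends on how llama.cpp was built and is an assumption here, as is the prompt reuse from the mlx tests.

```bash
# Assumes llama-cli was built in ${LLAMA_CPP_PATH}; adjust the binary path to
# your build layout (e.g. ./bin/llama-cli or ./build/bin/llama-cli).
./llama-cli \
  -m ${CDXGEN_FT_PATH}/${GGUF_MODEL_Q8_0}/cdx1-gguf-q8_0.gguf \
  -p "Tell me about cdxgen" \
  -n 256 --temp 0.05
```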
42 changes: 27 additions & 15 deletions contrib/fine-tuning/fine-tune-mlx.sh
@@ -1,11 +1,14 @@
#!/usr/bin/env bash
set -e
TUNING_TOOL=mlx
# This must be really ibm-granite. Wish mlx supported this.
BASE_MODEL=unsloth/phi-4
BASE_MODEL_MLX=${BASE_MODEL}-${TUNING_TOOL}
HF_ORG=CycloneDX
TOOL_BASE_MODEL=cdx1
NUM_LAYERS=16
ADAPTERS_PATH=adapters
DATASET_PATH=dataset

FUSED_MODEL=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}
FUSED_GGUF_MODEL=${HF_ORG}/${TOOL_BASE_MODEL}-gguf
@@ -14,34 +14,45 @@ QUANT_MODEL_6BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-6bit
QUANT_MODEL_4BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-4bit

### mlx-lm needs train.jsonl and valid.jsonl
rm -rf dataset ${HF_ORG} adapters ${BASE_MODEL}
mkdir -p dataset ${HF_ORG}
rm -rf ${DATASET_PATH} ${HF_ORG} ${ADAPTERS_PATH} ${BASE_MODEL}
mkdir -p ${DATASET_PATH} ${HF_ORG}

node prepare.js dataset
node validator.js dataset
# Create a single train and valid jsonl from our dataset
# In the future, we can have a separate dataset pipeline
node prepare.js ${DATASET_PATH}

# Validate jsonlines to reduce errors in the model
# Need to validate and check for malicious code snippets here at some point
node validator.js ${DATASET_PATH}

# This step always pulls the latest base model from HF. Need to think about versioning and checksum to prevent model injection attacks
echo "Test base model with the prompt 'Tell me about cdxgen'. Usually yields a low-quality response."
mlx_lm.generate --model ${BASE_MODEL} --prompt "Tell me about cdxgen" --temp 0.05

# We first convert from HF to mlx
# We first convert from HF to mlx to speed up the rest of the process
# It is possible that the gguf export is getting broken due to this split processing?
rm -rf ${BASE_MODEL_MLX}
mlx_lm.convert --hf-path ${BASE_MODEL} --mlx-path ${BASE_MODEL_MLX}

echo "Weight-Decomposed Low-Rank Adaptation (DoRA) fine-tuning ${BASE_MODEL_MLX} with cdx1 dataset. This might take a while ..."
mlx_lm.lora --model ${BASE_MODEL_MLX} --train --data dataset --fine-tune-type lora --batch-size 1 --num-layers ${NUM_LAYERS} --iters 1000 --grad-checkpoint
# We use LoRA fine-tuning over DoRA due to better compatibility with vLLM and llama.cpp
echo "Low-Rank Adaptation (LoRA) fine-tuning ${BASE_MODEL_MLX} with cdx1 dataset. This might take a while ..."
mlx_lm.lora --model ${BASE_MODEL_MLX} --train --data dataset --adapter-path ${ADAPTERS_PATH} --fine-tune-type lora --batch-size 1 --num-layers ${NUM_LAYERS} --iters 1000 --grad-checkpoint

echo "Fuse model to ${FUSED_MODEL} using the cdx1 adapters"
rm -rf ${FUSED_MODEL} ${FUSED_GGUF_MODEL}
mlx_lm.fuse --model ${BASE_MODEL_MLX} --adapter-path adapters --hf-path ${FUSED_MODEL} --save-path ${FUSED_MODEL} --de-quantize --export-gguf --gguf-path cdx1-f16.gguf
# gguf export via mlx isn't working
# mlx_lm.fuse --model ${BASE_MODEL_MLX} --adapter-path adapters --hf-path ${FUSED_MODEL} --save-path ${FUSED_MODEL} --de-quantize --export-gguf --gguf-path cdx1-f16.gguf
mlx_lm.fuse --model ${BASE_MODEL_MLX} --adapter-path adapters --hf-path ${FUSED_MODEL} --save-path ${FUSED_MODEL} --de-quantize

echo "Test fused model with the prompt 'Tell me about cdxgen'. Must yield a better response."
mlx_lm.generate --model ./${FUSED_MODEL} --prompt "Tell me about cdxgen" --temp 0.05

mkdir -p ${FUSED_GGUF_MODEL}
mv ${FUSED_MODEL}/cdx1-f16.gguf ${FUSED_GGUF_MODEL}
cp Modelfile ${FUSED_GGUF_MODEL}/
cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${FUSED_GGUF_MODEL}/
sed -i'' 's|CycloneDX/cdx1-gguf|./cdx1-f16.gguf|g' ${FUSED_GGUF_MODEL}/Modelfile
# Not working
# mkdir -p ${FUSED_GGUF_MODEL}
# mv ${FUSED_MODEL}/cdx1-f16.gguf ${FUSED_GGUF_MODEL}
# cp Modelfile ${FUSED_GGUF_MODEL}/
# cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${FUSED_GGUF_MODEL}/
# sed -i'' 's|CycloneDX/cdx1-gguf|./cdx1-f16.gguf|g' ${FUSED_GGUF_MODEL}/Modelfile

echo "Create quantized models"
rm -rf ${QUANT_MODEL_8BIT}
Expand All @@ -58,5 +72,3 @@ rm -rf ${QUANT_MODEL_4BIT}
mlx_lm.convert --hf-path ${FUSED_MODEL} --mlx-path ${QUANT_MODEL_4BIT} -q --q-bits 4 --dtype bfloat16
echo "Test ${QUANT_MODEL_4BIT} with the prompt 'Tell me about cdxgen'. Must yield a better response."
mlx_lm.generate --model ./${QUANT_MODEL_4BIT} --prompt "Tell me about cdxgen" --temp 0.05

rm -rf dataset adapters ${BASE_MODEL}
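
The quantized outputs can also be loaded in LM Studio the same way the README loads the fused mlx model; a sketch assuming the 8-bit variant and the Modelfile's 16384 context length.

```bash
# Repo name comes from QUANT_MODEL_8BIT above; the identifier and context
# length mirror the README and Modelfile and are otherwise assumptions.
lms load CycloneDX/cdx1-mlx-8bit --exact --gpu max --identifier cdx1-8bit-test --context-length 16384
```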
2 changes: 0 additions & 2 deletions contrib/fine-tuning/upload-hf.sh
@@ -5,7 +5,6 @@ HF_ORG=CycloneDX
TUNING_TOOL=mlx
TOOL_BASE_MODEL=cdx1
FUSED_MODEL=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}
FUSED_GGUF_MODEL=${HF_ORG}/${TOOL_BASE_MODEL}-gguf
QUANT_MODEL_8BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-8bit
QUANT_MODEL_6BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-6bit
QUANT_MODEL_4BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-4bit
@@ -22,4 +21,3 @@ huggingface-cli upload --quiet --repo-type model ${QUANT_MODEL_6BIT} ./${QUANT_M
huggingface-cli upload --quiet --repo-type model ${QUANT_MODEL_4BIT} ./${QUANT_MODEL_4BIT} .

huggingface-cli upload --quiet --repo-type model ${FUSED_MODEL} ./${FUSED_MODEL} .
huggingface-cli upload --quiet --repo-type model ${FUSED_GGUF_MODEL} ./${FUSED_GGUF_MODEL} .
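
With this PR the gguf upload moves out of upload-hf.sh and into convert-gguf.sh, which now handles the llama.cpp conversion and the Hugging Face push in one place. The overall publishing flow implied by the scripts in contrib/fine-tuning/ is roughly as follows; the ordering is an inference from the script contents, not stated in the PR.

```bash
# Rough end-to-end order implied by the scripts above.
./fine-tune-mlx.sh   # LoRA fine-tune with mlx-lm, fuse, and quantize (8/6/4-bit)
./upload-hf.sh       # upload the fused and quantized mlx models
./convert-gguf.sh    # convert the fused model to gguf q8_0 with llama.cpp and upload it
```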