Merge pull request #121 from amosproj/91_Benchmark_trained_model
Added benchmark
christianwielenberg authored Jul 17, 2024
2 parents 89fd972 + 954d927 commit a4c52ad
Showing 8 changed files with 553 additions and 0 deletions.
49 changes: 49 additions & 0 deletions src/hpc_scripts/benchmark/benchmark/benchmark_cncf.sbatch
@@ -0,0 +1,49 @@
#!/bin/bash -l
#SBATCH --job-name=benchmark_cncf
#SBATCH --ntasks=1
#SBATCH --gres=gpu:a100:1 -p a100
#SBATCH --output=R-%x.%j.out
#SBATCH --error=R-%x.%j.err
#SBATCH --mail-type=end,fail
#SBATCH --time=01:15:00
#SBATCH --export=NONE
unset SLURM_EXPORT_ENV

# Set proxy to access internet from the node
export http_proxy=http://proxy:80
export https_proxy=http://proxy:80

module purge
module load python
module load cuda
module load cudnn

# Conda
conda activate amos_env # replace with the name of your conda env

# Copy data to $TMPDIR for faster access (recommended especially for long jobs)
#cp -r "/home/janus/iwb6-datasets/FRAGMENTS" "$TMPDIR"
cd "$TMPDIR"

# create a temporary job dir on $WORK
mkdir ${WORK}/$SLURM_JOB_ID

# copy input file from location where job was submitted, and run
cp -r ${SLURM_SUBMIT_DIR}/benchmark_results_cncf.py .
mkdir -p output/

# Periodically snapshot GPU utilization (each write overwrites the previous snapshot)
(
while true; do
    nvidia-smi > ${SLURM_SUBMIT_DIR}/gpu_usage_${SLURM_JOB_ID}.log
    sleep 60
done
) &

# Run the benchmark script (generating answers on the node)
torchrun benchmark_results_cncf.py

# Create a directory on $HOME and copy the benchmark results there
mkdir ${HOME}/$SLURM_JOB_ID
cp -r output ${HOME}/$SLURM_JOB_ID

49 changes: 49 additions & 0 deletions src/hpc_scripts/benchmark/benchmark/benchmark_gemma.sbatch
@@ -0,0 +1,49 @@
#!/bin/bash -l
#SBATCH --job-name=benchmark_gemma
#SBATCH --ntasks=1
#SBATCH --gres=gpu:a100:1 -p a100
#SBATCH --output=R-%x.%j.out
#SBATCH --error=R-%x.%j.err
#SBATCH --mail-type=end,fail
#SBATCH --time=01:15:00
#SBATCH --export=NONE
unset SLURM_EXPORT_ENV

# Set proxy to access internet from the node
export http_proxy=http://proxy:80
export https_proxy=http://proxy:80

module purge
module load python
module load cuda
module load cudnn

# Conda
conda activate amos_env # replace with the name of your conda env

# Copy data to $TMPDIR for faster access (recommended especially for long jobs)
#cp -r "/home/janus/iwb6-datasets/FRAGMENTS" "$TMPDIR"
cd "$TMPDIR"

# create a temporary job dir on $WORK
mkdir ${WORK}/$SLURM_JOB_ID

# copy input file from location where job was submitted, and run
cp -r ${SLURM_SUBMIT_DIR}/benchmark_results_gemma.py .
mkdir -p output/

# Periodically snapshot GPU utilization (each write overwrites the previous snapshot)
(
while true; do
    nvidia-smi > ${SLURM_SUBMIT_DIR}/gpu_usage_${SLURM_JOB_ID}.log
    sleep 60
done
) &

# Run the benchmark script (generating answers on the node)
torchrun benchmark_results_gemma.py

# Create a directory on $HOME and copy the benchmark results there
mkdir ${HOME}/$SLURM_JOB_ID
cp -r output ${HOME}/$SLURM_JOB_ID

49 changes: 49 additions & 0 deletions src/hpc_scripts/benchmark/benchmark/benchmark_llama.sbatch
@@ -0,0 +1,49 @@
#!/bin/bash -l
#SBATCH --job-name=benchmark_llama
#SBATCH --ntasks=1
#SBATCH --gres=gpu:a100:1 -p a100
#SBATCH --output=R-%x.%j.out
#SBATCH --error=R-%x.%j.err
#SBATCH --mail-type=end,fail
#SBATCH --time=01:15:00
#SBATCH --export=NONE
unset SLURM_EXPORT_ENV

# Set proxy to access internet from the node
export http_proxy=http://proxy:80
export https_proxy=http://proxy:80

module purge
module load python
module load cuda
module load cudnn

# Conda
conda activate amos_env # replace with the name of your conda env

# Copy data to $TMPDIR for faster access (recommended especially for long jobs)
#cp -r "/home/janus/iwb6-datasets/FRAGMENTS" "$TMPDIR"
cd "$TMPDIR"

# create a temporary job dir on $WORK
mkdir ${WORK}/$SLURM_JOB_ID

# copy input file from location where job was submitted, and run
cp -r ${SLURM_SUBMIT_DIR}/benchmark_results_llama.py .
mkdir -p output/

# Periodically snapshot GPU utilization (each write overwrites the previous snapshot)
(
while true; do
    nvidia-smi > ${SLURM_SUBMIT_DIR}/gpu_usage_${SLURM_JOB_ID}.log
    sleep 60
done
) &

# Run the benchmark script (generating answers on the node)
torchrun benchmark_results_llama.py

# Create a directory on $HOME and copy the benchmark results there
mkdir ${HOME}/$SLURM_JOB_ID
cp -r output ${HOME}/$SLURM_JOB_ID

68 changes: 68 additions & 0 deletions src/hpc_scripts/benchmark/benchmark/benchmark_prometheus.py
@@ -0,0 +1,68 @@
import json

import pandas as pd
from datasets import load_dataset

from prometheus_eval.vllm import VLLM
from prometheus_eval import PrometheusEval
from prometheus_eval.prompts import RELATIVE_PROMPT

dataset = load_dataset("Kubermatic/Merged_QAs", split="train[-50:]")
# The result CSVs are written by the benchmark jobs without a header row
gemma_answers = pd.read_csv("gemma_results.csv", header=None)
cncf_answers = pd.read_csv("cncf_results.csv", header=None)
llama_answers = pd.read_csv("llama_results.csv", header=None)

model = VLLM(model="prometheus-eval/prometheus-7b-v2.0")
judge = PrometheusEval(model=model, relative_grade_template=RELATIVE_PROMPT)

# Build the comparison data structure (metadata, the two models under test, and per-example results)
data_dic = {
    "metadata": [
        {
            "source_path": "First Run",
            "custom_fields_schema": []
        }
    ],
    "models": [
        {"name": "Base Gemma Model 9B"},
        {"name": "Finetuned Model 9B"}
    ],
    "examples": []
}
for i in range(len(gemma_answers.index)):
    data = {
        "instruction": dataset["Question"][i],
        "response_A": gemma_answers.iloc[i, 0],
        "response_B": cncf_answers.iloc[i, 0],
        "reference_answer": f"{dataset['Question'][i]} \n{dataset['Answer'][i]}",
        "rubric": "Which is the better answer to the question, taking into account the reference answer?"
    }

    feedback, score = judge.single_relative_grade(**data)
    # Map the verdict to a numeric score: -1 if response A (base model) wins, 1 if response B (fine-tuned model) wins
    score = -1 if score == 'A' else 1

    example = {
        "input_text": dataset["Question"][i],
        "tags": ["CNCF"],  # A list of keywords for categorizing prompts
        "output_text_a": gemma_answers.iloc[i, 0],
        "output_text_b": cncf_answers.iloc[i, 0],
        "score": score,  # Score from the judge LLM
        "individual_rater_scores": [],
        "custom_fields": {}
    }
    data_dic["examples"].append(example)

    print("Feedback:", feedback)
    print("Score:", score)

file_path = "output/prometheus.json"

with open(file_path, 'w') as json_file:
    json.dump(data_dic, json_file, indent=4)
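
The JSON written above pairs each question with both models' answers and the judge's verdict, so a small follow-up script can compute a head-to-head win rate. A minimal sketch, assuming the job has finished and output/prometheus.json has the layout built above; this summarizer is illustrative and not part of the commit:

import json

# Load the comparison file produced by benchmark_prometheus.py
with open("output/prometheus.json") as f:
    results = json.load(f)

# Each example is scored -1 (response A, the base model, preferred)
# or 1 (response B, the fine-tuned model, preferred)
scores = [ex["score"] for ex in results["examples"]]
wins_b = sum(1 for s in scores if s == 1)

model_a = results["models"][0]["name"]
model_b = results["models"][1]["name"]
print(f"{model_b} preferred over {model_a} in {wins_b}/{len(scores)} comparisons")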
52 changes: 52 additions & 0 deletions src/hpc_scripts/benchmark/benchmark/benchmark_prometheus.sbatch
@@ -0,0 +1,52 @@
#!/bin/bash -l
#SBATCH --job-name=benchmark_prometheus
#SBATCH --ntasks=1
#SBATCH --gres=gpu:a100:1 -p a100
#SBATCH --output=R-%x.%j.out
#SBATCH --error=R-%x.%j.err
#SBATCH --mail-type=end,fail
#SBATCH --time=01:15:00
#SBATCH --export=NONE
unset SLURM_EXPORT_ENV

# Set proxy to access internet from the node
export http_proxy=http://proxy:80
export https_proxy=http://proxy:80

module purge
module load python
module load cuda
module load cudnn

# Conda
conda activate amos_env # replace with the name of your conda env

# Copy data to $TMPDIR for faster access (recommended especially for long jobs)
#cp -r "/home/janus/iwb6-datasets/FRAGMENTS" "$TMPDIR"
cd "$TMPDIR"

# create a temporary job dir on $WORK
mkdir ${WORK}/$SLURM_JOB_ID

# copy input file from location where job was submitted, and run
cp -r ${SLURM_SUBMIT_DIR}/benchmark_prometheus.py .
cp -r ${SLURM_SUBMIT_DIR}/cncf_results.csv .
cp -r ${SLURM_SUBMIT_DIR}/gemma_results.csv .
cp -r ${SLURM_SUBMIT_DIR}/llama_results.csv .
mkdir -p output/

# Periodically snapshot GPU utilization (each write overwrites the previous snapshot)
(
while true; do
    nvidia-smi > ${SLURM_SUBMIT_DIR}/gpu_usage_${SLURM_JOB_ID}.log
    sleep 60
done
) &

# Run the evaluation script (judging the generated answers with Prometheus)
python benchmark_prometheus.py

# Create a directory on $HOME and copy the evaluation results there
mkdir ${HOME}/$SLURM_JOB_ID
cp -r output ${HOME}/$SLURM_JOB_ID

80 changes: 80 additions & 0 deletions src/hpc_scripts/benchmark/benchmark/benchmark_results_cncf.py
@@ -0,0 +1,80 @@
from huggingface_hub import HfApi, login
import os

# Read the Hugging Face token from the environment rather than hardcoding it
HF_TOKEN = os.environ['HF_TOKEN']
api = HfApi()
login(HF_TOKEN, add_to_git_credential=True)

from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import torch
from tqdm import tqdm
import torch.distributed as dist
import torch.multiprocessing as mp
from multiprocessing import freeze_support
from peft import PeftModel

import csv
import gc

NUM_GPUS = 1
dataset = load_dataset("Kubermatic/Merged_QAs", split="train[-50:]")

# Function to run inference
def run_inference(rank, world_size, data_length):
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    base_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-9b-it", device_map=f"cuda:{rank}")
    model = PeftModel.from_pretrained(base_model, "Kubermatic/DeepCNCF9BAdapter", device_map=f"cuda:{rank}")
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
    model.eval()

    # Shard the dataset evenly across ranks; the last rank takes the remainder
    start_index = int(rank * data_length / world_size)
    end_index = data_length if rank == world_size - 1 else int((rank + 1) * data_length / world_size)

    with torch.no_grad():
        for i in tqdm(range(start_index, end_index)):
            question = dataset['Question'][i]

            try:
                chat = [
                    {"role": "user", "content": question},
                ]
                prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
                inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(f"cuda:{rank}")

                outputs = model.generate(**inputs,
                                         max_new_tokens=512,
                                         do_sample=True)
                result = tokenizer.decode(outputs[0])
                print(result)

                with open(f"output/benchmark_results{rank}.csv", 'a+', newline='') as file:
                    write = csv.writer(file)
                    write.writerow([result])

                # Clean up to free memory
                del inputs, outputs, result, question
                torch.cuda.empty_cache()
                gc.collect()
            except Exception as error:
                print("An error occurred:", type(error).__name__, "-", error, flush=True)
                torch.cuda.empty_cache()
                gc.collect()

    del model
    torch.cuda.empty_cache()
    gc.collect()

# Main script
if __name__ == '__main__':
    freeze_support()
    data_length = len(dataset['Question'])
    # Spawn one worker per GPU; each worker generates answers for its shard of the questions
    mp.spawn(run_inference,
             args=(NUM_GPUS, data_length),
             nprocs=NUM_GPUS,
             join=True)
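
Each rank appends its generations to output/benchmark_results{rank}.csv, while benchmark_prometheus.py reads a single cncf_results.csv (and likewise gemma_results.csv and llama_results.csv). A minimal sketch of a merge step under that assumption; the commit itself does not include one, and the file names here simply mirror the scripts above:

import csv
import glob

# Concatenate the per-rank result shards into the single CSV the judge script expects
with open("cncf_results.csv", "w", newline="") as out_file:
    writer = csv.writer(out_file)
    for shard in sorted(glob.glob("output/benchmark_results*.csv")):
        with open(shard, newline="") as in_file:
            for row in csv.reader(in_file):
                writer.writerow(row)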