Add FastChat accuracy test script #35

Merged 12 commits on Dec 19, 2024
25 changes: 25 additions & 0 deletions .github/workflows/accuracy_benchmark.yml
@@ -0,0 +1,25 @@
name: Daily Accuracy Benchmark

on:
workflow_dispatch:
schedule:
# Runs daily at 3:00 AM, Beijing time.
- cron: '0 19 * * *' # This is UTC time

jobs:
accuracy_benchmark:
runs-on: self-hosted

steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Perform the accuracy benchmark
run: cd benchmarks/accuracy && bash ./run.sh
env:
OSS_AK_ID: ${{ secrets.OSS_AK_ID }}
OSS_AK_SECRET: ${{ secrets.OSS_AK_SECRET }}
OSS_ENDPOINT: ${{ secrets.OSS_ENDPOINT }}
M6_TENANT: ${{ secrets.M6_TENANT }}
MIT_SPIDER_TOKEN: ${{ secrets.MIT_SPIDER_TOKEN }}
MIT_SPIDER_URL: ${{ secrets.MIT_SPIDER_URL }}
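
Besides the daily schedule, the workflow can also be triggered manually via `workflow_dispatch`; a minimal sketch using the GitHub CLI (assuming `gh` is installed and authorized for this repository):

```bash
# Hypothetical manual trigger of the benchmark workflow.
gh workflow run accuracy_benchmark.yml
```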
1 change: 1 addition & 0 deletions .gitignore
@@ -158,3 +158,4 @@ log/

!/torchacc/dist
torchacc/version.py
temp
110 changes: 110 additions & 0 deletions benchmarks/accuracy/README.md
@@ -0,0 +1,110 @@
# Accuracy Benchmark

## Overview

The Accuracy Benchmark evaluates the performance of TorchAcc using [FastChat](https://github.com/AlibabaPAI/FastChat_TorchAcc) against a baseline established by Torch native. Its goal is to ensure that TorchAcc maintains accuracy comparable to Torch native.

## Evaluation Process

To perform the evaluation, follow these steps:

1. Set Baseline

```bash
bash ./llama.sh <ORIGINAL_MODEL_DIR> 0
```

Run the Torch native job using `run_clm.py`, a script copied from HuggingFace Transformers. `ORIGINAL_MODEL_DIR` is the path to the original model checkpoint downloaded from HuggingFace or ModelScope. `0` indicates that this training job does not use `torchacc`.

2. Run TorchAcc

```bash
bash ./llama.sh <ORIGINAL_MODEL_DIR> 1
```

Run the TorchAcc job using the same script as used for Torch native. `ORIGINAL_MODEL_DIR` is the path to the original model checkpoint downloaded from HuggingFace or ModelScope. `1` indicates that this training job uses `torchacc`.


3. Evaluate Original

```bash
bash ./mtbench.sh <ORIGINAL_MODEL_DIR>
```

Evaluate the original checkpoint using FastChat. `ORIGINAL_MODEL_DIR` is the path to the original model checkpoint downloaded from HuggingFace or ModelScope.

4. Evaluate Outputs

```bash
bash ./mtbench.sh <TORCH_NATIVE_CHECKPOINT>
bash ./mtbench.sh <TORCHACC_CHECKPOINT>
```

Evaluate the checkpoints produced by the Torch native and TorchAcc training jobs. `TORCH_NATIVE_CHECKPOINT` is the path to the model checkpoint output by the Torch native job. `TORCHACC_CHECKPOINT` is the path to the model checkpoint output by the TorchAcc job.

5. Compare Results

Compare the training and evaluation results from the previous steps; a comparison sketch is shown below.
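
A minimal comparison sketch, assuming the HuggingFace `Trainer` invoked by `run_clm.py` wrote its usual `train_results.json` into each output directory and that `jq` is installed; the directory names are placeholders matching the defaults in `llama.sh`:

```bash
# Print the key training metrics for the Torch native and TorchAcc runs.
for dir in ./llama-3.2-1B_torch ./llama-3.2-1B_acc; do
    echo "== ${dir} =="
    jq '{train_loss, train_runtime, train_steps_per_second}' "${dir}/train_results.json"
done
```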


You can simply execute the `run.sh` script to perform all the steps.


## Main Files


All the files used in the accuracy benchmark are listed below.

* run.sh

The script integrates all the steps.

```bash
bash ./run.sh [local_model_dir]
```

You can pass a local model checkpoint path to the script. If no local path is specified, the script downloads `llama-3.2-1B` from ModelScope; a sketch of an equivalent manual download follows.
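
For reference, a minimal sketch of a manual download, assuming the `modelscope` command-line tool is installed and that `LLM-Research/Llama-3.2-1B` is the ModelScope model id (both are assumptions, not taken from this PR):

```bash
# Hypothetical manual download; run.sh performs this step automatically.
pip install modelscope
modelscope download --model LLM-Research/Llama-3.2-1B --local_dir ./llama-3.2-1B
```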

* llama.sh

```bash
# Usage: $0 <local_model_dir> <use_torchacc> [checkpoint_output_dir]
# local_model_dir: Path to the local directory where the model will be saved.
# use_torchacc: 0 or 1 to indicate whether to use TorchAcc.
# checkpoint_output_dir: Optional. Default is the model name in <local_model_dir>.
bash ./llama.sh <local_model_dir> <use_torchacc> [checkpoint_output_dir]
```

The script runs the Llama training job using `run_clm.py` with either Torch native or TorchAcc; an example invocation follows.
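
For example, a hypothetical invocation that trains with TorchAcc enabled and writes checkpoints to an explicit directory (both paths are placeholders):

```bash
bash ./llama.sh ./llama-3.2-1B 1 ./llama-3.2-1B_acc
```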

* fastchat.sh

The script runs the evaluation task on your checkpoint. The required environment variables (`ENV_VARIABLES`) can be obtained from the maintainers of TorchAcc; an example invocation follows.

```bash
ENV_VARIABLES bash ./fastchat.sh <local_model_dir>
```
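
For example, with placeholder values (the real values come from the TorchAcc maintainers):

```bash
MIT_SPIDER_TOKEN=<token> MIT_SPIDER_URL=<url> M6_TENANT=<tenant> \
    bash ./fastchat.sh ./llama-3.2-1B_acc
```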

## Evaluation Results

A sample run produces results like the following:

```

==================== Training Results ====================
Torch train loss = 2.091632914827291
TorchAcc train loss = 2.0917317353245495
Torch train runtime (s) = 2552.8252
TorchAcc train runtime (s) = 2272.1399
Torch train steps per second = 5.785
TorchAcc train steps per second = 6.5

=================== Evaluation Results ===================
Original Model Score = 1.4625
Torch Model Score = 1.1125
TorchAcc Model Score = 1.100629

More details can be found in = ./result/20241205_223009
==========================================================

```
68 changes: 68 additions & 0 deletions benchmarks/accuracy/fastchat.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash

if [ "$#" -ne 1 ]; then
echo "Usage: MIT_SPIDER_TOKEN=*** MIT_SPIDER_URL=*** M6_TENANT=*** $0 <local_model_dir>"
echo "You must provide exactly 1 parameter."
exit 1
fi

if [[ -z "${MIT_SPIDER_TOKEN}" || -z "${MIT_SPIDER_URL}" || -z "${M6_TENANT}" ]]; then
echo "Error: One or more required environment variables are not set."
echo "Required variables:"
[[ -z "${MIT_SPIDER_TOKEN}" ]] && echo " - MIT_SPIDER_TOKEN"
[[ -z "${MIT_SPIDER_URL}" ]] && echo " - MIT_SPIDER_URL"
[[ -z "${M6_TENANT}" ]] && echo " - M6_TENANT"
exit 1
fi

MODEL_DIR=$(realpath "$1")
MODEL_ID=$(basename "$MODEL_DIR")_$(date +"%Y%m%d_%H%M%S")
NUM_GPUS_TOTAL=1
JUDGMENT_PARALLEL=4

function install_fastchat {
if [[ ! -d "FastChat_TorchAcc" ]]; then
git clone https://github.com/AlibabaPAI/FastChat_TorchAcc.git
fi

output=$(python -m pip list | grep fschat)
if [[ -n $output ]]; then
echo "All requirements are installed."
else
echo "Install requirements ..."
pushd ./FastChat_TorchAcc
pip install --use-pep517 -e ".[model_worker,llm_judge]"
pip install gradio
popd
fi
}

function run_bench {
SCRIPT_DIR=./FastChat_TorchAcc/fastchat/llm_judge/
if [[ ! -d "$SCRIPT_DIR" ]]; then
echo "Directory $SCRIPT_DIR does not exist."
exit 1
fi
if [[ ! -d "$MODEL_DIR" ]]; then
echo "Directory $MODEL_DIR does not exist."
exit 1
fi

cd "$SCRIPT_DIR"

echo "====gen start===="
python gen_model_answer.py --model-path "$MODEL_DIR" --model-id "$MODEL_ID" --num-gpus-total "$NUM_GPUS_TOTAL"
echo "====gen done===="

echo "====judge start===="
python gen_judgment.py --model-list $MODEL_ID --parallel $JUDGMENT_PARALLEL
echo "====judge done===="

echo "====show score===="
# python show_result.py --model-list $MODEL_ID
python show_result_by_category.py --model-list $MODEL_ID

}

install_fastchat
run_bench
75 changes: 75 additions & 0 deletions benchmarks/accuracy/llama.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash

if [[ $# -ne 2 && $# -ne 3 ]]; then
echo "Usage: $0 <local_model_dir> <use_torchacc> [checkpoint_output_dir]"
echo " local_model_dir: Path to the local directory where the model will be saved."
echo " use_torchacc: 0 or 1 to indicate whether to use TorchAcc."
echo " checkpoint_output_dir: Optional. Default is the model name in <local_model_dir>."
exit 1
fi

MODEL_DIR=$(realpath "$1")
USE_TORCHACC=$2
RANK="${RANK:-0}"
WORLD_SIZE="${WORLD_SIZE:-1}"
MASTER_ADDR="${MASTER_ADDR:-127.0.0.1}"
MASTER_PORT="${MASTER_PORT:-9010}"
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
BATCH_SIZE="${BATCH_SIZE:-2}"
SEQLEN="${SEQLEN:-1024}"
DATASET_NAME="${DATASET_NAME:-wikitext}"
DATASET_CONFIG_NAME="${DATASET_CONFIG_NAME:-wikitext-103-raw-v1}"
PRECISION="bf16=true"
RUN_CLM=./run_clm.py


# Select the FSDP config and output directory according to whether TorchAcc is enabled.
if [ "$USE_TORCHACC" -eq 0 ]; then
export USE_TORCH_XLA=0
FSDP_CONFIG="llama_fsdp_torch.json"
TEMP_OUTPUT_DIR=$(basename "$MODEL_DIR")_torch
OUTPUTS_DIR=${3:-$TEMP_OUTPUT_DIR}
elif [ "$USE_TORCHACC" -eq 1 ]; then
export PJRT_DEVICE=CUDA
export XLA_FLAGS='--xla_gpu_memory_limit_slop_factor=500 --xla_multiheap_size_constraint_per_heap=15032385536'
export ACCELERATE_USE_FSDP=true
export PJRT_USE_TORCH_ALLOCATOR=true
export LOW_CPU_MEM_USAGE=1
export XLA_PERSISTENT_CACHE_PATH=./compiled_cache
FSDP_CONFIG="llama_fsdp_acc.json"
TEMP_OUTPUT_DIR=$(basename "$MODEL_DIR")_acc
OUTPUTS_DIR=${3:-$TEMP_OUTPUT_DIR}
else
echo "The second argument (use_torchacc) must be 0 or 1"
exit 1
fi

# Launch the job
torchrun --nproc_per_node "$NPROC_PER_NODE" \
--nnodes "$WORLD_SIZE" \
--node_rank "$RANK" \
--master_port "$MASTER_PORT" \
--master_addr "$MASTER_ADDR" \
"$RUN_CLM" \
--num_train_epochs 2 \
--dataset_name "$DATASET_NAME" \
--dataset_config_name "$DATASET_CONFIG_NAME" \
--use_fast_tokenizer false \
--per_device_train_batch_size "$BATCH_SIZE" \
--per_device_eval_batch_size "$BATCH_SIZE" \
--do_train \
--output_dir "$OUTPUTS_DIR" \
--overwrite_output_dir \
--model_name_or_path "$MODEL_DIR" \
--tokenizer_name "$MODEL_DIR" \
--trust_remote_code true \
--low_cpu_mem_usage true \
--cache_dir ./cache \
--block_size "$SEQLEN" \
--optim adamw_torch \
--save_strategy no \
--logging_strategy steps \
--gradient_checkpointing no \
--logging_steps 100 \
--"$PRECISION" \
--fsdp "auto_wrap" \
--fsdp_config "$FSDP_CONFIG"
14 changes: 14 additions & 0 deletions benchmarks/accuracy/llama_fsdp_acc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"fsdp_transformer_layer_cls_to_wrap": [
"LlamaDecoderLayer"
],
"xla": true,
"xla_fsdp_settings": {
"compute_dtype": "bfloat16",
"buffer_dtype": "bfloat16",
"opt_flatten_overlap": true,
"pin_layout_in_collective_ops": false,
"flatten_parameters": true
},
"xla_fsdp_grad_ckpt": true
}
6 changes: 6 additions & 0 deletions benchmarks/accuracy/llama_fsdp_torch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"fsdp_transformer_layer_cls_to_wrap": [
"LlamaDecoderLayer"
],
"activation_checkpointing": false
}