Add readme

AlibabaPAI · Dec 9, 2024 · a9848e6 · a9848e6
1 parent 252a8ee
commit a9848e6
Show file tree

Hide file tree

Showing 13 changed files with 188 additions and 570 deletions.
diff --git a/benchmarks/accuracy_benchmark/README.md b/benchmarks/accuracy_benchmark/README.md
@@ -0,0 +1,99 @@
+# Accuracy Benchmark
+
+## Overview
+
+The Accuracy Benchmark evaluates the accuracy of models trained with TorchAcc, using [FastChat](https://github.com/AlibabaPAI/FastChat_TorchAcc), against a baseline established by Torch native. The benchmark aims to ensure that TorchAcc maintains accuracy comparable to Torch native.
+
+## Evaluation Process
+
+To perform the evaluation, follow these steps:
+
+1. Set Baseline
+
+    ```bash
+    bash ./llama.sh <ORIGINAL_MODEL_DIR> 0
+    ```
+
+    Run the Torch native job using `run_clm.py`, a script copied from HuggingFace Transformers.
+
+2. Run TorchAcc
+
+    ```bash
+    bash ./llama.sh <ORIGINAL_MODEL_DIR> 1
+    ```
+
+    Run the TorchAcc job using the same script as used for Torch native.
+
+3. Evaluate Original
+
+    ```bash
+    bash ./fastchat.sh <ORIGINAL_MODEL_DIR>
+    ```
+
+    Evaluate the original checkpoint using FastChat.
+
+4. Evaluate Outputs
+
+    ```bash
+    bash ./fastchat.sh <TORCH_NATIVE_CHECKPOINT>
+    bash ./fastchat.sh <TORCHACC_CHECKPOINT>
+    ```
+
+    Evaluate the checkpoints produced by the Torch native job and the TorchAcc job.
+
+5. Compare Results
+
+    Compare the training and evaluation results.
+
+
+You can simply execute the `run.sh` script to perform all the steps.
+
+## Main Files
+
+* run.sh
+
+    The script integrates all the steps.
+
+    ```bash
+    bash ./run.sh <local_model_dir>
+    ```
+
+* llama.sh
+
+    The script runs a Llama job using `run_clm.py` with either Torch native or TorchAcc.
+
+    ```bash
+    bash ./llama.sh <local_model_dir> <use_torchacc> [checkpoint_output_dir]
+    ```
+
+* fastchat.sh
+
+    The script runs the evaluation task on your checkpoint.
+
+    ```bash
+    MIT_SPIDER_TOKEN=*** MIT_SPIDER_URL=*** M6_TENANT=*** bash ./fastchat.sh <local_model_dir>
+    ```
+
+## Evaluation Results
+
+The evaluation results are shown as follows:
+
+```
+
+==================== Training Results ====================
+Torch train loss                = 2.091632914827291
+TorchAcc train loss             = 2.0917317353245495
+Torch train runtime (s)         = 2552.8252
+TorchAcc train runtime (s)      = 2272.1399
+Torch train steps per second    = 5.785
+TorchAcc train steps per second = 6.5
+
+=================== Evaluation Results ===================
+Original Model Score            = 1.4625
+Torch Model Score               = 1.1125
+TorchAcc Model Score            = 1.100629
+
+More details can be found in    = ./result/20241205_223009
+==========================================================
+
+```
diff --git a/benchmarks/accuracy_benchmark/fastchat.patch b/benchmarks/accuracy_benchmark/fastchat.patch
diff --git a/benchmarks/accuracy_benchmark/mtbench.sh → benchmarks/accuracy_benchmark/fastchat.sh b/benchmarks/accuracy_benchmark/mtbench.sh → benchmarks/accuracy_benchmark/fastchat.sh
@@ -1,27 +1,24 @@
 #!/bin/bash
 
-# $1: local model directory
 if [ "$#" -ne 1 ]; then
-  echo "Usage: MIT_SPIDER_TOKEN=*** MIT_SPIDER_URL=*** $0 <local_model_dir>"
+  echo "Usage: MIT_SPIDER_TOKEN=*** MIT_SPIDER_URL=*** M6_TENANT=*** $0 <local_model_dir>"
   echo "You must provide exactly 1 parameters."
   exit 1
 fi
 
-if [[ -z "${MIT_SPIDER_TOKEN}" ]]; then
-  echo "Error: Environment variable MIT_SPIDER_TOKEN is not set." >&2
-  exit 1
-fi
-
-if [[ -z "${MIT_SPIDER_URL}" ]]; then
-  echo "Error: Environment variable MIT_SPIDER_URL is not set." >&2
+if [[ -z "${MIT_SPIDER_TOKEN}" || -z "${MIT_SPIDER_URL}" || -z "${M6_TENANT}" ]]; then
+  echo "Error: One or more required environment variables are not set."
+  echo "Required variables:"
+  [[ -z "${MIT_SPIDER_TOKEN}" ]] && echo "  - MIT_SPIDER_TOKEN"
+  [[ -z "${MIT_SPIDER_URL}" ]] && echo "  - MIT_SPIDER_URL"
+  [[ -z "${M6_TENANT}" ]] && echo "  - M6_TENANT"
   exit 1
 fi
 
 MODEL_DIR=$(realpath $1)
 MODEL_ID=$(basename "$MODEL_DIR")_$(date +"%Y%m%d_%H%M%S")
 NUM_GPUS_TOTAL=1
 JUDGMENT_PARALLEL=4
-export M6_TENANT=M6
 
 function install_fastchat {
   if [[ ! -d "FastChat" ]]; then

diff --git a/benchmarks/accuracy_benchmark/llama.sh b/benchmarks/accuracy_benchmark/llama.sh
@@ -15,9 +15,8 @@ WORLD_SIZE="${WORLD_SIZE:-1}"
 MASTER_ADDR="${MASTER_ADDR:-127.0.0.1}"
 MASTER_PORT="${MASTER_PORT:-9010}"
 NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
-BS="${BS:-2}"
+BATCH_SIZE="${BATCH_SIZE:-2}"
 SEQLEN="${SEQLEN:-1024}"
-TASK_TAG="${TASK_TAG:-0000}"
 PRECISION="bf16=true"
 RUN_CLM=./run_clm.py
 
@@ -33,10 +32,10 @@ elif [ "$USE_TORCHACC" -eq 1 ]; then
   export ACCELERATE_USE_FSDP=true
   export PJRT_USE_TORCH_ALLOCATOR=true
   export LOW_CPU_MEM_USAGE=1
-  export XLA_PERSISTENT_CACHE_PATH=./compiled_cache # uncomment this line to cache the compile results and speed up initialization.
+  export XLA_PERSISTENT_CACHE_PATH=./compiled_cache
+  FSDP_CONFIG="llama_fsdp_acc.json"
   TEMP_OUTPUT_DIR=$(basename "$MODEL_DIR")_acc
   OUTPUTS_DIR=${3:-$TEMP_OUTPUT_DIR}
-  FSDP_CONFIG="llama_fsdp_acc.json"
 else
   echo "The third argument must be 0 or 1"
   exit 1
@@ -53,8 +52,8 @@ torchrun --nproc_per_node "$NPROC_PER_NODE" \
   --dataset_name wikitext \
   --dataset_config_name wikitext-103-raw-v1 \
   --use_fast_tokenizer false \
-  --per_device_train_batch_size "$BS" \
-  --per_device_eval_batch_size "$BS" \
+  --per_device_train_batch_size "$BATCH_SIZE" \
+  --per_device_eval_batch_size "$BATCH_SIZE" \
   --do_train \
   --output_dir "$OUTPUTS_DIR" \
   --overwrite_output_dir \

diff --git a/benchmarks/accuracy_benchmark/llama_acc.sh b/benchmarks/accuracy_benchmark/llama_acc.sh