Support SUT configs for intel-mlperf-inference
arjunsuresh committed Mar 15, 2024
1 parent b7715a9 commit 03e6260
Showing 4 changed files with 82 additions and 14 deletions.
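The commit adds a `sut` variation group (tuned per-system settings) and a `loadgen-batchsize` group to the Intel MLPerf inference harness script. A rough sketch of how such variations are typically selected on the CM command line follows; the exact script tags and variation spellings are assumptions for illustration, not taken from this commit:

# Hypothetical invocation -- script tags assumed, adjust to your setup
cm run script --tags=reproduce,mlperf,inference,intel,harness,_gptj-99,_offline,_int4,_sapphire-rapids.112c,_batch_size.8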
75 changes: 70 additions & 5 deletions cm-mlops/script/app-mlperf-inference-intel/_cm.yaml
@@ -275,7 +275,13 @@ variations:
skip_inherit_variation_groups:
- run-mode
- device-info
- sut
- loadgen-batchsize
force_cache: true
- tags: get,generic-python-lib,_package.optimum
names:
- pip-package
- optimum

gptj_,pytorch:
adr:
@@ -312,6 +318,7 @@ variations:
- tags: get,generic-python-lib,_package.transformers
names:
- pip-package
- transformers
version: "4.28.1"
- tags: get,mlcommons,inference,src
names:
@@ -402,14 +409,11 @@ variations:
CM_MODEL: bert-99.9
CM_NOT_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/3733910/files/model.onnx"

  loadgen-batch-size.#:
  batch_size.#:
    group: loadgen-batchsize
    env:
      CM_MLPERF_LOADGEN_BATCH_SIZE: "#"

  activation-count.#:
    env:
      CM_MODEL_BATCH_SIZE: "#"
      #CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX1: "activation_count.#"
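For readers new to CM variations: the trailing `.#` is a wildcard, so whatever value is appended when the variation is selected is substituted for `#` in the env entry. A minimal sketch of the intended effect, assuming `_batch_size.14` was chosen (the export below only illustrates the resulting environment):

# Sketch only: selecting _batch_size.14 is expected to yield
export CM_MLPERF_LOADGEN_BATCH_SIZE=14                        # "#" replaced by the chosen value
echo "loadgen batch size: ${CM_MLPERF_LOADGEN_BATCH_SIZE}"    # later consumed by run_gptj_harness.sh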

build-harness:
group: run-mode
@@ -455,6 +459,8 @@ variations:
skip_inherit_variation_groups:
- run-mode
- device-info
- sut
- loadgen-batchsize
force_cache: true

# Download MLPerf inference source
@@ -523,6 +529,10 @@ variations:
    env:
      INTEL_GPTJ_INT4: 'yes'

  int8,gptj_:
    env:
      INTEL_GPTJ_INT4: 'no'

  fp32:
    group: precision
    adr:
@@ -531,5 +541,60 @@ variations:
    env:
      CM_IMAGENET_ACCURACY_DTYPE: float32

  sapphire-rapids.112c:
    group: sut
    env:
      WARMUP: " --warmup"

  sapphire-rapids.24c:
    group: sut

  sapphire-rapids.24c,gptj-99,offline,int8:
    env:
      KMP_BLOCKTIME: 10
      WORKERS_PER_PROC: 1
    default_env:
      CM_MLPERF_LOADGEN_BATCH_SIZE: 8

  sapphire-rapids.24c,gptj-99,offline,int4:
    env:
      KMP_BLOCKTIME: 10
      WORKERS_PER_PROC: 1
    default_env:
      CM_MLPERF_LOADGEN_BATCH_SIZE: 5

  sapphire-rapids.112c,gptj-99,offline,int8:
    env:
      KMP_BLOCKTIME: 1
      WORKERS_PER_PROC: 2
    default_env:
      CM_MLPERF_LOADGEN_BATCH_SIZE: 14

  sapphire-rapids.112c,gptj-99,offline,int4:
    env:
      NUM_PROC: 4
      KMP_BLOCKTIME: 1
      WORKERS_PER_PROC: 3
    default_env:
      CM_MLPERF_LOADGEN_BATCH_SIZE: 8

  sapphire-rapids.112c,gptj-99,server,int8:
    env:
      KMP_BLOCKTIME: 1
      WORKERS_PER_PROC: 2
    default_env:
      CM_MLPERF_LOADGEN_BATCH_SIZE: 1

  sapphire-rapids.112c,gptj_,server,int4:
    env:
      KMP_BLOCKTIME: 1
      WORKERS_PER_PROC: 4
    default_env:
      CM_MLPERF_LOADGEN_BATCH_SIZE: 1

  sapphire-rapids.24c,bert_:
    env:
      WORKERS_PER_PROC: 1

docker:
  docker_real_run: False
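The SUT entries above pin threading and worker settings in `env` and place batch sizes in `default_env`; the usual CM convention (an assumption here, not stated by this diff) is that `default_env` values apply only when the variable is not already set, so an explicit `_batch_size.#` selection still wins. A rough sketch of the environment that the `sapphire-rapids.112c,gptj-99,offline,int4` combination would hand to the harness:

# Values copied from the YAML above; the explicit exports are only an illustration of the result.
export NUM_PROC=4
export KMP_BLOCKTIME=1
export WORKERS_PER_PROC=3
export WARMUP=" --warmup"                  # from the sapphire-rapids.112c base variation
export CM_MLPERF_LOADGEN_BATCH_SIZE=8      # default_env entry, overridable via _batch_size.#
bash run_gptj_harness.sh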
@@ -31,7 +31,7 @@ fi

export CALIBRATION_DATA_JSON=${WORKLOAD_DATA}/calibration-data/cnn_dailymail_calibration.json
export VALIDATION_DATA_JSON=${WORKLOAD_DATA}/validation-data/cnn_dailymail_validation.json
export INT4_CALIBRATION_DIR=${WORKLOAD_DATA}/quantized-int4-model
#export INT4_CALIBRATION_DIR=${WORKLOAD_DATA}/quantized-int4-model
#sudo -E bash run_quantization.sh
#bash run_quantization.sh
echo "${RUN_QUANTIZATION_CMD}"
4 changes: 4 additions & 0 deletions cm-mlops/script/app-mlperf-inference-intel/customize.py
@@ -113,8 +113,12 @@ def preprocess(i):
        if env.get('INTEL_GPTJ_INT4', '') == 'yes':
            model_precision = "int4"
            env['INT4_MODEL_DIR'] = env['CM_ML_MODEL_PATH']
            env['QUANTIZED_MODEL'] = os.path.join(env['INT4_MODEL_DIR'], "best_int4_model.pt")
            env['PRECISION'] = "int4_bf16_mixed"
        else:
            env['INT8_MODEL_DIR'] = env['CM_ML_MODEL_PATH']
            env['QUANTIZED_MODEL'] = os.path.join(env["INT8_MODEL_DIR"], "best_model.pt")
            env['PRECISION'] = "int8"
        env['CM_RUN_DIR'] = i['run_script_input']['path']
        env['CM_RUN_CMD'] = "bash run_gptj_harness.sh "
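A bash rendering of the precision branch added above can be handy for checking a model directory by hand; it mirrors the Python logic but is only an illustration (the real selection happens in customize.py, and the fallback path below is a made-up example):

#!/bin/bash
# Sketch of the int4/int8 selection; CM_ML_MODEL_PATH normally comes from the CM model download step.
CM_ML_MODEL_PATH=${CM_ML_MODEL_PATH:-/data/gptj-checkpoint}   # hypothetical example path
if [ "${INTEL_GPTJ_INT4}" = "yes" ]; then
    export INT4_MODEL_DIR="${CM_ML_MODEL_PATH}"
    export QUANTIZED_MODEL="${INT4_MODEL_DIR}/best_int4_model.pt"
    export PRECISION="int4_bf16_mixed"
else
    export INT8_MODEL_DIR="${CM_ML_MODEL_PATH}"
    export QUANTIZED_MODEL="${INT8_MODEL_DIR}/best_model.pt"
    export PRECISION="int8"
fi
echo "PRECISION=${PRECISION} QUANTIZED_MODEL=${QUANTIZED_MODEL}"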

15 changes: 7 additions & 8 deletions cm-mlops/script/app-mlperf-inference-intel/run_gptj_harness.sh
@@ -1,19 +1,19 @@
#!/bin/bash
export PATH=${CM_CONDA_BIN_PATH}:$PATH

export KMP_BLOCKTIME=10
export KMP_BLOCKTIME=${KMP_BLOCKTIME}
export KMP_AFFINITY=granularity=fine,compact,1,0
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so

export num_physical_cores=`lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l`
num_numa=$(numactl --hardware|grep available|awk -F' ' '{ print $2 }')

NUM_PROC=$num_numa
NUM_PROC=${NUM_PROC:-$num_numa}
CPUS_PER_PROC=$((num_physical_cores/num_numa))
WORKERS_PER_PROC=1
WORKERS_PER_PROC=3
TOTAL_SAMPLE_COUNT=13368
BATCH_SIZE=8
BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE}
TIMESTAMP=$(date +%m-%d-%H-%M)
HOSTNAME=$(hostname)
#OUTPUT_DIR=offline-output-${HOSTNAME}-batch-${BATCH_SIZE}-procs-${NUM_PROC}-ins-per-proc-${WORKERS_PER_PROC}-${TIMESTAMP}
@@ -27,21 +27,20 @@ OUTPUT_DIR="${CM_MLPERF_OUTPUT_DIR}"
USER_CONF="${CM_MLPERF_USER_CONF}"


#--mode Performance \
cmd="python runner.py --workload-name gptj \
--scenario Offline \
--mode ${LOADGEN_MODE} \
--num-proc ${NUM_PROC} \
--cpus-per-proc ${CPUS_PER_PROC} \
--model-checkpoint-path ${CHECKPOINT_DIR} \
--warmup \
${WARMUP} \
--dataset-path ${VALIDATION_DATA_JSON} \
--batch-size ${BATCH_SIZE} \
--mlperf-conf ${CM_MLPERF_CONF} \
--user-conf ${CM_MLPERF_USER_CONF} \
--precision int8 \
--precision ${PRECISION} \
--pad-inputs \
--quantized-model ${INT8_MODEL_DIR}/best_model.pt \
--quantized-model ${QUANTIZED_MODEL} \
--workers-per-proc ${WORKERS_PER_PROC} \
--total-sample-count ${TOTAL_SAMPLE_COUNT} \
--output-dir ${OUTPUT_DIR} \
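With this change the harness no longer hard-codes its tuning knobs: KMP_BLOCKTIME, NUM_PROC, WORKERS_PER_PROC, the batch size, the warmup flag, the precision and the quantized-model path all come from the environment prepared by the SUT config and customize.py. A condensed, standalone sketch of that fallback behaviour (the runner invocation is omitted, and the fallback values marked below are assumptions, not taken from the diff):

#!/bin/bash
# Standalone sketch of the defaulting logic in run_gptj_harness.sh after this commit.
num_physical_cores=$(lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l)
num_numa=$(numactl --hardware | grep available | awk -F' ' '{ print $2 }')

NUM_PROC=${NUM_PROC:-$num_numa}                   # SUT env wins, else one process per NUMA node
CPUS_PER_PROC=$((num_physical_cores / num_numa))
WORKERS_PER_PROC=${WORKERS_PER_PROC:-1}           # assumed fallback semantics; the diff shows WORKERS_PER_PROC=3
BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE:-8}     # assumed fallback; the diff takes the CM value without a default
WARMUP=${WARMUP:-}                                # " --warmup" only when the SUT config asks for it
echo "procs=${NUM_PROC} cpus/proc=${CPUS_PER_PROC} workers/proc=${WORKERS_PER_PROC} batch=${BATCH_SIZE}${WARMUP}"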
