Add small demo
davidjurado committed Sep 22, 2023
1 parent 941d161 commit 57ab273
Showing 3 changed files with 185 additions and 0 deletions.
49 changes: 49 additions & 0 deletions language_model/tensorflow/bert/cleanup_scripts/download_demo.sh
@@ -0,0 +1,49 @@
#!/bin/bash

# Requires gdown; install with: pip install --user gdown

DATA_DIR="./wiki"

# Capture MLCube parameters
while [ "$1" != "" ]; do
    case "$1" in
        --data_dir=*)
            DATA_DIR="${1#*=}"
            ;;
    esac
    shift
done

mkdir -p "$DATA_DIR"

cd "$DATA_DIR"

# Download files from the Google Drive folder: https://drive.google.com/drive/folders/1oQF4diVHNPCclykwdvQJw8n_VIWwV0PT

# bert_config.json
gdown "https://drive.google.com/uc?id=1fbGClQMi2CoMv7fwrwTC5YYPooQBdcFW"

# License.txt
gdown "https://drive.google.com/uc?id=1SYfj3zsFPvXwo4nUVkAS54JVczBFLCWI"

# vocab.txt
gdown "https://drive.google.com/uc?id=1USK108J6hMM_d27xCHi738qBL8_BT1u1"

# Download TF-2 checkpoints
mkdir -p tf2_ckpt

cd tf2_ckpt

gdown "https://drive.google.com/uc?id=1pJhVkACK3p_7Uc-1pAzRaOXodNeeHZ7F"

gdown "https://drive.google.com/uc?id=1oVBgtSxkXC9rH2SXJv85RXR9-WrMPy-Q"

cd ..

# Download dummy data in TFRecord format
wget https://storage.googleapis.com/bert_tf_data/sample_data/tf_data.zip
unzip tf_data.zip
rm tf_data.zip
wget https://storage.googleapis.com/bert_tf_data/sample_data/tf_eval_data.zip
unzip tf_eval_data.zip
rm tf_eval_data.zip
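For a quick sanity check outside of MLCube, the script can also be run by hand. A minimal sketch, assuming gdown is on PATH and using an illustrative target directory:

# Hypothetical standalone invocation (the directory name is an example)
pip install --user gdown
bash cleanup_scripts/download_demo.sh --data_dir=./demo_data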
22 changes: 22 additions & 0 deletions language_model/tensorflow/bert/mlcube/mlcube.yaml
@@ -22,6 +22,11 @@ tasks:
    parameters:
      outputs:
        data_dir: data/
  download_demo:
    entrypoint: ./cleanup_scripts/download_demo.sh -a
    parameters:
      outputs:
        data_dir: data/
  process_data:
    entrypoint: ./cleanup_scripts/create_pretraining_data.sh -a
    parameters:
@@ -36,6 +41,23 @@ tasks:
      outputs:
        output_path: tf_data/
        output_eval_path: tf_eval_data/
  demo:
    entrypoint: ./run_demo.sh -a
    parameters:
      inputs:
        tfdata_path: data/tf_data/
        init_checkpoint:
          type: file
          default: data/tf2_ckpt/model.ckpt-28252
        eval_file:
          type: file
          default: data/tf_eval_data/eval_10k
        config_path:
          type: file
          default: data/bert_config.json
      outputs:
        log_dir: logs/
        output_dir: final_output/
  train:
    entrypoint: ./run_and_time.sh -a
    parameters:
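With these tasks registered, the repository's usual MLCube workflow should apply. A sketch of how the new tasks would be invoked, assuming the standard mlcube CLI setup (task names come from the YAML above):

# Hypothetical invocations of the new tasks
mlcube run --task=download_demo
mlcube run --task=demo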
114 changes: 114 additions & 0 deletions language_model/tensorflow/bert/run_demo.sh
@@ -0,0 +1,114 @@
#!/bin/bash

set +x
set -e

# start timing
start=$(date +%s)
start_fmt=$(date +%Y-%m-%d\ %r)
echo "STARTING TIMING RUN AT $start_fmt"

# Default parameter values (overridable via environment variables or the flags below)
: "${TFDATA_PATH:=./workspace/output_data}"
: "${INIT_CHECKPOINT:=./workspace/data/tf2_ckpt}"
: "${EVAL_FILE:=./workspace/tf_eval_data/eval_10k}"
: "${CONFIG_PATH:=./workspace/data/bert_config.json}"
: "${LOG_DIR:=./workspace/logs}"
: "${OUTPUT_DIR:=./workspace/final_output}"

# Handle MLCube parameters
while [ $# -gt 0 ]; do
    case "$1" in
        --tfdata_path=*)
            TFDATA_PATH="${1#*=}"
            ;;
        --config_path=*)
            CONFIG_PATH="${1#*=}"
            ;;
        --init_checkpoint=*)
            INIT_CHECKPOINT="${1#*=}"
            ;;
        --log_dir=*)
            LOG_DIR="${1#*=}"
            ;;
        --output_dir=*)
            OUTPUT_DIR="${1#*=}"
            ;;
        --eval_file=*)
            EVAL_FILE="${1#*=}"
            ;;
        *) ;;
    esac
    shift
done

# run benchmark
echo "running benchmark"

TF_XLA_FLAGS='--tf_xla_auto_jit=2' \
python3 run_pretraining.py \
  --bert_config_file="$CONFIG_PATH" \
  --output_dir="$OUTPUT_DIR" \
  --input_file="$TFDATA_PATH/part*" \
  --init_checkpoint="$INIT_CHECKPOINT" \
  --nodo_eval \
  --do_train \
  --eval_batch_size=1 \
  --learning_rate=0.0001 \
  --iterations_per_loop=1000 \
  --max_predictions_per_seq=76 \
  --max_seq_length=512 \
  --num_train_steps=5 \
  --num_warmup_steps=1 \
  --optimizer=lamb \
  --save_checkpoints_steps=1 \
  --start_warmup_step=0 \
  --num_gpus=1 \
  --train_batch_size=1 |& tee "$LOG_DIR/train_console.log"

# Copy log file to MLCube log folder
if [ "$LOG_DIR" != "" ]; then
timestamp=$(date +%Y%m%d_%H%M%S)
cp bert.log "$LOG_DIR/bert_train_$timestamp.log"
fi

TF_XLA_FLAGS='--tf_xla_auto_jit=2' \
python3 run_pretraining.py \
  --bert_config_file="$CONFIG_PATH" \
  --output_dir="$OUTPUT_DIR" \
  --input_file="$EVAL_FILE" \
  --do_eval \
  --nodo_train \
  --eval_batch_size=8 \
  --init_checkpoint="$OUTPUT_DIR/model.ckpt-5" \
  --iterations_per_loop=10 \
  --learning_rate=0.0001 \
  --max_eval_steps=10 \
  --max_predictions_per_seq=76 \
  --max_seq_length=512 \
  --num_gpus=1 \
  --num_train_steps=5 \
  --num_warmup_steps=1 \
  --optimizer=lamb \
  --save_checkpoints_steps=1 \
  --start_warmup_step=1 \
  --train_batch_size=24 \
  --nouse_tpu |& tee "$LOG_DIR/eval_console.log"
# Capture the exit status of run_pretraining.py (not tee's) from the pipeline
ret_code=${PIPESTATUS[0]}

# Copy log file to MLCube log folder
if [ "$LOG_DIR" != "" ]; then
timestamp=$(date +%Y%m%d_%H%M%S)
cp bert.log "$LOG_DIR/bert_eval_$timestamp.log"
fi

set +x

sleep 3
if [[ $ret_code != 0 ]]; then exit $ret_code; fi

# end timing
end=$(date +%s)
end_fmt=$(date +%Y-%m-%d\ %r)
echo "ENDING TIMING RUN AT $end_fmt"
secs=$((end-start))
printf 'TOTAL EXECUTION TIME: %dh:%dm:%ds\n' $((secs/3600)) $((secs%3600/60)) $((secs%60))
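Since every path in run_demo.sh has a default that environment variables or flags override, the script can also be driven directly outside of MLCube. A sketch with illustrative paths:

# Hypothetical direct invocation: override via environment variables...
TFDATA_PATH=./workspace/output_data LOG_DIR=./workspace/logs ./run_demo.sh
# ...or via the equivalent MLCube-style flags
./run_demo.sh --tfdata_path=./workspace/output_data --log_dir=./workspace/logs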
