- Create the work directory (only needed the first time)
WORK="/data/hp190122/users/$(id -u -n)/work"
mkdir -p $WORK
ln -s $WORK $HOME/work
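- (Optional) Confirm the symlink resolves to the work directory; this quick check is a suggestion, not part of the original steps
ls -ld $HOME/work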
- Move to work directory
cd $HOME/work
- Change user group
newgrp hp190122
- Launch interactive job
pjsub --interact -L "node=1" -L "rscunit=rscunit_ft01" -L "rscgrp=int" -L "elapse=6:00:00" --sparam "wait-time=600" --mpi "proc=48" -x PJM_LLIO_GFSCACHE=/vol0003:/vol0004
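- (Optional) If the interactive job does not start right away, you can check the queue from another login-node session; pjstat is the standard PJM status command, and this check is a suggestion rather than part of the original steps
pjstat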
- Enable pre-built PyTorch v1.10.1
source /data/hp190122/share/PyTorch-1.10.1/env.src
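- (Optional) Verify the pre-built PyTorch is on the path (a minimal sanity check, not in the original steps; it should print 1.10.1)
python3 -c "import torch; print(torch.__version__)"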
- Install required Python modules
export PYTHONUSERBASE=$HOME/work/.local
export PATH=$PATH:$PYTHONUSERBASE/bin
pip3 install --user deepspeed
pip3 install --user datasets
pip3 install --user nltk
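- (Optional) Confirm the modules landed under $PYTHONUSERBASE (a suggested check, not part of the original steps)
pip3 list --user | grep -iE 'deepspeed|datasets|nltk'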
- Build DeepSpeedFugaku
git clone git@github.com:rioyokotalab/DeepSpeedFugaku.git
cd DeepSpeedFugaku/
git switch training/feature/v2-tokenizer
git branch <user-sub-team-name>/feature/<branch-name>
git switch <user-sub-team-name>/feature/<branch-name>
python3 setup.py install --user
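- (Optional) Check that the locally built DeepSpeed is importable from the user-site install (a suggested sanity check, not part of the original steps)
python3 -c "import deepspeed; print(deepspeed.__version__)"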
- Move to DeepSpeedFugaku directory
cd $HOME/work/DeepSpeedFugaku
- Change user group
newgrp hp190122
- Launch interactive job
pjsub --interact -L "node=1" -L "rscunit=rscunit_ft01" -L "rscgrp=int" -L "elapse=6:00:00" --sparam "wait-time=600" --mpi "proc=48" -x PJM_LLIO_GFSCACHE=/vol0003:/vol0004
- Enable pre-built PyTorch v1.10.1
source /data/hp190122/share/PyTorch-1.10.1/env.src
- Export environment variables
export PYTHONUSERBASE=$HOME/work/.local export PATH=$PATH:$PYTHONUSERBASE/bin
- Download the vocab file and merge file
cd dataset
bash download_vocab.sh
cd ..
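- (Optional) Confirm the downloaded files are present in dataset/ (a quick check, not part of the original steps)
ls dataset/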
- Build the megatron/data helpers extension (compiles helpers.cpp)
cd megatron/data
make
cd ../..
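- (Optional) Confirm the helpers shared library was built; the exact file name depends on the Python version, so the glob below is an assumption
ls megatron/data/helpers*.so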
- Execute job
sh run_pretrain_gpt_fugaku.sh
- Move to DeepSpeedFugaku directory
cd $HOME/work/DeepSpeedFugaku
- Change user group
newgrp hp190122
- Submit job
pjsub run_pretrain_gpt_fugaku.sh
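- (Optional) Check the status of the submitted job; pjstat is the standard PJM command for listing your jobs, suggested here as a follow-up rather than part of the original steps
pjstat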