sbatch_start_train_llff.sh (forked from vincentfung13/MINE)
#!/bin/bash
#SBATCH --gres=gpu:v100l:4 # Request GPU "generic resources"
#SBATCH --cpus-per-task=8 # Cores proportional to GPUs: 6 on Cedar, 16 on Graham.
#SBATCH --mem=32G # Memory proportional to GPUs: 32000 Cedar, 64000 Graham.
#SBATCH --time=0-04:00
#SBATCH --output=/project/def-karray/yafathi/MINE/sbatch_output/LLFF_Single_View_Alpha_MINE_%N-%j.out
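# Usage (a hedged sketch; the account "def-karray", paths, and the MINE_ENV virtualenv below are specific to this setup):
#   sbatch sbatch_start_train_llff.sh    # submit the job to SLURM
#   squeue -u $USER                      # check queue/run status
# Job output is written to the --output path given above.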
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export NCCL_BLOCKING_WAIT=0  # Set this environment variable if you wish to use the NCCL backend for inter-GPU communication.
export MASTER_ADDR=$(hostname)  # Store the master node's hostname in the MASTER_ADDR environment variable.
echo "r$SLURM_NODEID master: $MASTER_ADDR"
module load python/3.8
source /project/def-karray/yafathi/MINE_ENV/bin/activate
nvidia-smi
sh start_training.sh MASTER_ADDR=$MASTER_ADDR MASTER_PORT=1234 N_NODES=1 GPUS_PER_NODE=4 NODE_RANK=0 WORKSPACE=/project/def-karray/yafathi/MINE/ DATASET=llff VERSION=experiments EXTRA_CONFIG='{"training.gpus": "0,1,2,3"}'
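# Note (an assumption based on this script, not stated in the original): if you change the GPU count in
# --gres above, keep GPUS_PER_NODE and the "training.gpus" device list in EXTRA_CONFIG consistent with it.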