-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfigure5.sh
executable file
·146 lines (112 loc) · 6.95 KB
/
figure5.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/bin/bash
# Experiment driver, NasPipe section.
#
# Changes into the NasPipe translation workspace and launches
# main_with_runtime.py through the autolaunch module once per configuration
# (c0..c3). The four launches use identical flags except for --input_path.

# Stop immediately if the workspace is missing; without this guard every
# launch below would silently run from whatever directory the caller was in.
cd /workspace/throughput/translation || {
  echo "cannot cd to /workspace/throughput/translation" >&2
  exit 1
}
export PYTHONWARNINGS="ignore"
echo "Experiment starts..."

#######################################
# Launch a single NasPipe run.
# Arguments: $1 - value passed to --input_path
# Returns:   exit status of the python launcher (callers below ignore it, so
#            a failed run does not prevent the remaining runs from starting)
#######################################
run_naspipe() {
  local input_path=$1
  python -m autolaunch --collectsteps 200 --nnodes 1 --node_rank 0 --nproc_per_node 4 main_with_runtime.py \
    --data_dir data/wmt14_en_de_joined_dict --master_addr localhost --module gpus=4 \
    --checkpoint_dir output --distributed_backend gloo -b 3840 --lr 0.000060 \
    --lr_policy polynomial --weight-decay 0.000000 --epochs 10 --print-freq 10 \
    --verbose 0 --num_ranks_in_server 4 --config_path gpus=4/mp_conf.json \
    --input_path "$input_path"
}

# naspipe
echo "NasPipe."
# c0
echo "c0."
run_naspipe config_4_4.json
# c1
echo "c1."
run_naspipe config_4_3.json
# c2
echo "c2."
run_naspipe config_4_2.json
# c3
echo "c3."
run_naspipe config_4.json
# Baseline section, VPipe.
#
# The baseline systems are launched from a separate tree. Stop here if that
# tree is missing instead of launching the baselines from the previous
# working directory.
cd /workspace/baselines/translation || {
  echo "cannot cd to /workspace/baselines/translation" >&2
  exit 1
}

#######################################
# Launch a single VPipe run.
# Arguments: $1 - value passed to --input_path
# Returns:   exit status of the python launcher (ignored by the callers
#            below, so later runs still start after a failure)
#######################################
run_vpipe() {
  local input_path=$1
  python -m autolaunch --collectsteps 200 --nnodes 1 --node_rank 0 --nproc_per_node 4 main_with_runtime.py \
    --data_dir data/wmt14_en_de_joined_dict --master_addr localhost --module vgpus=4 \
    --checkpoint_dir output --distributed_backend gloo --lr 0.000060 \
    --lr_policy polynomial --weight-decay 0.000000 --epochs 10 --rep 16 --print-freq 10 \
    --verbose 0 --num_ranks_in_server 4 --config_path vgpus=4/mp_conf.json --num_minibatches 5000 \
    --batch_size 192 --input_path="$input_path" --sys=vpipe
}

echo "VPipe."
# NOTE(review): c0 and c1 pass the same --input_path (config_4_3.json), so
# these two runs are identical. The NasPipe c0 run uses config_4_4.json;
# confirm whether VPipe c0 was meant to use a different config as well.
echo "c0."
run_vpipe config_4_3.json
echo "c1."
run_vpipe config_4_3.json
echo "c2."
run_vpipe config_4_2.json
echo "c3."
run_vpipe config_4.json
# Baseline section, GPipe.
#
# Three launches (c1..c3) that share every flag except --collectsteps,
# --batch_size and --input_path. Runs in the current working directory.

#######################################
# Launch a single GPipe run.
# Arguments: $1 - value passed to --collectsteps
#            $2 - value passed to --batch_size
#            $3 - value passed to --input_path
# Returns:   exit status of the python launcher
#######################################
run_gpipe() {
  local collect_steps=$1
  local batch_size=$2
  local input_path=$3

  # Launcher flags come before the training script name.
  local -a launcher=(
    python -m autolaunch
    --collectsteps "$collect_steps"
    --nnodes 1
    --node_rank 0
    --nproc_per_node 4
  )
  # Flags placed after the training script name, in the original order.
  local -a train_args=(
    --data_dir data/wmt14_en_de_joined_dict
    --master_addr localhost
    --module gpus=4
    --checkpoint_dir output
    --distributed_backend gloo
    --lr 0.000060
    --lr_policy polynomial
    --weight-decay 0.000000
    --epochs 10
    --print-freq 10
    --verbose 0
    --num_ranks_in_server 4
    --config_path gpus=4/mp_conf.json
    --num_minibatches 5000
    --batch_size "$batch_size"
    --input_path="$input_path"
    --sys=gpipe
  )

  "${launcher[@]}" main_with_runtime.py "${train_args[@]}"
}

echo "GPipe."
echo "c1."
run_gpipe 1200 32 config_4_3.json
echo "c2."
run_gpipe 600 64 config_4_2.json
echo "c3."
run_gpipe 200 128 config_4.json
# Baseline section, Pipedream.
#
# Three launches (c1..c3) that share every flag except --collectsteps,
# --batch_size and --input_path. Runs in the current working directory.

#######################################
# Launch a single Pipedream run.
# Arguments: $1 - value passed to --collectsteps
#            $2 - value passed to --batch_size
#            $3 - value passed to --input_path
# Returns:   exit status of the python launcher
#######################################
run_pipedream() {
  local collect_steps=$1
  local batch_size=$2
  local input_path=$3

  # Launcher flags come before the training script name.
  local -a launcher=(
    python -m autolaunch
    --collectsteps "$collect_steps"
    --nnodes 1
    --node_rank 0
    --nproc_per_node 4
  )
  # Flags placed after the training script name, in the original order.
  local -a train_args=(
    --data_dir data/wmt14_en_de_joined_dict
    --master_addr localhost
    --module pgpus=4
    --checkpoint_dir output
    --distributed_backend gloo
    --lr 0.000060
    --lr_policy polynomial
    --weight-decay 0.000000
    --epochs 10
    --print-freq 10
    --verbose 0
    --num_ranks_in_server 4
    --config_path pgpus=4/mp_conf.json
    --num_minibatches 5000
    --batch_size "$batch_size"
    --input_path="$input_path"
    --sys=pipedream
  )

  "${launcher[@]}" main_with_runtime.py "${train_args[@]}"
}

echo "Pipedream."
echo "c1."
run_pipedream 1200 16 config_4_3.json
echo "c2."
run_pipedream 1200 24 config_4_2.json
echo "c3."
run_pipedream 600 48 config_4.json