# tune_pbt_mnist.py
# Forked from SymbioticLab/Fluid.
from pathlib import Path
from ray import tune
from ray.tune.schedulers import PopulationBasedTraining
from ray.util.sgd.utils import BATCH_SIZE
import workloads.common as com
from fluid.trainer import TorchTrainer
from workloads.common import mnist as workload
# Data and results locations as reported by the shared workload helper.
DATA_PATH, RESULTS_PATH = com.detect_paths()
# Experiment name derived from this script's file stem; presumably the
# "tune_" prefix is stripped (e.g. "pbt_mnist") -- confirm against
# com.remove_prefix.
EXP_NAME = com.remove_prefix(Path(__file__).stem, "tune_")
def setup_tune_scheduler():
    """Build the scheduler-related keyword arguments for ``tune.run``.

    A ``PopulationBasedTraining`` scheduler is created from the workload's
    sample space, custom explore function and metric settings, and is
    bundled with the workload's search space, the population size and the
    per-trial resources.

    Returns:
        A dict with the keys ``scheduler``, ``config``, ``num_samples`` and
        ``resources_per_trial``, intended to be splatted into ``tune.run``.
    """
    mutations, explore_fn = workload.create_sample_space()
    base_config = workload.create_search_space()

    # Metric settings come from the workload and are forwarded verbatim to
    # the scheduler as extra keyword arguments.
    metric_kwargs = workload.exp_metric()
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=5,
        hyperparam_mutations=mutations,
        custom_explore_fn=explore_fn,
        **metric_kwargs
    )

    return {
        "scheduler": pbt,
        "config": base_config,
        # For PBT, num_samples only determines the population size.
        "num_samples": 10,
        "resources_per_trial": com.detect_baseline_resource(),
    }
def main():
    """Run the PBT tuning experiment and save each trial's results as CSV.

    Initialises Ray, wraps the workload's creator functions into a
    trainable via ``TorchTrainer.as_trainable``, launches ``tune.run`` with
    the run options, stopper and scheduler settings, and finally writes
    every trial's dataframe into that trial's log directory.
    """
    # The second value returned by init_ray is used as the trainer seed.
    _, seed = com.init_ray()

    trainable_cls = TorchTrainer.as_trainable(
        data_creator=workload.data_creator,
        model_creator=workload.model_creator,
        loss_creator=workload.loss_creator,
        optimizer_creator=workload.optimizer_creator,
        config={
            "seed": seed,
            BATCH_SIZE: 64,
            "extra_fluid_trial_resources": {},
        },
    )

    # Later entries override earlier ones: run options first, then the
    # stopper, then the scheduler settings.
    run_kwargs = dict(com.run_options(__file__))
    run_kwargs["stop"] = workload.create_stopper()
    run_kwargs.update(setup_tune_scheduler())

    analysis = tune.run(trainable_cls, **run_kwargs)

    # NOTE(review): "trail" looks like a typo for "trial", but the file name
    # is kept as-is because other tooling may rely on it -- confirm before
    # renaming.
    for trial_dir, frame in analysis.trial_dataframes.items():
        frame.to_csv(Path(trial_dir) / "trail_dataframe.csv")
# Launch the experiment only when this file is executed as a script, so
# importing the module has no tuning side effects.
if __name__ == "__main__":
    main()