# Apache 2.0 License
# Copyright (c) 2022, Fraunhofer e.V.
# All rights reserved.
[DEFAULT]
batch_size = 50
epochs = 100
output_dir = checkpoints # path where to save checkpoints
checkpoint_frequency = 25 # number of epochs between checkpoints
pretrained = True # use pretrained model
augmentation = True # use augmentation
resume = '' # resume from checkpoint (if empty, don't resume)
weights_path = '' # path where weights are saved (if empty, don't use weighted sampling)
num_samples = None # number of samples (only for weighted sampling; if None, use whole dataset)
num_classes = None # number of classes (if None, infer from dataset or target model)
with_eval = False # evaluate during training
log_target_predictions = False # log target predictions
softmax_target = False # set to True if the target outputs softmax probabilities; set automatically when a defense is used
sample_seed = 1 # seed for weighted sampling; only used when weighted sampling is enabled
# Dataset parameters
data_path = None # dataset path
dataset = cifar10 # dataset name
input_size = 224 # image size
# Distillation parameters
target_model = resnet34 # name of target model to train. Either timm model or "full".
target_path = '' # path of the target model checkpoint.
distillation_type = none # possible values: ['none', 'soft', 'hard']. If none, train a target model.
distillation_alpha = 1 # weight of the distillation loss; set to 1 to use only the distillation loss
distillation_tau = 3.0 # temperature
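# A rough sketch (not this repository's code) of how 'soft' and 'hard'
# distillation are commonly implemented, DeiT-style; function and argument
# names here are illustrative:
#
#   import torch.nn.functional as F
#
#   def distill_loss(student_logits, teacher_logits, base_loss,
#                    kind='soft', alpha=1.0, tau=3.0):
#       if kind == 'none':
#           return base_loss
#       if kind == 'soft':  # KL divergence between tau-softened distributions
#           dist = F.kl_div(F.log_softmax(student_logits / tau, dim=1),
#                           F.log_softmax(teacher_logits / tau, dim=1),
#                           reduction='sum', log_target=True) \
#                  * (tau * tau) / student_logits.numel()
#       else:               # 'hard': train on the teacher's argmax labels
#           dist = F.cross_entropy(student_logits, teacher_logits.argmax(dim=1))
#       # with alpha = 1 (as above) only the distillation term remains
#       return (1.0 - alpha) * base_loss + alpha * dist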
# Model parameters
model = deit_base_patch16_224 # name of model to train, must be one of the timm models
drop = 0.0 # dropout rate
drop_path = 0.1 # drop path rate
model_ema = True
model_ema_decay = 0.99996
model_ema_force_cpu = False
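# A minimal sketch of building this model with timm (the library these model
# names come from); the EMA wrapper and device handling are illustrative:
#
#   import timm
#   from timm.utils import ModelEmaV2
#
#   model = timm.create_model('deit_base_patch16_224', pretrained=True,
#                             drop_rate=0.0, drop_path_rate=0.1)
#   # model_ema_force_cpu = True would pass device='cpu' instead
#   model_ema = ModelEmaV2(model, decay=0.99996, device=None)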
# Defense parameters
defense = none # possible values: ['none', 'random_noise', 'reverse_sigmoid', 'mad']
defense_log_path = '' # path where to save defense logs
mad_epsilon = 0.5 # epsilon value of mad defense
mad_oracle = extreme # oracle used by mad defense
rs_beta = 0.7 # beta value of reverse sigmoid defense
rs_gamma = 0.4 # gamma value of reverse sigmoid defense
rn_dist_z = l1 # norm used by random noise defense
rn_epsilonz = 0.5 # epsilon_z value of random noise defense
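# A rough sketch of one published formulation of the reverse sigmoid
# perturbation (Lee et al., 2018), parameterized by rs_beta and rs_gamma;
# the repository's exact defense implementations may differ:
#
#   import numpy as np
#
#   def reverse_sigmoid(y, beta=0.7, gamma=0.4, eps=1e-12):
#       # y: predicted probability vector; perturb each entry by
#       # r(y) = beta * (sigmoid(gamma * logit(y)) - 0.5), then renormalize
#       logit = np.log((y + eps) / (1.0 - y + eps))
#       r = beta * (1.0 / (1.0 + np.exp(-gamma * logit)) - 0.5)
#       z = np.clip(y - r, 0.0, 1.0)
#       return z / z.sum()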
# Optimizer parameters
opt = adamw
opt_eps = 1e-08
opt_betas = None
clip_grad = None
momentum = 0.9
weight_decay = 0.05
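# With opt = adamw, the settings above correspond roughly to the following
# plain-PyTorch sketch (momentum applies only to SGD-style optimizers, and
# opt_betas = None keeps AdamW's default betas):
#
#   import torch
#
#   optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4,
#                                 eps=1e-8, weight_decay=0.05)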
# Learning rate schedule parameters
sched = cosine # LR scheduler
lr = 0.0005 # learning rate
lr_noise = None # learning rate noise on/off epoch percentages
lr_noise_pct = 0.67 # learning rate noise limit percent
lr_noise_std = 1.0 # learning rate noise std-dev
warmup_lr = 1e-06 # warmup learning rate
min_lr = 1e-05 # lower lr bound for cyclic schedulers that hit 0
decay_epochs = 30 # epoch interval to decay LR
warmup_epochs = 5 # epochs to warmup LR, if scheduler supports
cooldown_epochs = 10 # epochs to cooldown LR at min_lr, after cyclic schedule ends
patience_epochs = 10 # patience epochs for Plateau LR scheduler
decay_rate = 0.1 # LR decay rate
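# These keys mirror timm's scheduler arguments; assuming they are collected
# into an argparse-style namespace `args`, the cosine schedule could be built
# with timm's factory (a sketch, not this repository's code):
#
#   from timm.scheduler import create_scheduler
#
#   lr_scheduler, num_epochs = create_scheduler(args, optimizer)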
# Augmentation parameters
color_jitter = 0.4 # color jitter factor
aa = rand-m9-mstd0.5-inc1 # AutoAugment/RandAugment policy, e.g. "v0", "original", or a RandAugment spec such as this one
smoothing = 0.1 # label smoothing
train_interpolation = bicubic # possible values: ['random', 'bilinear', 'bicubic']
repeated_aug = True # use repeated augmentation
# Random Erase params
reprob = 0.25 # random erase prob
remode = pixel # random erase mode
recount = 1 # random erase count
resplit = False # do not random erase first (clean) augmentation split
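# The augmentation and random-erase keys above map onto timm's transform
# factory; a minimal sketch, assuming images are resized to input_size:
#
#   from timm.data import create_transform
#
#   train_transform = create_transform(
#       input_size=224, is_training=True, color_jitter=0.4,
#       auto_augment='rand-m9-mstd0.5-inc1', interpolation='bicubic',
#       re_prob=0.25, re_mode='pixel', re_count=1)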
# Mixup params
mixup = 0.0 # mixup alpha, mixup enabled if > 0.
cutmix = 0.0 # cutmix alpha, cutmix enabled if > 0
cutmix_minmax = None # cutmix min/max ratio, overrides alpha and enables cutmix if set
mixup_prob = 1.0 # probability of performing mixup or cutmix when either/both is enabled
mixup_switch_prob = 0.5 # probability of switching to cutmix when both mixup and cutmix enabled
mixup_mode = batch # how to apply mixup/cutmix params, possible values: ['batch', 'pair', 'elem']
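# A sketch of wiring these keys into timm's Mixup helper (num_classes = 10
# assumes CIFAR-10); with both alphas at 0.0 and cutmix_minmax unset,
# mixup/cutmix stay disabled:
#
#   from timm.data import Mixup
#
#   mixup_fn = Mixup(mixup_alpha=0.0, cutmix_alpha=0.0, cutmix_minmax=None,
#                    prob=1.0, switch_prob=0.5, mode='batch',
#                    label_smoothing=0.1, num_classes=10)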
# Dataloader parameters
device = cuda # device to use for training / testing
num_workers = 4
pin_mem = True # pin CPU memory in DataLoader for (sometimes) more efficient transfer to GPU
# Other parameters
finetune = '' # finetune from checkpoint (if empty, don't finetune)
seed = 0
start_epoch = 0
eval = False
dist_eval = False
world_size = 1
dist_url = env://
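# A minimal loading sketch (not from this repository), using Python's
# configparser; inline "#" comments must be enabled explicitly, and
# 'None' / '' values need manual handling since INI stores only strings:
#
#   import configparser
#
#   cfg = configparser.ConfigParser(inline_comment_prefixes=('#',))
#   cfg.read('basic_config.ini')
#   d = cfg['DEFAULT']
#   batch_size = d.getint('batch_size')      # 50
#   pretrained = d.getboolean('pretrained')  # True
#   lr = d.getfloat('lr')                    # 0.0005
#   num_samples = None if d['num_samples'] == 'None' else d.getint('num_samples')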