# example_params
# This is a parameters file for GADMA software.
# Lines that begin with # are ignored.
# Comments at the end of a line are also ignored.
# Each line contains: Parameter identifier : value.
#!!! Indicates parameters that require special attention.
#!!!
# Output directory for all GADMA outputs.
# This should be a directory that does not exist yet or is empty.
# If the run is resumed from another directory and the output
# directory is not specified, GADMA will use the previous output
# directory name with '_resumed' appended.
Output directory: my_example_run
#!!!
# Input data can be in the form of an SFS file (should end with .fs),
# a SNP file in Dadi format (should end with .txt), or a
# VCF file along with a popmap file (sample population map).
Input data: tests/test_data/DATA/sfs/YRI_CEU.fs
# 'Population labels' consist of a sequence of population names
# (these must match the names in the input file).
# If the .fs file is in an old format, population labels that are
# absent from it will be taken from this list.
# If there are more than two populations, the labels must be ordered
# from the most ancient population to the most recently formed one.
# This is important because the most recently formed population is
# the one that will be involved in the next split.
# For example, if we have YRI (African population),
# CEU (European population), and CHB (Chinese population),
# we can write YRI, CEU, CHB or YRI, CHB, CEU
# (YRI must be in the first position).
# Default: from input file
Population labels: [YRI, CEU]
# SFS projections: number of chromosomes for each population.
# It is also possible to downproject the spectrum to a smaller size.
# For example, if we have 40 diploid samples in each of three
# populations, then the full projections will be 80x80x80. We can
# project it by setting the 'Projections' parameter to [20, 20, 20].
# Default: from input file
Projections: [20, 20]
# To indicate if outgroup information is included in the SFS data,
# set the Outgroup option.
# If outgroup is False, then the SFS will be folded.
# Default: from input file
Outgroup: True
# Effective length of the sequence used to build the SFS data.
# This should be used together with the Mutation rate and can be replaced
# by the Theta0 setting.
# Default: None
Sequence length: 4040000
#!!!
# Are the SNPs linked or unlinked?
# If they are linked, then the Composite Likelihood Akaike
# Information Criterion (CLAIC) will be used to compare models.
# If they are unlinked, then the standard Akaike Information Criterion
# (AIC) will be used.
# Default: True
Linked SNP's: True
#!!!
# If SNPs are linked, please set the directory containing
# the bootstrapped data for CLAIC calculation.
# Bootstrapping should be done over regions of the genome.
# Default: None
Directory with bootstrap: Null
#!!!
# Now for the main parameters:
#
# Engine for demographic inference.
# Default: moments
Engine: moments
# If you choose to use Dadi, please set the 'pts' parameter - the number
# of points in the grid. With another engine you can still specify it:
# it will then be used in the Dadi code of the models.
# Default: Let n = max number of individuals in one population,
# then pts = n, n+10, n+20
Pts: [20, 30, 40]
#!!!
# Print parameters of the model in units of N_ref = N_A.
# N_A will be placed in brackets at the end of the string.
# Default: False
Relative parameters: False
# Total mutation flux - theta.
# It is equal to:
# theta = 4 * μ * L
# where μ is the mutation rate per site per generation and
# L is the effective sequenced length, which accounts for losses
# in alignment and missed calls.
# Note: μ should be estimated based on generation time.
# Default: 1.0
Theta0: Null
# Instead of Theta0, the mutation rate can be set independently.
# This should be used together with the Sequence length option.
# Default: None
Mutation rate: 2.35e-08
# Recombination rate. Required for momentsLD engine.
# Default: None
Recombination rate: Null
# Time (in years) for one generation. Can be a float.
# This is important for drawing models. If you do not wish to draw,
# you may skip this parameter.
# Default: 1.0
Time for generation: Null
#!!!
# You must choose the demographic history to infer.
# It can be custom or set up with structure.
# 1. Using a custom demographic model.
# Please specify a file containing a function named 'model_func'.
# The file should contain:
# def model_func(params, ns, pts) for Dadi
# or
# def model_func(params, ns) for moments
# Default: None
Custom filename: Null
# You must now specify either bounds or identifiers for the
# parameters of the custom model. All values are in genetic units.
# Lower and upper bounds are lists of numbers.
# Common bounds:
# N: 1e-2 - 100
# T: 0 - 5
# m: 0 - 10
# s: 0 - 1
# These bounds will be applied automatically if identifiers are set.
# Default: None
Lower bound: Null
Upper bound: Null
# An identifier list:
# T - time
# N - size of population
# m - migration
# s - split event, representing the proportion of population size
# divided to form two new populations.
# Default: None
Parameter identifiers: Null
# 2. Structure is for non-custom models!
# Structure of the model for one population - number of time periods
# (e.g., 5).
# Structure of the model for two populations - number of time periods
# before the split of the ancestral population and after it (e.g., 2,2).
# Structure of the model for three populations - number of time periods
# before the first split, between the first and second splits, and after
# the second split (e.g., 2,1,2).
#
# Initial model structure:
# Default: all ones - 1 or 1,1 or 1,1,1
Initial structure: [1, 1]
# Final model structure:
# Default: equals the initial structure
Final structure: [2, 1]
#!!!
# Additional settings for demographic models with structure.
#
# Use sudden changes in population sizes only. This reduces
# the number of parameters.
# Default: False
Only sudden: False
# The set of available size dynamics can be any subset.
# 'Sud' stands for sudden size change (constant during the next time epoch),
# 'Lin' for linear size change,
# 'Exp' for exponential size change.
# If 'Only sudden' is True, then this setting will be [Sud].
# Default: [Sud, Lin, Exp]
Dynamics: [Sud, Lin, Exp]
# Disable migrations in demographic models.
# Default: False
No migrations: False
# Make all migrations symmetrical.
# Default: False
Symmetric migrations: False
# Enable or disable migrations selectively.
# Default: None
Migration masks: Null
# Enable or disable inference of selection coefficients.
# Supported only by moments and Dadi engines.
# Default: False
Selection: False
# Enable or disable inference of the dominance coefficient.
# If True, then the 'Selection' setting must also be True.
# Supported only by moments and Dadi engines.
# Default: False
Dominance: False
# Estimate the fraction of the ancestral population as a parameter of the
# split. If False, then the population splits and each of the new
# populations has its own size as a parameter of the model.
# Default: False
Split fractions: True
# Estimate inbreeding coefficients as model parameters.
# Can be used only with the Dadi engine.
# Default: False
Inbreeding: False
# If False, then a multinomial approach is used in Dadi and moments.
# In the multinomial approach, the ancestral size is inferred implicitly.
# Default: False
Ancestral size as parameter: False
# It is possible to limit the time of splits by specifying bounds.
# Split 1 is the most ancient split.
# !Note that time is measured in generations:
# e.g., to limit by 150 kya, if time for one generation is
# 25 years, then the bound will be 150000 / 25 = 6000.
#
# Lower bound for split 1 (in case of 2 or 3 populations).
# Default: None
Lower bound of first split: Null
# Upper bound for split 1 (in case of 2 or 3 populations).
# Default: None
Upper bound of first split: Null
# Lower bound for split 2 (in case of 3 populations).
# Default: None
Lower bound of second split: Null
# Upper bound for split 2 (in case of 3 populations).
# Default: None
Upper bound of second split: Null
#!!!
# Local optimization.
#
# Choice of local optimization that is launched after
# each genetic algorithm.
# Choices:
#
# * optimize (BFGS method)
#
# * optimize_log (BFGS method applied to the logarithms of the parameters)
#
# * optimize_powell (Powell’s conjugate direction method)
# (Note: implemented in moments; one needs to have moments
# installed.)
#
# (If optimizations often hit the parameter bounds,
# try using these methods:)
# * optimize_lbfgsb
# * optimize_log_lbfgsb
# (Note that it is generally best to start with the vanilla BFGS
# methods, as the L-BFGS-B methods will always test parameter
# values at the bounds during the search. This can dramatically
# slow down model fitting.)
#
# * optimize_log_fmin (simplex (a.k.a. amoeba) method)
#
# * hill_climbing
#
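# Default: optimize_powell
# NOTE(review): the value set below ('BFGS_log') does not appear in the
# list of choices above; confirm which optimizer names are accepted by
# the installed GADMA version.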
Local optimizer: BFGS_log
# Parameters of the pipeline.
#
# One can automatically generate dadi and moments code for models.
# If set to 0, only the current best model will be printed in the GA's
# working directory. The resulting model will also be saved there.
# If specified (not 0), then every N iterations, the model will be saved
# in the Python code folder.
# Default: 0
Print models' code every N iteration: 100
# Engine that will draw demographic model plots.
# Can be moments or demes.
# Default: moments
Model plot engine: moments
# One can automatically draw models every N iterations.
# If set to 0, models will never be drawn.
# Pictures are saved in the GA's directory in the picture folder.
# Default: 0
Draw models every N iteration: 100
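# One can choose time units in model plots: years or thousands
# of years (kya, KYA). The value used below, 'generations', is
# presumably also accepted - confirm against the GADMA documentation.
# If time for one generation is not specified,
# time will be represented in genetic units.
# Default: years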
Units of time in drawing: generations
# Minimum value to be drawn in SFS plots.
# Default: 1
Vmin: 1
# Suppresses standard output.
# Default: False
Silence: False
# Verbosity level for optimization output.
# Default: 1
Verbose: 1
# How many times to launch GADMA with these parameters.
# Default: 1
Number of repeats: 3
# How many processes to use for these repeats.
# Note that one repeat is not parallelized, so increasing the number
# of processes will not affect the time of one repeat.
# It is advisable that the number of repeats is a multiple of
# the number of processes.
# Default: 1
Number of processes: 3
# One can resume from a previous GADMA run by setting
# the output directory of that run in the 'Resume from' parameter.
# New parameters for the resumed run can be set again.
Resume from: Null
# If you want to take only the models from the previous run, set this
# flag. Iterations of the GA will then start from 0, and the mutation
# rate and mutation strength will be reset to their initial values.
# Default: None
Only models: False