#    example_params

#    This is a parameters file for GADMA software.

#    Lines that begin with # are ignored.
#    Comments at the end of a line are also ignored.
#    Each line contains: Parameter identifier : value.

#!!!     Indicates parameters that require special attention.

#!!!
#    Output directory for all GADMA outputs.
#    This should be set to a non-existent or empty directory.
#    If the process is resumed from another directory and the output 
#    directory is not specified, GADMA will append '_resumed' to the 
#    previous output directory.
Output directory: my_example_run

#!!!
#    Input data can be in the form of an SFS file (should end with .fs), 
#    a SNP file in Dadi format (should end with .txt), or a 
#    VCF file along with a popmap file (sample population map).
Input data: tests/test_data/DATA/sfs/YRI_CEU.fs

#    'Population labels' consist of a sequence of population names 
#    (these must match the names in the input file).
#    If the .fs file is in an old format that lacks population labels, 
#    the labels listed here will be assigned to its populations.
#    They must be ordered from the most ancient to the least ancient 
#    (if there are more than two populations).
#    This is important because the last formed population will be 
#    involved in the next split.
#    For example, if we have YRI (African population), 
#    CEU (European population), and CHB (Chinese population),
#    we can write YRI, CEU, CHB or YRI, CHB, CEU 
#    (YRI must be in the first position).
#    Default: from input file
Population labels: [YRI, CEU]

#    SFS projections: number of chromosomes for each population.
#    It is also possible to downproject the spectrum to a smaller size.
#    For example, if we have 40 diploid samples in each of three 
#    populations, then the full projections will be 80x80x80. We can 
#    project it by setting the 'Projections' parameter to [20, 20, 20].
#    Default: from input file
Projections: [20, 20]

#   To indicate if outgroup information is included in the SFS data,
#   set the Outgroup option.
#   If outgroup is False, then the SFS will be folded.
#   Default: from input file
Outgroup: True

#   Effective length of the sequence used to build the SFS data.
#   This should be used together with the Mutation rate and can be replaced
#   by the Theta0 setting.
#   Default: None
Sequence length: 4040000

#!!!
#    Are the SNPs linked or unlinked?
#    If they are linked, then the Composite Likelihood Akaike
#    Information Criterion (CLAIC) will be used to compare models.
#    If they are unlinked, then the standard Akaike Information Criterion 
#    (AIC) will be used.
#    Default: True
Linked SNP's: True

#!!!
#    If SNPs are linked, please set the directory containing 
#    the bootstrapped data for CLAIC calculation.
#    Bootstrapping should be done over regions of the genome.
#    Default: None
Directory with bootstrap: Null

#!!!
#    Now for the main parameters:
#
#    Engine for demographic inference.
#    Default: moments
Engine: moments

#    If you choose to use Dadi, please set the 'pts' parameter - the number
#    of points in the grid. Otherwise, you can still specify it: it will be
#    used in Dadi's code.
#    Default: Let n = max number of individuals in one population, 
#    then pts = n, n+10, n+20
Pts: [20, 30, 40]

#!!!
#    Print parameters of the model in units of N_ref = N_A.
#    N_A will be placed in brackets at the end of the string.
#    Default: False
Relative parameters: False

#    Total mutation flux - theta.
#    It is equal to:
#    theta = 4 * μ * L
#    where μ is the mutation rate per site per generation and 
#    L is the effective sequenced length, which accounts for losses 
#    in alignment and missed calls.
#    Note: μ should be estimated based on generation time.
#    Default: 1.0
Theta0: Null

#   Instead of Theta0, the mutation rate can be set independently.
#   This should be used together with the Sequence length option.
#   Default: None
Mutation rate: 2.35e-08

#    Recombination rate. Required for momentsLD engine.
#    Default: None
Recombination rate: Null

#    Time (in years) for one generation. Can be a float. 
#    This is important for drawing models. If you do not wish to draw, 
#    you may skip this parameter.
#    Default: 1.0
Time for generation: Null

#!!!
#    You must choose the demographic history to infer.
#    It can be custom or set up with structure.

# 1. Using a custom demographic model.
#    Please specify a file containing a function named 'model_func'. 
#    The file should contain:
#    def model_func(params, ns, pts) for Dadi
#    or
#    def model_func(params, ns) for moments
#    Default: None
Custom filename: Null

#    You must now specify either bounds or identifiers for the
#    parameters of the custom model. All values are in genetic units.
#    Lower and upper bounds are lists of numbers.
#    Common bounds:
#    N: 1e-2 - 100
#    T: 0 - 5
#    m: 0 - 10
#    s: 0 - 1
#    These bounds will be applied automatically if identifiers are set.
#    Default: None
Lower bound: Null
Upper bound: Null

#    An identifier list:
#    T - time
#    N - size of population
#    m - migration
#    s - split event, representing the proportion of population size
#    divided to form two new populations.
#    Default: None
Parameter identifiers: Null

# 2. Structure is for non-custom models!
#    Structure of the model for one population - number of time periods 
#    (e.g., 5).
#    Structure of the model for two populations - number of time periods
#    before the split of the ancestral population and after it (e.g., 2,2).
#    Structure of the model for three populations - number of time periods
#    before the first split, between the first and second splits, and after 
#    the second split (e.g., 2,1,2).
#
#    Initial model structure:
#    Default: all ones - 1 or 1,1 or 1,1,1
Initial structure: [1, 1]

#    Final model structure:
#    Default: equals the initial structure
Final structure: [2, 1]

#!!!
#    Additional settings for demographic models with structure.
#
#    Use sudden changes in population sizes only. This reduces 
#    the number of parameters.
#    Default: False
Only sudden: False

#    The set of available size dynamics: any subset of [Sud, Lin, Exp].
#    'Sud' stands for sudden size change (constant during the next time epoch),
#    'Lin' for linear size change,
#    'Exp' for exponential size change.
#    If 'Only sudden' is True, then this setting will be [Sud].
#    Default: [Sud, Lin, Exp]
Dynamics: [Sud, Lin, Exp]

#    Disable migrations in demographic models.
#    Default: False
No migrations: False

#   Make all migrations symmetrical.
#   Default: False
Symmetric migrations: False

#   Enable or disable migrations selectively.
#   Default: None
Migration masks: Null

#    Enable or disable inference of selection coefficients.
#    Supported only by moments and Dadi engines.
#    Default: False
Selection: False

#    Enable or disable inference of the dominance coefficient.
#    If True, then the 'Selection' setting must also be True.
#    Supported only by moments and Dadi engines.
#    Default: False
Dominance: False

#   Estimate the fraction of the ancestral population as a parameter of the
#   split. If False, then the population splits and each of the new
#   populations has its own size as a parameter of the model.
#   Default: False
Split fractions: True

#   Estimate inbreeding coefficients as model parameters.
#   Can be used only with the Dadi engine.
#   Default: False
Inbreeding: False

#   If False, then a multinomial approach is used in Dadi and moments.
#   In the multinomial approach, the ancestral size is inferred implicitly.
#   Default: False
Ancestral size as parameter: False

#    It is possible to limit the time of splits by specifying bounds.
#    Split 1 is the most ancient split.
#    Note: time is measured in generations.
#    E.g., to bound a split at 150 kya when the time for one generation 
#    is 25 years, the bound will be 150000 / 25 = 6000.
#
#    Lower bound for split 1 (for 2 or 3 populations).
#    Default: None
Lower bound of first split: Null

#    Upper bound for split 1 (in case of 2 or 3 populations).
#    Default: None
Upper bound of first split: Null

#    Lower bound for split 2 (in case of 3 populations).
#    Default: None
Lower bound of second split: Null

#    Upper bound for split 2 (in case of 3 populations).
#    Default: None
Upper bound of second split: Null

#!!!
#    Local optimization.
#
#    Choice of local optimization that is launched after 
#    each genetic algorithm.
#    Choices:
#
#    *    optimize (BFGS method)
#    
#    *    optimize_log (BFGS method on log-transformed parameters)
#    
#    *    optimize_powell (Powell’s conjugate direction method)
#    (Note: implemented in moments; one needs to have moments 
#    installed.)
#
#    (If optimizations often hit the parameter bounds, 
#    try using these methods:)
#    *    optimize_lbfgsb
#    *    optimize_log_lbfgsb 
#    (Note that it is generally best to start with the vanilla BFGS 
#    methods, as the L-BFGS-B methods will always test parameter
#    values at the bounds during the search. This can dramatically 
#    slow down model fitting.)
#
#    *    optimize_log_fmin (simplex (a.k.a. amoeba) method)
#    
#    *    hill_climbing
#    
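#    NOTE(review): the value set below (BFGS_log) does not appear in the
#    list above; check the GADMA documentation for the optimizer names
#    accepted by your version.
#    Default: optimize_powell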
Local optimizer: BFGS_log

#    Parameters of the pipeline.
#
#    One can automatically generate dadi and moments code for models.
#    If set to 0, only the current best model will be printed in the GA's 
#    working directory. The resulting model will also be saved there. 
#    If specified (not 0), then every N iterations, the model will be saved
#    in the Python code folder.
#    Default: 0
Print models' code every N iteration: 100

#    Engine that will draw demographic model plots.
#    Can be moments or demes.
#    Default: moments
Model plot engine: moments

#    One can automatically draw models every N iterations. 
#    If set to 0, models will never be drawn.
#    Pictures are saved in the GA's directory in the picture folder.
#    Default: 0
Draw models every N iteration: 100

#    One can choose time units in model plots: generations, years, or 
#    thousands of years (kya, KYA). If time for one generation is not 
#    specified, time will be represented in genetic units.
#    Default: years
Units of time in drawing: generations

#    Minimum value to be drawn in SFS plots.
#    Default: 1
Vmin: 1

#    Suppresses standard output.
#    Default: False
Silence: False

#    Verbosity level for optimization output.
#    Default: 1
Verbose: 1

#    How many times to launch GADMA with these parameters.
#    Default: 1
Number of repeats: 3

#    How many processes to use for these repeats.
#    Note that one repeat is not parallelized, so increasing the number
#    of processes will not affect the time of one repeat.
#    It is advisable that the number of repeats is a multiple of 
#    the number of processes.
#    Default: 1
Number of processes: 3

#    One can resume from a previous GADMA run by setting
#    the output directory of that run in the 'Resume from' parameter.
#    New parameters for the resumed run can be set again.
Resume from: Null

#    If you want to take only the models from the previous run, set this 
#    flag. Iterations of the GA will then start from 0, and the mutation
#    rate and strength will be reset to their initial values.
#    Default: None
Only models: False