Skip to content

Commit

Permalink
update metadata and config for denmark
Browse files Browse the repository at this point in the history
  • Loading branch information
ktmeaton committed Mar 4, 2021
1 parent ddccc2f commit 9512900
Show file tree
Hide file tree
Showing 3 changed files with 764 additions and 0 deletions.
103 changes: 103 additions & 0 deletions project/denmark/config/snakemake.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Snakemake Configuration File

# SQLITE Parameters
# Name of the SQLite database file.
# NOTE(review): presumably the database the sqlite_select_command_* queries
# below are executed against - confirm against the workflow rules.
sqlite_db : "yersinia_pestis_db.sqlite"
# Query for assembly records: returns AssemblyFTPGenbank for BioSample rows
# whose BioSampleComment matches '%KEEP%Assembly%Modern%' and whose
# AssemblyFTPGenbank value is non-empty.
# NOTE(review): assuming AssemblyFTPGenbank is a column of Assembly,
# length(NULL) > 0 is never true, so BioSample rows with no matching Assembly
# row are filtered out and the LEFT JOIN behaves like an INNER JOIN here.
sqlite_select_command_asm : SELECT
AssemblyFTPGenbank
FROM
BioSample
LEFT JOIN Assembly
ON AssemblyBioSampleAccession = BioSampleAccession
WHERE
(BioSampleComment LIKE '%KEEP%Assembly%Modern%' AND
length(AssemblyFTPGenbank) > 0)
# Query for SRA records: returns BioSampleAccession and SRARunAccession for
# BioSample rows whose BioSampleComment matches '%KEEP%SRA%Ancient%', excluding
# rows whose SRAComment contains REMOVE.
# The pattern is written with single quotes: in SQL, double quotes denote
# identifiers, and SQLite only treats them as strings through a legacy
# fallback that can be disabled at build time.
# NOTE(review): NOT LIKE is not true for a NULL SRAComment, so BioSample rows
# with no matching SRA row (or a NULL comment) are dropped - confirm that this
# is intended.
sqlite_select_command_sra : SELECT
  BioSampleAccession,
  SRARunAccession
  FROM
  BioSample
  LEFT JOIN SRA
  ON SRABioSampleAccession = BioSampleAccession
  WHERE
  (BioSampleComment LIKE '%KEEP%SRA%Ancient%' AND
  SRAComment NOT LIKE '%REMOVE%')
# Query for local records: returns BioSampleAccession for BioSample rows whose
# BioSampleComment matches '%Local%Denmark%'.
sqlite_select_command_local : SELECT
BioSampleAccession
FROM
BioSample
WHERE
(BioSampleComment LIKE '%Local%Denmark%')
# Query for the reference record: returns AssemblyFTPGenbank for BioSample rows
# whose BioSampleComment matches '%Assembly%Modern%Reference%'.
# NOTE(review): the only filter is on BioSampleComment, so (assuming
# AssemblyFTPGenbank is a column of Assembly) a matching BioSample with no
# joined Assembly row would yield a NULL result - confirm the consumer
# tolerates that.
sqlite_select_command_ref : SELECT
AssemblyFTPGenbank
FROM
BioSample
LEFT JOIN Assembly
ON AssemblyBioSampleAccession = BioSampleAccession
WHERE
(BioSampleComment LIKE '%Assembly%Modern%Reference%')

# Dataset size
# Upper bounds for the assembly and SRA datasets.
# NOTE(review): presumably these cap how many records of each kind are used -
# confirm against the workflow rules.
max_datasets_assembly : 1000
max_datasets_sra : 1000
# Kinds of input data to include.
# NOTE(review): the three values appear to correspond to the asm / sra / local
# select commands defined earlier in this file - confirm.
reads_origin :
- "assembly"
- "sra"
- "local"

# Misc filtering
# NOTE(review): units are not stated here - presumably a percent identity
# threshold and a minimum length in bases for repeat detection; confirm.
detect_repeats_threshold : 90
detect_repeats_length : 50

# Reference locus: accession, display name, and start/end coordinates.
# The start and end values are quoted, so YAML loads them as strings rather
# than integers.
reference_locus : "AL590842"
reference_locus_name : "chromosome"
reference_locus_start : "0"
reference_locus_end : "4653728"

# Eager parameters
# NOTE(review): eager_rev is presumably the pipeline revision to run, and
# eager_bwaalnn / eager_bwaalnl presumably map to the bwa aln -n / -l options -
# confirm against the rule that consumes them.
eager_rev: "2.2.1"
eager_clip_readlength : 35
eager_bwaalnn : 0.01
eager_bwaalnl : 16
# Extra command-line options, stored as a single string.
eager_other : '--mergedonly'
organism : "Yersinia pestis"
# Adapter Removal Defaults
# Forward and reverse adapter sequences; the forward adapter contains a run of
# six N characters.
eager_forward_adapter : 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG'
eager_reverse_adapter : 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT'

# Snippy Parameters
# NOTE(review): the meaning and units of the depth / quality / fraction /
# density values below are not stated in this file - confirm against the rules
# that consume them before changing any.
snippy_ctg_depth : 10
snippy_bam_depth : 3
snippy_base_qual : 20
snippy_map_qual : 30
snippy_min_frac : 0.9
snippy_mask_char : "X"
snippy_missing_data : 5
snippy_snp_density : 10
# Make this an empty string if removing singletons; to keep singletons, use
# the commented-out "--keep-singleton" value below instead.
snippy_keep_singleton: ""
#snippy_keep_singleton : "--keep-singleton"
# Missing-data values 0 through 10.
# NOTE(review): presumably the set of thresholds compared in the multi plot -
# confirm.
snippy_multi_plot_missing_data:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10

# IQTREE
# Alternative model setting, currently disabled in favour of the fixed model
# on the line after it.
#iqtree_model: "-m MFP"
iqtree_model : "-m K3Pu+F+I"
iqtree_seed : "47321424" # keeping it consistent in a config file allows for checkpointing
# Three outgroup choices follow; only option #3 is currently active, the other
# two are commented out.
# Outgroup Option #1: Reference
#iqtree_outgroup : "Reference"
# Outgroup Option #2: Basal modern clade
#iqtree_outgroup : "GCA_000323485.1_ASM32348v1_genomic,GCA_000323845.1_ASM32384v1_genomic"
# Outgroup Option #3: Basal ancient clade
iqtree_outgroup : "SAMEA3541826,SAMEA3541827"
# Extra command-line options, stored as a single string.
iqtree_other : "--ufboot 1000"
# NOTE(review): presumably the number of independent IQ-TREE runs - confirm.
iqtree_runs : 10
Loading

0 comments on commit 9512900

Please sign in to comment.