-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Matrix changes and GATK Indel integration
- Loading branch information
1 parent
e9bb7d2
commit 053aea1
Showing
65 changed files
with
5,021 additions
and
3,192 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
modules/beast/test/2018_08_15_10_45_01_KPNIH1_ref_allele_unmapped_consensus_gubbins_masked.fa | ||
modules/beast/test/2018_08_15_10_45_01_KPNIH1_ref_allele_unmapped_consensus_gubbins_masked_invar_site_counts.txt | ||
modules/beast/test/2018_08_15_10_45_01_KPNIH1_ref_allele_unmapped_consensus_gubbins_masked_snp-sites.vcf | ||
modules/beast/test/2018_08_15_10_45_01_KPNIH1_ref_allele_unmapped_consensus_gubbins_masked_var_sites.fa | ||
modules/beast/test/2018_08_15_10_45_01_KPNIH1_ref_allele_unmapped_consensus_invar_site_counts.txt | ||
modules/beast/test/2018_08_15_10_45_01_KPNIH1_ref_allele_unmapped_consensus_masked_recomb_positions.txt | ||
modules/beast/test/2018_08_15_10_45_01_KPNIH1_ref_allele_unmapped_consensus_snp-sites.vcf | ||
modules/beast/test/input_beast.txt | ||
modules/beast/test/invar_base_counts.txt | ||
modules/beast/test/model_finder_IQTREE_noDec_wDates_LA.tree | ||
modules/beast/test/penn-st258_LA_bmt_ucln_bs_dta2_renamed.xml | ||
modules/beast/test/penn-st258_LA_bmt_ucln_bs_dta2.xml | ||
modules/beast/test/penn-st258_LA_bmt_ucln_bs_dta_renamed_st_invSites.xml | ||
modules/beast/test/penn-st258_LA_bmt_ucln_bs_dta_renamed.xml | ||
modules/beast/test/penn-st258_LA_bmt_ucln_bs_dta.xml | ||
modules/beast/test/test_commands.sh | ||
modules/beast/test/test_fasta_path.txt | ||
modules/beast/test/test_gff_path.txt |
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,237 @@ | ||
## Set which tools to use in pipeline: | ||
[pipeline] | ||
# Options for Aligner:bwa / smalt / bowtie | ||
aligner: bwa | ||
# Options for variant_caller: samtoolswithpostalignbam / gatkhaplotypecaller /samtools | ||
variant_caller: samtools | ||
|
||
## Set bin folder path. Please make sure all the executables are placed in bin folder. Also make sure the path for individual tools are correct. | ||
[bin_path] | ||
binbase: /nfs/esnitkin/bin_group/variant_calling_bin/ | ||
|
||
## Set PBS scheduler fields. | ||
[scheduler] | ||
# This small resource will be used for smaller jobs | ||
resources: nodes=1:ppn=4,pmem=4000mb,walltime=250:00:00 | ||
# Large cluster resources in case of large sample size/variant call sets | ||
large_resources: nodes=1:ppn=12,mem=47gb,walltime=250:00:00 | ||
email: apirani@med.umich.edu | ||
queue: flux | ||
flux_account: esnitkin_flux | ||
notification: ae | ||
|
||
## Tools/Module Parameters | ||
# Set Parameters for individual tools. Set the binbase of each tool: This should be the folder name of respective tools where the executables for each resp. tool resides. | ||
# Set parameter for Trimmomatic | ||
[Trimmomatic] | ||
trimmomatic_bin: /Trimmomatic/ | ||
adaptor_filepath: adapters/TruSeq3-Nextera_PE_combined.fa | ||
seed_mismatches: 2 | ||
palindrome_clipthreshold: 30 | ||
simple_clipthreshold: 10 | ||
minadapterlength: 8 | ||
#change this to true and see the effect on alignment | ||
keep_both_reads: true | ||
window_size: 4 | ||
window_size_quality: 20 | ||
minlength: 40 | ||
headcrop_length: 0 | ||
colon: : | ||
targetlength: 125 | ||
crop_length: 40 | ||
f_p: forward_paired.fq.gz | ||
f_up: forward_unpaired.fq.gz | ||
r_p: reverse_paired.fq.gz | ||
r_up: reverse_unpaired.fq.gz | ||
|
||
[bwa] | ||
bwa_bin: /bwa-0.7.12/ | ||
cores: 8 | ||
base_cmd: bwa | ||
algorithm: mem | ||
index: index | ||
RG_header: -R | ||
Mark_splithits: -M | ||
|
||
[bowtie] | ||
bowtie_bin: /bowtie2-2.2.6/ | ||
cores: 8 | ||
build_cmd: bowtie2-build | ||
align_cmd: bowtie2 | ||
parameters: -k 1 --non-deterministic --end-to-end | ||
|
||
[samtools] | ||
samtools_bin: /samtools-1.2/ | ||
base_cmd: samtools | ||
#minimum mapping quality | ||
#change parameter S to -t SP and D to -t DP | ||
mpileup_parameters: -ug -f | ||
faiindex: faidx | ||
#-q30 -B -E -C50 | ||
|
||
[bcftools] | ||
bcftools_bin: /bcftools-1.2/ | ||
base_cmd: bcftools | ||
call_parameters: -vg | ||
|
||
[picard] | ||
picard_bin: /picard-tools-2.5.0/ | ||
base_cmd: picard.jar | ||
|
||
[gatk] | ||
gatk_bin: /GenomeAnalysisTK-3.3-0/ | ||
base_cmd: GenomeAnalysisTK.jar | ||
haplotype_parameters: -T HaplotypeCaller | ||
#gatk_filter1_parameter_expression: FQ < 40.00 && MQ > 20 && QUAL > 50 && DP > 15 | ||
#gatk_filter2_parameter_expression: FQ < 0.025 && MQ > 50 && QUAL > 100 && DP > 15 | ||
#Deprecated. gatk_haplotypecaller Integration Pending for SNP calling. | ||
#gatk_haplotypecaller_specific_filter2_parameter_expression: FQ < 0.025 && MQ > 50 && QUAL > 100 && DP > 15 | ||
|
||
[vcftools] | ||
#vcftools_perl_bin: /vcftools_0.1.12b/perl/ | ||
vcftools_bin: /vcftools_0.1.12b/bin/ | ||
tabix_bin: /tabix-0.2.6/ | ||
#vcftools_bin: /vcftools_0.1.12b/bin/ | ||
vcftools_perl_bin: /vcftools_0.1.12b/perl/ | ||
|
||
[qualimap] | ||
qualimap_bin: /qualimap_v2.1/ | ||
base_cmd: qualimap | ||
|
||
[bedtools] | ||
bedtools_bin: /bedtools2-master/bin/ | ||
base_cmd: bedtools | ||
version_for_coverage: /version_for_coverage/ | ||
|
||
[bioawk] | ||
bioawk_bin: /bioawk-master/ | ||
base_cmd: bioawk | ||
|
||
[fasttree] | ||
fasttree_bin: /Fasttree_2.1.10/ | ||
#For Multithread fasttree; use FastTreeMP executable file | ||
base_cmd: FastTree | ||
|
||
[raxml] | ||
raxml_bin: /raxml/ | ||
openmpi_bin: /openmpi/bin/ | ||
# Other raxml executable available to use: raxmlHPC-PTHREADS,raxmlHPC-PTHREADS-SSE3,raxmlHPC-SSE3 | ||
base_cmd: raxmlHPC-HYBRID-SSE3 | ||
parameters: -f a -x 12345 -p 12345 -N autoMRE -m GTRCAT -T 20 | ||
|
||
[iqtree] | ||
iqtree_bin: /nfs/esnitkin/bin_group/anaconda3/bin/ | ||
base_cmd: iqtree | ||
parameters: -nt AUTO -bb 1000 -m GTR+G+ASC | ||
|
||
[gubbins] | ||
# Change this path to wherever gubbins is located/installed. Right now, installed using conda from anaconda3 package installed in bin_group. | ||
gubbins_bin: /nfs/esnitkin/bin_group/anaconda3/bin/ | ||
base_cmd: run_gubbins.py | ||
|
||
[mummer] | ||
mummer_bin: /MUMmer3.23/ | ||
nucmer_base_cmd: nucmer | ||
min_tandem_repeat_length: 20 | ||
percent_id: 95 | ||
|
||
## Variant Filters | ||
# Select which type of filters to use. Default is snitkin_filters. See Below snitkin_filters for more information about each filter criterias. | ||
[SNP_filters] | ||
filter_criteria: snitkin_filters | ||
# Other types of filters: SPANDx_filters, loose_filters, snitkin_filters, contamination_filters | ||
|
||
## Filters used for most of the Microbial Variant calling Projects. | ||
[snitkin_filters] | ||
avg_depth: no | ||
# If AVG_DEPTH is yes, the below DP threshold will be ignored. Instead, LOW_DEPTH and HIGH_DEPTH filter parameter will be used. | ||
# Filter variants with Depth less than the below threshold | ||
dp: 9 | ||
# A value of 2 means that regions with less than half of the average coverage of the entire genome will fail | ||
low_depth: 2 | ||
# A value of 5 means that regions with 5x depth greater than the average coverage will fail | ||
high_depth: 5 | ||
# Filter variants with FQ(Consensus Quality) greater than the below threshold | ||
fq: 0.025 | ||
fq2: 0.025 | ||
# Filter variants with MQ(Root Mean Square Quality) less than the below threshold | ||
mq: 50 | ||
# Filter variants with Variant QUAL less than the below threshold | ||
qual: 100 | ||
# Filter variants with GATK QualbyDepth QD parameter; filter less than the below threshold. Currently, being used for Indel SNPS only. | ||
qd: 2.00 | ||
# Filter variants with AF1 less than the below threshold | ||
af: 0.900 | ||
# Filter Variants that are proximate to each other within this number of range. To turn this off, use 0(zero). | ||
prox: 0 | ||
|
||
[contamination_filters] | ||
avg_depth: no | ||
# If AVG_DEPTH is yes, the below DP threshold will be ignored. Instead, LOW_DEPTH and HIGH_DEPTH filter parameter will be used. | ||
# Filter variants with Depth less than the below threshold | ||
dp: 3 | ||
# A value of 2 means that regions with less than half of the average coverage of the entire genome will fail | ||
low_depth: 2 | ||
# A value of 5 means that regions with 5x depth greater than the average coverage will fail | ||
high_depth: 5 | ||
# Filter variants with FQ(Consensus Quality) greater than the below threshold | ||
fq: -20.00 | ||
fq2: -20.00 | ||
# Filter variants with MQ(Root Mean Square Quality) less than the below threshold | ||
mq: 50 | ||
# Filter variants with Variant QUAL less than the below threshold | ||
qual: 0 | ||
# Filter variants with AF1 less than the below threshold | ||
af: 1 | ||
# Filter Variants that are proximate to each other | ||
prox: 1 | ||
|
||
## Functional class filters. Find Phage and repetitive regions in reference genome, Mask regions of reference genome and dont call variants against it, Mask features with mobile element annotations in it. | ||
[functional_filters] | ||
# If apply_functional_filters is set to no, the pipeline will not find Phage/Repeat regions and Mask positions provided in mask_file. | ||
apply_functional_filters: yes | ||
find_phage_region: yes | ||
find_repetitive_region: yes | ||
mask_region: no | ||
mask_file: /nfs/esnitkin/bin_group/variant_calling_bin/reference/test_file.txt | ||
mobile_elements: yes | ||
# If apply_to_calls is set to no, PHAGE/REPEAT/MASK will still run but will not remove calls falling in this region. | ||
apply_to_calls: yes | ||
|
||
##SNP annotations | ||
[snpeff] | ||
snpeff_bin: /snpEff/ | ||
base_cmd: snpEff.jar | ||
snpeff_parameters: -d -no-downstream -no-upstream | ||
prebuild: no | ||
db: Staphylococcus_aureus_subsp_aureus_usa300_tch1516 | ||
dataDir: /data/ | ||
|
||
#CLUSTER_SNP: 3 | ||
#CLUSTER_WINDOW_SNP: 10 | ||
#MLEAF_SNP: 0.95 | ||
#QD_SNP: 10.0 | ||
#FS_SNP: 10.0 | ||
#HAPLO_SNP: 20.0 | ||
|
||
######################################################################################################################## | ||
|
||
# Reference Genome to be used for pipeline | ||
# Set path for already indexed reference genome | ||
|
||
# Most Frequently used reference genomes | ||
|
||
# Name of the reference genome. Provide this value with -index argument. | ||
[KPNIH1] | ||
# Name of reference genome fasta file. | ||
Ref_Name: KPNIH1.fasta | ||
# path to the reference genome fasta file. | ||
Ref_Path: /nfs/esnitkin/bin_group/variant_calling_bin/reference/KPNIH1_test/ | ||
|
||
|
||
[CFT073] | ||
# Name of reference genome fasta file. | ||
Ref_Name: EscherichiacoliCFT073.fna | ||
# path to the reference genome fasta file. | ||
Ref_Path: /nfs/esnitkin/bin_group/variant_calling_bin/reference/CFT073 | ||
|
Oops, something went wrong.