params {

    // -------------------------
    // Input/Output directories
    // -------------------------
    input_dir = "$baseDir/input/"
    fastq_dir = "${params.input_dir}/fastq/"
    outdir    = "$baseDir/results"

    // pattern to match for fastq files
    fastq_pattern = "*_R[12]_*.fastq*"
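    // e.g. this default pattern matches paired files named like
    // sample1_R1_001.fastq.gz and sample1_R2_001.fastq.gz
    // (file names here are illustrative, not part of this repository)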
    initial_fastqc_dir    = "${params.outdir}/initial_fastqc/"
    post_trim_fastqc_dir  = "${params.outdir}/post_trim_fastqc/"
    host_filtered_out_dir = "${params.outdir}/host_filtered_fastq/"
    contigs_out_dir       = "${params.outdir}/contigs/"
    blast_out_dir         = "${params.outdir}/blast_output/"
    tally_out_dir         = "${params.outdir}/tallies/"
    virus_seq_out_dir     = "${params.outdir}/virus_sequences/"
    counts_out_dir        = "${params.outdir}/fastq_counts/"
    fastq_out_dir         = "${params.outdir}/trimmed_fastq/"
    bam_out_dir           = "${params.outdir}/bam/"

    // reports on running the pipeline itself
    tracedir = "${params.outdir}/pipeline_info"

    // where R and shell scripts are found
    R_bindir       = "${baseDir}/scripts"
    scripts_bindir = "${baseDir}/scripts"
    // ------------------
    // Trimming settings
    // ------------------
    always_trim_5p_bases = "0"
    always_trim_3p_bases = "1"

    // if your library molecules are very short, you may want to lower this
    post_trim_min_length = "30"

    // cd-hit-dup cutoff for collapsing near-duplicate reads:
    // the number of mismatches allowed between reads that will be collapsed
    mismatches_allowed = "2"
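    // A sketch of how this value might map onto a cd-hit-dup call (the actual
    // invocation lives in the pipeline's process scripts; the file names and
    // exact flags below are assumptions, not taken from this pipeline):
    //
    //   cd-hit-dup -i reads_R1.fastq -i2 reads_R2.fastq \
    //              -o dedup_R1.fastq -o2 dedup_R2.fastq -e 2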
    // --------------------
    // Host cell filtering
    // --------------------
    // Define one of the following two parameters:
    //
    // 1. A 2-column tab-delimited file with:
    //    - the first column defining dataset IDs or patterns that will
    //      match dataset IDs
    //    - the second column giving the path of a bowtie index that will be
    //      used to filter out host reads
    //
    //    This enables different filtering for different datasets.
    host_map_file = ""
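    // Example host_map_file contents (tab-delimited; both the dataset-ID
    // patterns and the index paths below are hypothetical):
    //
    //   fly_sample_.*    /home/databases/fly/combined_fly_index
    //   mosquito_.*      /home/databases/mosquito/aedes_aegypti_index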
    // 2. The path to a bowtie index that will be used to filter host reads
    //    for all datasets
    //
    //    host_bt_index = "/home/databases/fly/combined_fly_index"
    host_bt_index = ""

    // min bowtie alignment score for a read to be considered host-derived
    host_bt_min_score = "60"
    // -------------------------
    // BLAST and classification
    // -------------------------

    // minimum length of contigs to keep for further analysis
    minimum_contig_length = 40

    // classify singletons (reads that don't map to contigs) in addition to contigs?
    // classifying singletons is slower but more thorough
    classify_singletons = true

    // BLAST e-value cutoffs
    max_blast_nt_evalue  = "1e-10"
    max_blastx_nr_evalue = "1e-3"
    blast_db_dir  = "/home/databases/nr_nt/"
    nt_blast_db   = "${params.blast_db_dir}/nt"
    nr_blast_db   = "${params.blast_db_dir}/nr"
    nr_diamond_db = "${params.blast_db_dir}/nr.dmnd"
    // singularity_pull_docker_container option
    //
    // turn this parameter on to pull docker containers and convert to singularity
    //
    // see e.g.: https://nf-co.re/gwas#quick-start, which states:
    //
    // "If you are persistently observing issues downloading Singularity images directly
    //  due to timeout or network issues then please use the --singularity_pull_docker_container
    //  parameter to pull and convert the Docker image instead."
    //
    // TODO: this option is provided in nf-core pipelines, but is it necessary?
    // possibly remove this option and the corresponding if/else statement in processes?
    //
    singularity_pull_docker_container = false
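    // As with any boolean param, this can also be switched on per run from the
    // command line, e.g.:
    //
    //   nextflow run main.nf -profile singularity --singularity_pull_docker_container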
}
process {

    // ------------------------------------------------------------
    // set up resource usage limits for different types of processes
    // ------------------------------------------------------------

    // high memory processes, like blastn (using the nt database)
    withLabel: 'highmem' {
        maxForks = 2
        cpus     = 24
    }

    // low memory processes that use multi-threading, like bowtie2
    withLabel: 'lowmem_threaded' {
        maxForks = 6
        cpus     = 8
    }

    // low memory processes that don't use multi-threading
    withLabel: 'lowmem_non_threaded' {
        maxForks = 24
        cpus     = 1
    }
}
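// These withLabel selectors apply to any process that declares the matching
// label in its definition. A minimal sketch (the process name and script are
// hypothetical, not taken from this pipeline's main.nf):
//
//   process blastn_nt {
//       label 'highmem'
//
//       script:
//       """
//       blastn -query contigs.fa -db ${params.nt_blast_db} -num_threads ${task.cpus}
//       """
//   }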
/*
Profiles allow you to run on different servers or with different base configurations
See: https://www.nextflow.io/docs/latest/config.html#config-profiles
*/
profiles {

    local {
        executor.name      = 'local'
        executor.queueSize = 24
        executor.cpus      = 48
        executor.memory    = '256 GB'

        // if the pipeline has to access system paths outside of $HOME, $PWD, etc.,
        // those paths have to be bound into the singularity container.
        // see: https://sylabs.io/guides/latest/user-guide/bind_paths_and_mounts.html
        // in this profile, we are pointing to local installations of NCBI databases,
        // so we need to access those paths
        singularity.runOptions = "--bind /home/databases"

        params.local_nt_database      = "/home/databases/nr_nt/nt"
        params.local_diamond_database = "/home/databases/nr_nt/nr.dmnd"
        params.remote_blast           = false
    }

    conda {
        params.enable_conda = true
        process.conda       = "./conda/taxonomy_conda_environment.yaml"
        singularity.enabled = false
        conda.cacheDir      = "$HOME/conda_cacheDir"
    }

    singularity {
        params.enable_conda    = false
        singularity.enabled    = true
        singularity.autoMounts = true
        singularity.cacheDir   = "$HOME/singularity_cacheDir"
        // singularity.runOptions = "-B /home/databases"
    }

    test {
        includeConfig 'conf/test.config'
    }
}
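// Profiles can be combined at run time. For example (the fastq path below is
// illustrative):
//
//   nextflow run main.nf -profile local,singularity --fastq_dir /path/to/fastq
//
// or, to use conda instead of singularity:
//
//   nextflow run main.nf -profile conda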
def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
timeline {
    enabled = true
    file    = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
}
report {
    enabled = true
    file    = "${params.tracedir}/execution_report_${trace_timestamp}.html"
}
trace {
    enabled = true
    file    = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
}
dag {
    enabled = true
    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.pdf"
}
manifest {
    name            = 'stenglein-lab/taxonomy'
    author          = 'Mark Stenglein'
    homePage        = 'https://github.com/stenglein-lab/taxonomy'
    description     = 'A pipeline to taxonomically classify sequences from Illumina datasets'
    mainScript      = 'main.nf'
    nextflowVersion = '!>=21.04.0'
    version         = '1.0'
}
// Turn this option on to delete all intermediate files from the analysis
// see: https://www.nextflow.io/docs/latest/config.html
// cleanup = true