/*
 * -------------------------------------------------
 *  UCT dada2 Nextflow config file
 * -------------------------------------------------
 * Default config options for all environments.
 * Cluster-specific config options should be saved
 * in the conf folder and imported under a profile
 * name here.
 */
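// Illustrative invocation (a minimal sketch; the sample-sheet name and the 16S
// primer sequences below are assumptions for illustration, not part of this repo):
//   nextflow run main.nf -profile docker --input samples.csv \
//     --fwdprimer GTGYCAGCMGCCGCGGTAA --revprimer GGACTACNVGGGTWTCTAAT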
// Global default params, used in configs
// Some help with time stamps
import java.text.SimpleDateFormat
params {
  // Configurable variables
  clusterOptions = ''
  project = false
  precheck = false
  email = false
  plaintext_email = false
  // TODO: this needs to be removed or made more specific
  amplicon = '16S'
  platform = 'illumina'
  // Show help message
  help = false
  name = false
  base = "h3abionet"
  version = '1.0-b1' // pipeline version

  // Pipeline Options
  reads = false
  single_end = false // only needed if using '--reads' for input; otherwise this is auto-detected
  timestamp = new SimpleDateFormat("yyyy-MM-dd").format(new java.util.Date())
  outdir = "./" + timestamp + "-dada2"
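  // e.g. with timestamp = "2024-05-01" the line above resolves to
  // outdir = "./2024-05-01-dada2" (illustrative date)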
  // QC
  // Set to true to check read merging; this can take time for a large
  // number of samples, so we recommend running it on a smaller subset first
  check_merging = false
  skip_FASTQC = false // false = run this step by default; it can fail with large sample counts
  skip_dadaQC = false // false = run this step by default; it can fail with large sample counts
  skip_multiQC = false // false = run this step by default; it can fail with large sample counts

  // Trimming
  // NYI; this bypasses all trimming and QC, and assumes primers are removed
  // and sequences are ready for DADA2
  skip_trimming = false
  // When true (default), this sets cutadapt's trimming (which uses linked adapters)
  // to require that *both* primers be present. With some kits (e.g. StrainID) this
  // can be an issue (some reads may be truncated at the 5' or 3' end), so it can be
  // relaxed by setting this to false.
  pacbio_strict_match = true
  fwdprimer = false
  revprimer = false
  trimFor = 0
  trimRev = 0
  truncFor = 0
  truncRev = 0
  maxEEFor = 2
  maxEERev = 2
  truncQ = 0 // default
  maxN = 0 // default
  maxLen = "Inf" // default; this can be coerced in R using as.numeric
  minLen = 50 // default
  // We could likely make these boolean 'false' as above with R coercion
  // (either through as.logical or using optparse in an Rscript)
  rmPhiX = "F" // TODO: test using false instead of a string
  // Error model
  qualityBinning = false // set to true if using binned qualities (NovaSeq)
  errorModel = 'illumina'

  // Generate and use priors
  // false by default; set to true to generate a set of 'priors' for the reads.
  // Note this is done for each denoised data set prior to optional read merging
  // and chimera removal (for example, paired-end data would have two prior files,
  // while single-end data would have one).
  // Note: this is only implemented for per-sample denoising
  generate_priors = false
  fwd_priors = ""
  rev_priors = ""

  // Merging
  // paired_type = "overlapping" // allowed: 'overlapping' (paired reads overlap), 'separate' (paired reads are non-overlapping), or 'mix' (variable length)
  minOverlap = 20 // default
  maxMismatch = 0 // default
  trimOverhang = "F"
  justConcatenate = "F" // TODO: test using false instead of a string
  // CF: this is for rescuing unmerged ITS reads; it should be off unless really
  // needed, and even then it's questionable. But it is requested sometimes.
  rescueUnmerged = false
  dadaParams = false // !!!Deprecated!!!
  dadaOpt = [] // note: this doesn't work well for other R functions
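  // Illustrative only (an assumption, not verified against main.nf): dadaOpt is
  // meant to carry key/value options forwarded to DADA2's setDadaOpt(), e.g.
  //   dadaOpt = [OMEGA_A: 1e-40, BAND_SIZE: 32]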
  maxMergedLen = 0 // only applied if set > 1
  minMergedLen = 0 // only applied if set > 1

  // Chimera detection
  skipChimeraDetection = false
  removeBimeraDenovoOptions = false

  // Taxonomic assignment
  taxassignment = 'rdp' // default: RDP classifier implementation in dada2
  reference = false
  species = false
  minBoot = 50 // default for dada2
  taxLevels = false
  taxBatch = 0 // batch size of ASVs to run through assignTaxonomy/assignSpecies; 0 = run everything

  // Alignment
  skipAlignment = false
  aligner = 'DECIPHER' // default
  infernalCM = false

  // Phylogenetic analysis
  runTree = 'phangorn' // default; the current alternative is 'fasttree'

  // NYI, for dada sample inference pooling (requires all samples)
  pool = "pseudo" // TODO: test using false instead of a string

  // MultiQC
  interactiveMultiQC = false

  // Additional outputs
  toBIOM = true // generate BIOM v1 output
  toQIIME2 = false // generate QZA artifacts for QIIME2

  // Quick hack to clean up sample names, probably unsafe (Bobby Tables);
  // this is now deprecated in favor of using a sample sheet (CSV)
  sampleRegex = false

  // Renaming
  idType = "md5"

  /* Experimental!!!
     Parameters below are experimental
  */

  // Alternative input: a sample sheet (CSV). This will become the default at some point.
  input = false

  // Pre-chimera sequence tables. This pulls in one or more sequence tables
  // from independent sequencing runs, merges them, and runs downstream
  // analysis. The only supported sequence-table format is the original
  // version from DADA2 (ASV names are the sequences, with counts per sample).
  // As these are run through chimera detection, they should be pre-chimera-removal data.
  seqTables = false
}
profiles {
  ilifu {
    includeConfig 'conf/base.config'
    includeConfig 'conf/ilifu.config'
  }
  icts_hpc {
    includeConfig 'conf/icts_hpc.config'
    includeConfig 'conf/base.config'
  }
  training {
    includeConfig 'conf/base.config'
    includeConfig 'conf/training.config'
  }
  standard {
    includeConfig 'conf/standard.config'
  }
  uiuc_test_illumina {
    includeConfig 'conf/test_illumina.config'
    process.executor = 'slurm'
    params.project = 'h3a'
    process.clusterOptions = { "-A $params.project ${params.clusterOptions ?: ''}" }
    process.queue = 'hpcbio,lowmem'
    process.module = "singularity/3.8.1"
  }
  uiuc_test_pacbio {
    includeConfig 'conf/test_pacbio.config'
    process.executor = 'slurm'
    params.project = 'h3a'
    process.clusterOptions = { "-A $params.project ${params.clusterOptions ?: ''}" }
    process.queue = 'hpcbio,lowmem'
    process.module = "singularity/3.8.1"
  }
  uiuc_singularity {
    includeConfig 'conf/base.config'
    includeConfig 'conf/uiuc_singularity.config'
  }
  uiuc_development {
    includeConfig 'conf/base.config'
    includeConfig 'conf/uiuc_development.config'
  }
  aws_batch {
    includeConfig 'conf/base.config'
    includeConfig 'conf/aws_batch.config'
  }
  test_illumina { includeConfig 'conf/test_illumina.config' }
  test_pacbio { includeConfig 'conf/test_pacbio.config' }
  azure_batch {
    includeConfig 'conf/base.config'
    includeConfig 'conf/azure_batch.config'
  }
  docker {
    docker.enabled = true
    // Avoid this error:
    //   WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.
    // Testing this in nf-core after discussion here: https://github.com/nf-core/tools/pull/351
    // Once this is established and works well, Nextflow might implement this behavior as a new default.
    docker.runOptions = '-u \$(id -u):\$(id -g)'
  }
  singularity {
    singularity.enabled = true
    singularity.autoMounts = true
  }
  none {
    // Don't load any config (for use with custom home configs)
  }
}
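// Profiles are selected at runtime with -profile; multiple profiles can be
// combined with commas (standard Nextflow behavior), e.g.:
//   nextflow run main.nf -profile singularity,test_illumina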
// Capture exit codes from upstream processes when piping
process.shell = ['/bin/bash', '-euo', 'pipefail']

timeline {
  enabled = true
  file = "${params.outdir}/pipeline_info/dada2_timeline.html"
}
report {
  enabled = true
  file = "${params.outdir}/pipeline_info/dada2_report.html"
}
trace {
  enabled = true
  file = "${params.outdir}/pipeline_info/dada2_trace.txt"
}
dag {
  enabled = true
  file = "${params.outdir}/pipeline_info/dada2_DAG.svg"
}

manifest {
  homePage = 'https://github.com/h3abionet/TADA'
  description = 'Nextflow DADA2 analysis workflow for UCT CBIO and UIUC HPCBio'
  mainScript = 'main.nf'
}
// Function to ensure that resource requirements don't go beyond
// a maximum limit
def check_max(obj, type) {
  if (type == 'memory') {
    try {
      if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
        return params.max_memory as nextflow.util.MemoryUnit
      else
        return obj
    } catch (all) {
      println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
      return obj
    }
  } else if (type == 'time') {
    try {
      if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
        return params.max_time as nextflow.util.Duration
      else
        return obj
    } catch (all) {
      println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
      return obj
    }
  } else if (type == 'cpus') {
    try {
      return Math.min(obj, params.max_cpus as int)
    } catch (all) {
      println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
      return obj
    }
  }
}
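// Typical usage from process-level resource settings (a sketch only; the exact
// values, and the params.max_* limits check_max reads, are assumed to live in
// conf/base.config, which is not shown in this file):
//   process {
//     cpus   = { check_max( 4    * task.attempt, 'cpus'   ) }
//     memory = { check_max( 8.GB * task.attempt, 'memory' ) }
//     time   = { check_max( 4.h  * task.attempt, 'time'   ) }
//   }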