/*
 * -------------------------------------------------
 *  UCT dada2 Nextflow config file
 * -------------------------------------------------
 * Default config options for all environments.
 * Cluster-specific config options should be saved
 * in the conf folder and imported under a profile
 * name here.
 */
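// Illustrative invocation (a minimal sketch; the sample-sheet name and the 16S
// primer sequences below are assumptions for illustration, not part of this repo):
//   nextflow run main.nf -profile docker --input samples.csv \
//     --fwdprimer GTGYCAGCMGCCGCGGTAA --revprimer GGACTACNVGGGTWTCTAAT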
// Global default params, used in configs
// Some help with time stamps
import java.text.SimpleDateFormat
params {
  // Configurable variables
  clusterOptions = ''
  project = false
  precheck = false
  email = false
  plaintext_email = false
  // TODO: this needs to be removed or made more specific
  amplicon = '16S'
  platform = 'illumina'
  // Show help message
  help = false
  name = false
  base = "h3abionet"
  version = '1.0-b1' // pipeline version

  // Pipeline Options
  reads = false
  single_end = false // only needed if using '--reads' for input; otherwise this is auto-detected
  timestamp = new SimpleDateFormat("yyyy-MM-dd").format(new java.util.Date())
  outdir = "./" + timestamp + "-dada2"
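  // e.g. with timestamp = "2024-05-01" the line above resolves to
  // outdir = "./2024-05-01-dada2" (illustrative date)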
  // QC
  // Set to true to check read merging; this can take time for a large
  // number of samples, so we recommend running it on a smaller subset first
  check_merging = false
  skip_FASTQC = false // false = run this step by default; it can fail with large sample counts
  skip_dadaQC = false // false = run this step by default; it can fail with large sample counts
  skip_multiQC = false // false = run this step by default; it can fail with large sample counts

  // Trimming
  // NYI; this bypasses all trimming and QC, and assumes primers are removed
  // and sequences are ready for DADA2
  skip_trimming = false
  // When true (default), this sets cutadapt's trimming (which uses linked adapters)
  // to require that *both* primers be present. With some kits (e.g. StrainID) this
  // can be an issue (some reads may be truncated at the 5' or 3' end), so it can be
  // relaxed by setting this to false.
  pacbio_strict_match = true
  fwdprimer = false
  revprimer = false
  trimFor = 0
  trimRev = 0
  truncFor = 0
  truncRev = 0
  maxEEFor = 2
  maxEERev = 2
  truncQ = 0 // default
  maxN = 0 // default
  maxLen = "Inf" // default; this can be coerced in R using as.numeric
  minLen = 50 // default
  // We could likely make these boolean 'false' as above with R coercion
  // (either through as.logical or using optparse in an Rscript)
  rmPhiX = "F" // TODO: test using false instead of a string
  // Error model
  qualityBinning = false // set to true if using binned qualities (NovaSeq)
  errorModel = 'illumina'

  // Generate and use priors
  // false by default; set to true to generate a set of 'priors' for the reads.
  // Note this is done for each denoised data set prior to optional read merging
  // and chimera removal (for example, paired-end data would have two prior files,
  // while single-end data would have one).
  // Note: this is only implemented for per-sample denoising
  generate_priors = false
  fwd_priors = ""
  rev_priors = ""

  // Merging
  // paired_type = "overlapping" // allowed: 'overlapping' (paired reads overlap), 'separate' (paired reads are non-overlapping), or 'mix' (variable length)
  minOverlap = 20 // default
  maxMismatch = 0 // default
  trimOverhang = "F"
  justConcatenate = "F" // TODO: test using false instead of a string
  // CF: this is for rescuing unmerged ITS reads; it should be off unless really
  // needed, and even then it's questionable. But it is requested sometimes.
  rescueUnmerged = false
  dadaParams = false // !!!Deprecated!!!
  dadaOpt = [] // note: this doesn't work well for other R functions
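  // Illustrative only (an assumption, not verified against main.nf): dadaOpt is
  // meant to carry key/value options forwarded to DADA2's setDadaOpt(), e.g.
  //   dadaOpt = [OMEGA_A: 1e-40, BAND_SIZE: 32]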
  maxMergedLen = 0 // only applied if set > 1
  minMergedLen = 0 // only applied if set > 1

  // Chimera detection
  skipChimeraDetection = false
  removeBimeraDenovoOptions = false

  // Taxonomic assignment
  taxassignment = 'rdp' // default: RDP classifier implementation in dada2
  reference = false
  species = false
  minBoot = 50 // default for dada2
  taxLevels = false
  taxBatch = 0 // batch size of ASVs to run through assignTaxonomy/assignSpecies; 0 = run everything

  // Alignment
  skipAlignment = false
  aligner = 'DECIPHER' // default
  infernalCM = false

  // Phylogenetic analysis
  runTree = 'phangorn' // default; the current alternative is 'fasttree'

  // NYI, for dada sample inference pooling (requires all samples)
  pool = "pseudo" // TODO: test using false instead of a string

  // MultiQC
  interactiveMultiQC = false

  // Additional outputs
  toBIOM = true // generate BIOM v1 output
  toQIIME2 = false // generate QZA artifacts for QIIME2

  // Quick hack to clean up sample names, probably unsafe (Bobby Tables);
  // this is now deprecated in favor of using a sample sheet (CSV)
  sampleRegex = false

  // Renaming
  idType = "md5"

  /* Experimental!!!
     Parameters below are experimental
  */

  // Alternative input: a sample sheet (CSV). This will become the default at some point.
  input = false

  // Pre-chimera sequence tables. This pulls in one or more sequence tables
  // from independent sequencing runs, merges them, and runs downstream
  // analysis. The only supported sequence-table format is the original
  // version from DADA2 (ASV names are the sequences, with counts per sample).
  // As these are run through chimera detection, they should be pre-chimera-removal data.
  seqTables = false
}
profiles {
  ilifu {
    includeConfig 'conf/base.config'
    includeConfig 'conf/ilifu.config'
  }
  icts_hpc {
    includeConfig 'conf/icts_hpc.config'
    includeConfig 'conf/base.config'
  }
  training {
    includeConfig 'conf/base.config'
    includeConfig 'conf/training.config'
  }
  standard {
    includeConfig 'conf/standard.config'
  }
  uiuc_test_illumina {
    includeConfig 'conf/test_illumina.config'
    process.executor = 'slurm'
    params.project = 'h3a'
    process.clusterOptions = { "-A $params.project ${params.clusterOptions ?: ''}" }
    process.queue = 'hpcbio,lowmem'
    process.module = "singularity/3.8.1"
  }
  uiuc_test_pacbio {
    includeConfig 'conf/test_pacbio.config'
    process.executor = 'slurm'
    params.project = 'h3a'
    process.clusterOptions = { "-A $params.project ${params.clusterOptions ?: ''}" }
    process.queue = 'hpcbio,lowmem'
    process.module = "singularity/3.8.1"
  }
  uiuc_singularity {
    includeConfig 'conf/base.config'
    includeConfig 'conf/uiuc_singularity.config'
  }
  uiuc_development {
    includeConfig 'conf/base.config'
    includeConfig 'conf/uiuc_development.config'
  }
  aws_batch {
    includeConfig 'conf/base.config'
    includeConfig 'conf/aws_batch.config'
  }
  test_illumina { includeConfig 'conf/test_illumina.config' }
  test_pacbio { includeConfig 'conf/test_pacbio.config' }
  azure_batch {
    includeConfig 'conf/base.config'
    includeConfig 'conf/azure_batch.config'
  }
  docker {
    docker.enabled = true
    // Avoid this error:
    //   WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.
    // Testing this in nf-core after discussion here: https://github.com/nf-core/tools/pull/351
    // Once this is established and works well, Nextflow might implement this behavior as a new default.
    docker.runOptions = '-u \$(id -u):\$(id -g)'
  }
  singularity {
    singularity.enabled = true
    singularity.autoMounts = true
  }
  none {
    // Don't load any config (for use with custom home configs)
  }
}
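// Profiles are selected at runtime with -profile; multiple profiles can be
// combined with commas (standard Nextflow behavior), e.g.:
//   nextflow run main.nf -profile singularity,test_illumina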
// Capture exit codes from upstream processes when piping
process.shell = ['/bin/bash', '-euo', 'pipefail']

timeline {
  enabled = true
  file = "${params.outdir}/pipeline_info/dada2_timeline.html"
}
report {
  enabled = true
  file = "${params.outdir}/pipeline_info/dada2_report.html"
}
trace {
  enabled = true
  file = "${params.outdir}/pipeline_info/dada2_trace.txt"
}
dag {
  enabled = true
  file = "${params.outdir}/pipeline_info/dada2_DAG.svg"
}

manifest {
  homePage = 'https://github.com/h3abionet/TADA'
  description = 'Nextflow DADA2 analysis workflow for UCT CBIO and UIUC HPCBio'
  mainScript = 'main.nf'
}
// Function to ensure that resource requirements don't go beyond
// a maximum limit
def check_max(obj, type) {
  if (type == 'memory') {
    try {
      if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
        return params.max_memory as nextflow.util.MemoryUnit
      else
        return obj
    } catch (all) {
      println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
      return obj
    }
  } else if (type == 'time') {
    try {
      if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
        return params.max_time as nextflow.util.Duration
      else
        return obj
    } catch (all) {
      println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
      return obj
    }
  } else if (type == 'cpus') {
    try {
      return Math.min(obj, params.max_cpus as int)
    } catch (all) {
      println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
      return obj
    }
  }
}
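// Typical usage from process-level resource settings (a sketch only; the exact
// values, and the params.max_* limits check_max reads, are assumed to live in
// conf/base.config, which is not shown in this file):
//   process {
//     cpus   = { check_max( 4    * task.attempt, 'cpus'   ) }
//     memory = { check_max( 8.GB * task.attempt, 'memory' ) }
//     time   = { check_max( 4.h  * task.attempt, 'time'   ) }
//   }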