main.nf

#!/usr/bin/env nextflow
/*
========================================================================================
                         nf-core/chipseq
========================================================================================
 nf-core/chipseq Analysis Pipeline.
 #### Homepage / Documentation
 https://github.com/nf-core/chipseq
----------------------------------------------------------------------------------------
*/

def helpMessage() {
    log.info nfcoreHeader()
    log.info"""
    Usage:

    The typical command for running the pipeline is as follows:

      nextflow run nf-core/chipseq --input design.csv --genome GRCh37 -profile docker

    Mandatory arguments:
      --input [file]                  Comma-separated file containing information about the samples in the experiment (see docs/usage.md) (Default: './design.csv')
      --fasta [file]                  Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome (Default: false)
      --gtf [file]                    Path to GTF file. Not mandatory when using reference in iGenomes config via --genome (Default: false)
      -profile [str]                  Configuration profile to use. Can use multiple (comma separated)
                                      Available: conda, docker, singularity, awsbatch, test

    Generic
      --single_end [bool]             Specifies that the input is single-end reads (Default: false)
      --seq_center [str]              Sequencing center information to be added to read group of BAM files (Default: false)
      --fragment_size [int]           Estimated fragment size used to extend single-end reads (Default: 200)
      --fingerprint_bins [int]        Number of genomic bins to use when calculating fingerprint plot (Default: 500000)

    References                        If not specified in the configuration file or you wish to overwrite any of the references
      --genome [str]                  Name of iGenomes reference (Default: false)
      --bwa_index [file]              Full path to directory containing BWA index including base name i.e. /path/to/index/genome.fa (Default: false)
      --gene_bed [file]               Path to BED file containing gene intervals (Default: false)
      --macs_gsize [str]              Effective genome size parameter required by MACS2. If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 (Default: false)
      --blacklist [file]              Path to blacklist regions (.BED format), used for filtering alignments (Default: false)
      --save_reference [bool]         If generated by the pipeline save the BWA index in the results directory (Default: false)

    Trimming
      --clip_r1 [int]                 Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0)
      --clip_r2 [int]                 Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0)
      --three_prime_clip_r1 [int]     Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0)
      --three_prime_clip_r2 [int]     Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0)
      --trim_nextseq [int]            Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails (Default: 0)
      --skip_trimming [bool]          Skip the adapter trimming step (Default: false)
      --save_trimmed [bool]           Save the trimmed FastQ files in the results directory (Default: false)

    Alignments
      --bwa_min_score [int]           Don’t output BWA MEM alignments with score lower than this parameter (Default: false)
      --keep_dups [bool]              Duplicate reads are not filtered from alignments (Default: false)
      --keep_multi_map [bool]         Reads mapping to multiple locations are not filtered from alignments (Default: false)
      --save_align_intermeds [bool]   Save the intermediate BAM files from the alignment step - not done by default (Default: false)

    Peaks
      --narrow_peak [bool]            Run MACS2 in narrowPeak mode (Default: false)
      --broad_cutoff [float]          Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified (Default: 0.1)
      --macs_fdr [float]              Minimum FDR (q-value) cutoff for peak detection, --macs_fdr and --macs_pvalue are mutually exclusive (Default: false)
      --macs_pvalue [float]           p-value cutoff for peak detection, --macs_fdr and --macs_pvalue are mutually exclusive (Default: false)
      --min_reps_consensus [int]      Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1)
      --save_macs_pileup [bool]       Instruct MACS2 to create bedGraph files normalised to signal per million reads (Default: false)
      --skip_peak_qc [bool]           Skip MACS2 peak QC plot generation (Default: false)
      --skip_peak_annotation [bool]   Skip annotation of MACS2 and consensus peaks with HOMER (Default: false)
      --skip_consensus_peaks [bool]   Skip consensus peak generation (Default: false)

    Differential analysis
      --deseq2_vst [bool]             Use vst transformation instead of rlog with DESeq2 (Default: false)
      --skip_diff_analysis [bool]     Skip differential accessibility analysis (Default: false)

    QC
      --skip_fastqc [bool]            Skip FastQC (Default: false)
      --skip_picard_metrics [bool]    Skip Picard CollectMultipleMetrics (Default: false)
      --skip_preseq [bool]            Skip Preseq (Default: false)
      --skip_plot_profile [bool]      Skip deepTools plotProfile (Default: false)
      --skip_plot_fingerprint [bool]  Skip deepTools plotFingerprint (Default: false)
      --skip_spp [bool]               Skip Phantompeakqualtools (Default: false)
      --skip_igv [bool]               Skip IGV (Default: false)
      --skip_multiqc [bool]           Skip MultiQC (Default: false)

    Other
      --outdir [file]                 The output directory where the results will be saved (Default: './results')
      --publish_dir_mode [str]        Mode for publishing results in the output directory. Available: symlink, rellink, link, copy, copyNoFollow, move (Default: copy)
      --email [email]                 Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits (Default: false)
      --email_on_fail [email]         Same as --email, except only send mail if the workflow is not successful (Default: false)
      --max_multiqc_email_size [str]  Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
      -name [str]                     Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic (Default: false)

    AWSBatch options:
      --awsqueue [str]                The AWSBatch JobQueue that needs to be set when running on AWSBatch
      --awsregion [str]               The AWS Region for your AWS Batch job to run on
      --awscli [str]                  Path to the AWS CLI tool
    """.stripIndent()
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                SET UP CONFIGURATION VARIABLES                       -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// Show help message
if (params.help) {
    helpMessage()
    exit 0
}

// Has the run name been specified by the user?
// this has the bonus effect of catching both -name and --name
custom_runName = params.name
if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
    custom_runName = workflow.runName
}

////////////////////////////////////////////////////
/* --         DEFAULT PARAMETER VALUES         -- */
////////////////////////////////////////////////////

// Check if genome exists in the config file
if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
    exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}"
}

// Configurable variables
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.bwa_index = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false
params.gene_bed = params.genome ? params.genomes[ params.genome ].bed12 ?: false : false
params.macs_gsize = params.genome ? params.genomes[ params.genome ].macs_gsize ?: false : false
params.blacklist = params.genome ? params.genomes[ params.genome ].blacklist ?: false : false
params.anno_readme = params.genome ? params.genomes[ params.genome ].readme ?: false : false
params.spike_index = params.spiking ? params.genomes[ params.spike_genome ].bwa ?: false : false
// Global variables
def PEAK_TYPE = params.narrow_peak ? 'narrowPeak' : 'broadPeak'

////////////////////////////////////////////////////
/* --          CONFIG FILES                    -- */
////////////////////////////////////////////////////

// Pipeline config
ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true)
ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true)
ch_output_docs_images = file("$baseDir/docs/images/", checkIfExists: true)

// JSON files required by BAMTools for alignment filtering
if (params.single_end) {
    ch_bamtools_filter_config = file(params.bamtools_filter_se_config, checkIfExists: true)
} else {
    ch_bamtools_filter_config = file(params.bamtools_filter_pe_config, checkIfExists: true)
}

// Header files for MultiQC
ch_peak_count_header = file("$baseDir/assets/multiqc/peak_count_header.txt", checkIfExists: true)
ch_frip_score_header = file("$baseDir/assets/multiqc/frip_score_header.txt", checkIfExists: true)
ch_peak_annotation_header = file("$baseDir/assets/multiqc/peak_annotation_header.txt", checkIfExists: true)
ch_deseq2_pca_header = file("$baseDir/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true)
ch_deseq2_clustering_header = file("$baseDir/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true)
ch_spp_correlation_header = file("$baseDir/assets/multiqc/spp_correlation_header.txt", checkIfExists: true)
ch_spp_nsc_header = file("$baseDir/assets/multiqc/spp_nsc_header.txt", checkIfExists: true)
ch_spp_rsc_header = file("$baseDir/assets/multiqc/spp_rsc_header.txt", checkIfExists: true)

////////////////////////////////////////////////////
/* --          VALIDATE INPUTS                 -- */
////////////////////////////////////////////////////

if (params.input)     { ch_input = file(params.input, checkIfExists: true) } else { exit 1, 'Samples design file not specified!' }
if (params.gtf)       { ch_gtf = file(params.gtf, checkIfExists: true) } else { exit 1, 'GTF annotation file not specified!' }
if (params.gene_bed)  { ch_gene_bed = file(params.gene_bed, checkIfExists: true) }
if (params.blacklist) { ch_blacklist = Channel.fromPath(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() }

if (params.fasta) {
    lastPath = params.fasta.lastIndexOf(File.separator)
    bwa_base = params.fasta.substring(lastPath+1)
    ch_fasta = file(params.fasta, checkIfExists: true)
} else {
    exit 1, 'Fasta file not specified!'
}

if (params.bwa_index) {
    lastPath = params.bwa_index.lastIndexOf(File.separator)
    bwa_dir  = params.bwa_index.substring(0,lastPath+1)
    bwa_base = params.bwa_index.substring(lastPath+1)
    Channel
        .fromPath(bwa_dir, checkIfExists: true)
        .set { ch_bwa_index }
}

if (params.spike_index){
    lastPath = params.spike_index.lastIndexOf(File.separator)
    spike_dir = params.spike_index.substring(0,lastPath+1)
    spike_base = params.spike_index.substring(lastPath+1)
    Channel
        .fromPath(spike_dir, checkIfExists: true)
        .set { ch_spike_index }
}

// Save AWS IGenomes file containing annotation version
if (params.anno_readme && file(params.anno_readme).exists()) {
    file("${params.outdir}/genome/").mkdirs()
    file(params.anno_readme).copyTo("${params.outdir}/genome/")
}

////////////////////////////////////////////////////
/* --                   AWS                    -- */
////////////////////////////////////////////////////

if (workflow.profile.contains('awsbatch')) {
    // AWSBatch sanity checking
    if (!params.awsqueue || !params.awsregion) exit 1, 'Specify correct --awsqueue and --awsregion parameters on AWSBatch!'
    // Check outdir paths to be S3 buckets if running on AWSBatch
    // related: https://github.com/nextflow-io/nextflow/issues/813
    if (!params.outdir.startsWith('s3:')) exit 1, 'Outdir not on S3 - specify S3 Bucket to run on AWSBatch!'
    // Prevent trace files to be stored on S3 since S3 does not support rolling files.
    if (params.tracedir.startsWith('s3:')) exit 1, 'Specify a local tracedir or run without trace! S3 cannot be used for tracefiles.'
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                       HEADER LOG INFO                               -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// Header log info
log.info nfcoreHeader()
def summary = [:]
summary['Run Name']               = custom_runName ?: workflow.runName
summary['Data Type']              = params.single_end ? 'Single-End' : 'Paired-End'
summary['Design File']            = params.input
summary['Genome']                 = params.genome ?: 'Not supplied'
summary['Fasta File']             = params.fasta
summary['GTF File']               = params.gtf
if (params.gene_bed)              summary['Gene BED File'] = params.gene_bed
if (params.bwa_index)             summary['BWA Index'] = params.bwa_index
if (params.blacklist)             summary['Blacklist BED'] = params.blacklist
if (params.bwa_min_score)         summary['BWA Min Score'] = params.bwa_min_score
summary['MACS2 Genome Size']      = params.macs_gsize ?: 'Not supplied'
summary['Min Consensus Reps']     = params.min_reps_consensus
if (params.macs_gsize)            summary['MACS2 Narrow Peaks'] = params.narrow_peak ? 'Yes' : 'No'
if (!params.narrow_peak)          summary['MACS2 Broad Cutoff'] = params.broad_cutoff
if (params.macs_fdr)              summary['MACS2 FDR'] = params.macs_fdr
if (params.macs_pvalue)           summary['MACS2 P-value'] = params.macs_pvalue
if (params.skip_trimming) {
    summary['Trimming Step']      = 'Skipped'
} else {
    summary['Trim R1']            = "$params.clip_r1 bp"
    summary['Trim R2']            = "$params.clip_r2 bp"
    summary["Trim 3' R1"]         = "$params.three_prime_clip_r1 bp"
    summary["Trim 3' R2"]         = "$params.three_prime_clip_r2 bp"
    summary['NextSeq Trim']       = "$params.trim_nextseq bp"
}
if (params.seq_center)            summary['Sequencing Center'] = params.seq_center
if (params.single_end)            summary['Fragment Size'] = "$params.fragment_size bp"
summary['Fingerprint Bins']       = params.fingerprint_bins
if (params.keep_dups)             summary['Keep Duplicates'] = 'Yes'
if (params.keep_multi_map)        summary['Keep Multi-mapped'] = 'Yes'
summary['Save Genome Index']      = params.save_reference ? 'Yes' : 'No'
if (params.save_trimmed)          summary['Save Trimmed'] = 'Yes'
if (params.save_align_intermeds)  summary['Save Intermeds'] =  'Yes'
if (params.save_macs_pileup)      summary['Save MACS2 Pileup'] = 'Yes'
if (params.skip_peak_qc)          summary['Skip MACS2 Peak QC'] = 'Yes'
if (params.skip_peak_annotation)  summary['Skip Peak Annotation'] = 'Yes'
if (params.skip_consensus_peaks)  summary['Skip Consensus Peaks'] = 'Yes'
if (params.deseq2_vst)            summary['Use DESeq2 vst Transform'] = 'Yes'
if (params.skip_diff_analysis)    summary['Skip Differential Analysis'] = 'Yes'
if (params.skip_fastqc)           summary['Skip FastQC'] = 'Yes'
if (params.skip_picard_metrics)   summary['Skip Picard Metrics'] = 'Yes'
if (params.skip_preseq)           summary['Skip Preseq'] = 'Yes'
if (params.skip_plot_profile)     summary['Skip plotProfile'] = 'Yes'
if (params.skip_plot_fingerprint) summary['Skip plotFingerprint'] = 'Yes'
if (params.skip_spp)              summary['Skip spp'] = 'Yes'
if (params.skip_igv)              summary['Skip IGV'] = 'Yes'
if (params.skip_multiqc)          summary['Skip MultiQC'] = 'Yes'
summary['Max Resources']          = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job"
if (workflow.containerEngine)     summary['Container'] = "$workflow.containerEngine - $workflow.container"
summary['Output Dir']             = params.outdir
summary['Launch Dir']             = workflow.launchDir
summary['Working Dir']            = workflow.workDir
summary['Script Dir']             = workflow.projectDir
summary['User']                   = workflow.userName
if (workflow.profile.contains('awsbatch')) {
    summary['AWS Region']         = params.awsregion
    summary['AWS Queue']          = params.awsqueue
    summary['AWS CLI']            = params.awscli
}
summary['Config Profile']         = workflow.profile
if (params.config_profile_description) summary['Config Description'] = params.config_profile_description
if (params.config_profile_contact)     summary['Config Contact']     = params.config_profile_contact
if (params.config_profile_url)         summary['Config URL']         = params.config_profile_url
if (params.email || params.email_on_fail) {
    summary['E-mail Address']     = params.email
    summary['E-mail on failure']  = params.email_on_fail
    summary['MultiQC Max Size']   = params.max_multiqc_email_size
}
log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join('\n')
log.info "-\033[2m--------------------------------------------------\033[0m-"

// Check the hostnames against configured profiles
checkHostname()

// Show a big warning message if we're not running MACS
if (!params.macs_gsize) {
    def warnstring = params.genome ? "supported for '${params.genome}'" : 'supplied'
    log.warn "=================================================================\n" +
             "  WARNING! MACS genome size parameter not $warnstring.\n" +
             "  Peak calling, annotation and differential analysis will be skipped.\n" +
             "  Please specify value for '--macs_gsize' to run these steps.\n" +
             "======================================================================="
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                     PARSE DESIGN FILE                               -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * PREPROCESSING: Reformat design file, check validitiy and create IP vs control mappings
 */
process CHECK_DESIGN {
    tag "$design"
    publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode

    input:
    path design from ch_input

    output:
    path 'design_reads.csv' into ch_design_reads_csv
    path 'design_controls.csv' into ch_design_controls_csv

    script:  // This script is bundled with the pipeline, in nf-core/chipseq/bin/
    """
    check_design.py $design design_reads.csv design_controls.csv
    """
}

/*
 * Create channels for input fastq files
 */
if (params.single_end) {
    ch_design_reads_csv
        .splitCsv(header:true, sep:',')
        .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] }
        .into { ch_raw_reads_fastqc;
                ch_raw_reads_trimgalore }
} else {
    ch_design_reads_csv
        .splitCsv(header:true, sep:',')
        .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true), file(row.fastq_2, checkIfExists: true) ] ] }
        .into { ch_raw_reads_fastqc;
                ch_raw_reads_trimgalore }
}

// two names for two conditions. if same group, same antibody, condition DMSO, y 
// ch_design_reads_csv
//         .splitCsv(header:true, sep:',')
//         .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] }
//         .into { ch_raw_reads_fastqc;
//                 ch_raw_reads_trimgalore }

/*
 * Create a channel with [sample_id, control id, antibody, replicatesExist, multipleGroups]
 */
ch_design_controls_csv
    .splitCsv(header:true, sep:',')
    .map { row -> [ row.sample_id, row.control_id, row.antibody, row.replicatesExist.toBoolean(), row.multipleGroups.toBoolean() ] }
    .set { ch_design_controls_csv }

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                     PREPARE ANNOTATION FILES                        -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * PREPROCESSING: Build BWA index
 */
if (!params.bwa_index) {
    process BWA_INDEX {
        tag "$fasta"
        label 'process_high'
        publishDir path: { params.save_reference ? "${params.outdir}/genome" : params.outdir },
            saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode

        input:
        path fasta from ch_fasta

        output:
        path 'BWAIndex' into ch_bwa_index

        script:
        """
        bwa index -a bwtsw $fasta
        mkdir BWAIndex && mv ${fasta}* BWAIndex
        """

    }
}


/*
 * PREPROCESSING: Generate gene BED file
 */
// If --gtf is supplied along with --genome
// Make gene bed from supplied --gtf instead of using iGenomes one automatically
def MAKE_BED = false
if (!params.gene_bed) {
    MAKE_BED = true
} else if (params.genome && params.gtf) {
    if (params.genomes[ params.genome ].gtf != params.gtf) {
        MAKE_BED = true
    }
}
if (MAKE_BED) {
    process MAKE_GENE_BED {
        tag "$gtf"
        label 'process_low'
        publishDir "${params.outdir}/genome", mode: params.publish_dir_mode

        input:
        path gtf from ch_gtf

        output:
        path '*.bed' into ch_gene_bed

        script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/
        """
        gtf2bed $gtf > ${gtf.baseName}.bed
        """
    }
}

/*
 * PREPROCESSING: Prepare genome intervals for filtering
 */
process MAKE_GENOME_FILTER {
    tag "$fasta"
    publishDir "${params.outdir}/genome", mode: params.publish_dir_mode

    input:
    path fasta from ch_fasta
    path blacklist from ch_blacklist.ifEmpty([])

    output:
    path "$fasta"                                      // FASTA FILE FOR IGV
    path '*.fai'                                       // FAI INDEX FOR REFERENCE GENOME
    path '*.bed' into ch_genome_filter_regions         // BED FILE WITHOUT BLACKLIST REGIONS
    path '*.sizes' into ch_genome_sizes_bigwig         // CHROMOSOME SIZES FILE FOR BEDTOOLS

    script:
    blacklist_filter = params.blacklist ? "sortBed -i $blacklist -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes"
    """
    samtools faidx $fasta
    cut -f 1,2 ${fasta}.fai > ${fasta}.sizes
    $blacklist_filter > ${fasta}.include_regions.bed
    """
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                        FASTQ QC                                     -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * STEP 1: FastQC
 */
process FASTQC {
    tag "$name"
    label 'process_medium'
    publishDir "${params.outdir}/fastqc", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      filename.endsWith('.zip') ? "zips/$filename" : filename
                }

    when:
    !params.skip_fastqc

    input:
    tuple val(name), path(reads) from ch_raw_reads_fastqc

    output:
    path '*.{zip,html}' into ch_fastqc_reports_mqc

    script:
    // Added soft-links to original fastqs for consistent naming in MultiQC
    if (params.single_end) {
        """
        [ ! -f  ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
        fastqc -q -t $task.cpus ${name}.fastq.gz
        """
    } else {
        """
        [ ! -f  ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
        [ ! -f  ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
        fastqc -q -t $task.cpus ${name}_1.fastq.gz
        fastqc -q -t $task.cpus ${name}_2.fastq.gz
        """
    }
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                        ADAPTER TRIMMING                             -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * STEP 2: Trim Galore!
 */
if (params.skip_trimming) {
    ch_trimmed_reads = ch_raw_reads_trimgalore
    ch_trimmed_spikes = ch_raw_reads_trimgalore
    ch_trimgalore_results_mqc = Channel.empty()
    ch_trimgalore_fastqc_reports_mqc = Channel.empty()
} else {
    process TRIMGALORE {
        tag "$name"
        label 'process_high'
        publishDir "${params.outdir}/trim_galore", mode: params.publish_dir_mode,
            saveAs: { filename ->
                          if (filename.endsWith('.html')) "fastqc/$filename"
                          else if (filename.endsWith('.zip')) "fastqc/zips/$filename"
                          else if (filename.endsWith('trimming_report.txt')) "logs/$filename"
                          else params.save_trimmed ? filename : null
                    }

        input:
        tuple val(name), path(reads) from ch_raw_reads_trimgalore

        output:
        tuple val(name), path('*.fq.gz') into ch_trimmed_reads, 
                                                ch_trimmed_spikes
        path '*.txt' into ch_trimgalore_results_mqc
        path '*.{zip,html}' into ch_trimgalore_fastqc_reports_mqc

        script:
        // Calculate number of --cores for TrimGalore based on value of task.cpus
        // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019
        // See: https://github.com/nf-core/atacseq/pull/65
        def cores = 1
        if (task.cpus) {
            cores = (task.cpus as int) - 4
            if (params.single_end) cores = (task.cpus as int) - 3
            if (cores < 1) cores = 1
            if (cores > 4) cores = 4
        }

        c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : ''
        c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : ''
        tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : ''
        tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : ''
        nextseq = params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : ''

        // Added soft-links to original fastqs for consistent naming in MultiQC
        if (params.single_end) {
            """
            [ ! -f  ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
            trim_galore --cores $cores --fastqc --gzip $c_r1 $tpc_r1 $nextseq ${name}.fastq.gz
            """
        } else {
            """
            [ ! -f  ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
            [ ! -f  ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
            trim_galore --cores $cores --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 $nextseq ${name}_1.fastq.gz ${name}_2.fastq.gz
            """
        }
    }
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                        ALIGN                                        -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * STEP 3.1: Map read(s) with bwa mem
 */
process BWA_MEM {
    tag "$name"
    label 'process_high'

    input:
    tuple val(name), path(reads) from ch_trimmed_reads
    path index from ch_bwa_index.collect()

    output:
    tuple val(name), path('*.bam') into ch_bwa_bam

    script:
    prefix = "${name}.Lb"
    rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'"
    if (params.seq_center) {
        rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'"
    }
    score = params.bwa_min_score ? "-T ${params.bwa_min_score}" : ''
    """
    bwa mem \\
        -t $task.cpus \\
        -M \\
        -R $rg \\
        $score \\
        ${index}/${bwa_base} \\
        $reads \\
        | samtools view -@ $task.cpus -b -h -F 0x0100 -O BAM -o ${prefix}.bam -
    """
}

/*
 * STEP 3.2: Convert BAM to coordinate sorted BAM
 */
process SORT_BAM {
    tag "$name"
    label 'process_medium'
    if (params.save_align_intermeds) {
        publishDir path: "${params.outdir}/bwa/library", mode: params.publish_dir_mode,
            saveAs: { filename ->
                          if (filename.endsWith('.flagstat')) "samtools_stats/$filename"
                          else if (filename.endsWith('.idxstats')) "samtools_stats/$filename"
                          else if (filename.endsWith('.stats')) "samtools_stats/$filename"
                          else filename
                    }
    }

    input:
    tuple val(name), path(bam) from ch_bwa_bam

    output:
    tuple val(name), path('*.sorted.{bam,bam.bai}') into ch_sort_bam_merge, 
                                                            ch_sort_bam_counts
    path '*.{flagstat,idxstats,stats}' into ch_sort_bam_flagstat_mqc;

    script:
    prefix = "${name}.Lb"
    """
    samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $name $bam
    samtools index ${prefix}.sorted.bam
    samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat
    samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats
    samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats
    """
}

/*
 * STEP 3.2: Convert BAM to coordinate sorted BAM
 */
process COUNTS {
    tag "$name"
    label 'process_medium'

    when: params.spiking

    publishDir "${params.outdir}/bwa/mergedLibrary/", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.endsWith('.txt')) "counts/$filename"
                      else filename
                }

    input:
    tuple val(name), path(bam) from ch_sort_bam_counts

    output:
    tuple val(name), file('*.txt') into counts_normal

    """
    samtools view ${bam} -@ $task.cpus -c -F 0x004 -F 0x0008 -f 0x001 -F 0x0400 -F 0x0100 > ${name}.txt
    """
}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                    MERGE LIBRARY BAM                                -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * STEP 4.1: Merge BAM files for all libraries from same sample replicate
 */
ch_sort_bam_merge
    .map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] }
    .groupTuple(by: [0])
    .map { it ->  [ it[0], it[1].flatten() ] }
    .set { ch_sort_bam_merge }

process MERGED_BAM {
    tag "$name"
    label 'process_medium'
    publishDir "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.endsWith('.flagstat')) "samtools_stats/$filename"
                      else if (filename.endsWith('.idxstats')) "samtools_stats/$filename"
                      else if (filename.endsWith('.stats')) "samtools_stats/$filename"
                      else if (filename.endsWith('.metrics.txt')) "picard_metrics/$filename"
                      else params.save_align_intermeds ? filename : null
                }

    input:
    tuple val(name), path(bams) from ch_sort_bam_merge

    output:
    tuple val(name), path("*${prefix}.sorted.{bam,bam.bai}") into ch_merge_bam_filter,
                                                                  ch_merge_bam_preseq
    path '*.{flagstat,idxstats,stats}' into ch_merge_bam_stats_mqc
    path '*.txt' into ch_merge_bam_metrics_mqc

    script:
    prefix = "${name}.mLb.mkD"
    bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort()
    def avail_mem = 3
    if (!task.memory) {
        log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.toGiga()
    }
    if (bam_files.size() > 1) {
        """
        picard -Xmx${avail_mem}g MergeSamFiles \\
            ${'INPUT='+bam_files.join(' INPUT=')} \\
            OUTPUT=${name}.sorted.bam \\
            SORT_ORDER=coordinate \\
            VALIDATION_STRINGENCY=LENIENT \\
            TMP_DIR=tmp
        samtools index ${name}.sorted.bam

        picard -Xmx${avail_mem}g MarkDuplicates \\
            INPUT=${name}.sorted.bam \\
            OUTPUT=${prefix}.sorted.bam \\
            ASSUME_SORTED=true \\
            REMOVE_DUPLICATES=false \\
            METRICS_FILE=${prefix}.MarkDuplicates.metrics.txt \\
            VALIDATION_STRINGENCY=LENIENT \\
            TMP_DIR=tmp

        samtools index ${prefix}.sorted.bam
        samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats
        samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat
        samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats
        """
    } else {
      """
      picard -Xmx${avail_mem}g MarkDuplicates \\
          INPUT=${bam_files[0]} \\
          OUTPUT=${prefix}.sorted.bam \\
          ASSUME_SORTED=true \\
          REMOVE_DUPLICATES=false \\
          METRICS_FILE=${prefix}.MarkDuplicates.metrics.txt \\
          VALIDATION_STRINGENCY=LENIENT \\
          TMP_DIR=tmp

      samtools index ${prefix}.sorted.bam
      samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats
      samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat
      samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats
      """
    }
}

/*
 * STEP 4.2: Filter BAM file at merged library-level
 */
process MERGED_BAM_FILTER {
    tag "$name"
    label 'process_medium'
    publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (params.single_end || params.save_align_intermeds) {
                          if (filename.endsWith('.flagstat')) "samtools_stats/$filename"
                          else if (filename.endsWith('.idxstats')) "samtools_stats/$filename"
                          else if (filename.endsWith('.stats')) "samtools_stats/$filename"
                          else if (filename.endsWith('.sorted.bam')) filename
                          else if (filename.endsWith('.sorted.bam.bai')) filename
                          else null
                      }
                }

    input:
    tuple val(name), path(bam) from ch_merge_bam_filter
    path bed from ch_genome_filter_regions.collect()
    path bamtools_filter_config from ch_bamtools_filter_config

    output:
    tuple val(name), path('*.{bam,bam.bai}') into ch_filter_bam
    tuple val(name), path('*.flagstat') into ch_filter_bam_flagstat
    path '*.{idxstats,stats}' into ch_filter_bam_stats_mqc
    tuple val(name), path('')

    script:
    prefix = params.single_end ? "${name}.mLb.clN" : "${name}.mLb.flT"
    filter_params = params.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001'
    dup_params = params.keep_dups ? '' : '-F 0x0400'
    multimap_params = params.keep_multi_map ? '' : '-q 1'
    blacklist_params = params.blacklist ? "-L $bed" : ''
    name_sort_bam = params.single_end ? '' : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam"
    """
    samtools view \\
        $filter_params \\
        $dup_params \\
        $multimap_params \\
        $blacklist_params \\
        -b ${bam[0]} \\
        | bamtools filter \\
            -out ${prefix}.sorted.bam \\
            -script $bamtools_filter_config

    samtools index ${prefix}.sorted.bam
    samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat
    samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats
    samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats

    $name_sort_bam
    """
}

/*
 * STEP 4.3: Remove orphan reads from paired-end BAM file
 */
if (params.single_end) {
    ch_filter_bam
        .into { ch_rm_orphan_bam_metrics;
                ch_rm_orphan_bam_bigwig;
                ch_rm_orphan_bam_macs_1;
                ch_rm_orphan_bam_macs_2;
                ch_rm_orphan_bam_phantompeakqualtools;
                ch_rm_orphan_name_bam_counts }

    ch_filter_bam_flagstat
        .into { ch_rm_orphan_flagstat_bigwig;
                ch_rm_orphan_flagstat_macs;
                ch_rm_orphan_flagstat_mqc }

    ch_filter_bam_stats_mqc
        .set { ch_rm_orphan_stats_mqc }
} else {
    process MERGED_BAM_REMOVE_ORPHAN {
        tag "$name"
        label 'process_medium'
        publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode,
            saveAs: { filename ->
                          if (filename.endsWith('.flagstat')) "samtools_stats/$filename"
                          else if (filename.endsWith('.idxstats')) "samtools_stats/$filename"
                          else if (filename.endsWith('.stats')) "samtools_stats/$filename"
                          else if (filename.endsWith('.sorted.bam')) filename
                          else if (filename.endsWith('.sorted.bam.bai')) filename
                          else null
                    }

        input:
        tuple val(name), path(bam) from ch_filter_bam

        output:
        tuple val(name), path('*.sorted.{bam,bam.bai}') into ch_rm_orphan_bam_metrics,
                                                             ch_rm_orphan_bam_bigwig,
                                                             ch_rm_orphan_bam_macs_1,
                                                             ch_rm_orphan_bam_macs_2,
                                                             ch_rm_orphan_bam_phantompeakqualtools
        tuple val(name), path("${prefix}.bam") into ch_rm_orphan_name_bam_counts
        tuple val(name), path('*.flagstat') into ch_rm_orphan_flagstat_bigwig,
                                                 ch_rm_orphan_flagstat_macs,
                                                 ch_rm_orphan_flagstat_mqc
        path '*.{idxstats,stats}' into ch_rm_orphan_stats_mqc

        script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/
        prefix = "${name}.mLb.clN"
        """
        bampe_rm_orphan.py ${bam[0]} ${prefix}.bam --only_fr_pairs

        samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $prefix ${prefix}.bam
        samtools index ${prefix}.sorted.bam
        samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat
        samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats
        samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats
        """
    }
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                 MERGE LIBRARY BAM POST-ANALYSIS                     -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * STEP 5.1: Preseq analysis after merging libraries and before filtering
 */
process PRESEQ {
    tag "$name"
    label 'process_medium'
    label 'error_ignore'
    publishDir "${params.outdir}/bwa/mergedLibrary/preseq", mode: params.publish_dir_mode

    when:
    !params.skip_preseq

    input:
    tuple val(name), path(bam) from ch_merge_bam_preseq

    output:
    path '*.ccurve.txt' into ch_preseq_mqc
    path '*.log'

    script:
    pe = params.single_end ? '' : '-pe'
    """
    preseq lc_extrap \\
        -output ${name}.ccurve.txt \\
        -verbose \\
        -bam \\
        $pe \\
        -seed 1 \\
        ${bam[0]}
    cp .command.err ${name}.command.log
    """
}

/*
 * STEP 5.2: Picard CollectMultipleMetrics after merging libraries and filtering
 */
process PICARD_METRICS {
    tag "$name"
    label 'process_medium'
    publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.endsWith('_metrics')) "picard_metrics/$filename"
                      else if (filename.endsWith('.pdf')) "picard_metrics/pdf/$filename"
                      else null
                }

    when:
    !params.skip_picard_metrics

    input:
    tuple val(name), path(bam) from ch_rm_orphan_bam_metrics
    path fasta from ch_fasta

    output:
    path '*_metrics' into ch_collectmetrics_mqc
    path '*.pdf'

    script:
    prefix = "${name}.mLb.clN"
    def avail_mem = 3
    if (!task.memory) {
        log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.toGiga()
    }
    """
    picard -Xmx${avail_mem}g CollectMultipleMetrics \\
        INPUT=${bam[0]} \\
        OUTPUT=${prefix}.CollectMultipleMetrics \\
        REFERENCE_SEQUENCE=$fasta \\
        VALIDATION_STRINGENCY=LENIENT \\
        TMP_DIR=tmp
    """
}

/*
 * STEP 5.3: Read depth normalised bigWig
 */
process BIGWIG {
    tag "$name"
    label 'process_medium'
    publishDir "${params.outdir}/bwa/mergedLibrary/bigwig", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.endsWith('scale_factor.txt')) "scale/$filename"
                      else if (filename.endsWith('.bigWig')) filename
                      else null
                }

    input:
    tuple val(name), path(bam), path(flagstat) from ch_rm_orphan_bam_bigwig.join(ch_rm_orphan_flagstat_bigwig, by: [0])
    path sizes from ch_genome_sizes_bigwig.collect()

    output:
    tuple val(name), path('*.bigWig') into ch_bigwig_plotprofile
    path '*igv.txt' into ch_bigwig_igv
    path '*scale_factor.txt'

    script:
    pe_fragment = params.single_end ? '' : '-pc'
    extend = (params.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : ''
    """
    SCALE_FACTOR=\$(grep 'mapped (' $flagstat | awk '{print 1000000/\$1}')
    echo \$SCALE_FACTOR > ${name}.scale_factor.txt
    genomeCoverageBed -ibam ${bam[0]} -bg -scale \$SCALE_FACTOR $pe_fragment $extend | sort -T '.' -k1,1 -k2,2n >  ${name}.bedGraph

    bedGraphToBigWig ${name}.bedGraph $sizes ${name}.bigWig

    find * -type f -name "*.bigWig" -exec echo -e "bwa/mergedLibrary/bigwig/"{}"\\t0,0,178" \\; > ${name}.bigWig.igv.txt
    """
}

/*
 * STEP 5.4: Generate gene body coverage plot with deepTools plotProfile and plotHeatmap
 */
process PLOTPROFILE {
    tag "$name"
    label 'process_high'
    publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotProfile", mode: params.publish_dir_mode

    when:
    !params.skip_plot_profile

    input:
    tuple val(name), path(bigwig) from ch_bigwig_plotprofile
    path bed from ch_gene_bed

    output:
    path '*.plotProfile.tab' into ch_plotprofile_mqc
    path '*.{gz,pdf,mat.tab}'

    script:
    """
    computeMatrix scale-regions \\
        --regionsFileName $bed \\
        --scoreFileName $bigwig \\
        --outFileName ${name}.computeMatrix.mat.gz \\
        --outFileNameMatrix ${name}.computeMatrix.vals.mat.tab \\
        --regionBodyLength 1000 \\
        --beforeRegionStartLength 3000 \\
        --afterRegionStartLength 3000 \\
        --skipZeros \\
        --smartLabels \\
        --numberOfProcessors $task.cpus

    plotProfile --matrixFile ${name}.computeMatrix.mat.gz \\
        --outFileName ${name}.plotProfile.pdf \\
        --outFileNameData ${name}.plotProfile.tab

    plotHeatmap --matrixFile ${name}.computeMatrix.mat.gz \\
        --outFileName ${name}.plotHeatmap.pdf \\
        --outFileNameMatrix ${name}.plotHeatmap.mat.tab
    """
}

/*
 * STEP 5.5: align again to drosophila genome
 */    
if(params.spiking){
    process spiking {
        tag "$name"
        label 'process_high'
        publishDir "${params.outdir}/droso_aligned/", mode: params.publish_dir_mode,
            saveAs: { filename ->
                        if (filename.endsWith('.bam')) "bam/$filename"
                        else if (filename.endsWith('_counts.txt')) "counts/$filename"
                        else if (filename.endsWith('_scaling.txt')) "scale/$filename"
                        else filename
                    }

        when:
        params.spiking

        input:
        tuple val(name), path(reads) from ch_trimmed_spikes
        path index from ch_spike_index.collect()
        tuple val(name), file(counts) from counts_normal

        output:
        tuple val(name), path('*.bam') into spiking
        tuple val(name), path('*.txt') into scalespike

        script:
        prefix = "${name}.Lb"
        rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'"
        if (params.seq_center) {
            rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'"
        }
        score = params.bwa_min_score ? "-T ${params.bwa_min_score}" : ''
        """
        bwa mem \\
            -t $task.cpus \\
            -M \\
            -R $rg \\
            $score \\
            ${index}/${spike_base} \\
            $reads \\
            | samtools sort -@ $task.cpus -o ${prefix}.bam -
        samtools view ${prefix}.bam -@ $task.cpus -c -F 0x004 -F 0x0008 -f 0x001 -F 0x0400 -F 0x0100 > ${name}_counts.txt
        hum=\$(cat ${counts})
        dro=\$(cat ${name}_counts.txt)
        echo \$(bc <<< "scale=8; "\$dro"/("\$hum+\$dro")") >> ${name}_scaling.txt;
        """
    }
}

/*
 * STEP 5.5: Phantompeakqualtools
 */
process PHANTOMPEAKQUALTOOLS {
    tag "$name"
    label 'process_high'
    publishDir "${params.outdir}/bwa/mergedLibrary/phantompeakqualtools", mode: params.publish_dir_mode

    when:
    !params.skip_spp

    input:
    tuple val(name), path(bam) from ch_rm_orphan_bam_phantompeakqualtools
    path spp_correlation_header from ch_spp_correlation_header
    path spp_nsc_header from ch_spp_nsc_header
    path spp_rsc_header from ch_spp_rsc_header

    output:
    path '*.spp.out' into ch_spp_out_mqc
    path '*_mqc.tsv' into ch_spp_csv_mqc
    path '*.pdf'

    script:
    """
    RUN_SPP=`which run_spp.R`
    Rscript -e "library(caTools); source(\\"\$RUN_SPP\\")" -c="${bam[0]}" -savp="${name}.spp.pdf" -savd="${name}.spp.Rdata" -out="${name}.spp.out" -p=$task.cpus
    cp $spp_correlation_header ${name}_spp_correlation_mqc.tsv
    Rscript -e "load('${name}.spp.Rdata'); write.table(crosscorr\\\$cross.correlation, file=\\"${name}_spp_correlation_mqc.tsv\\", sep=",", quote=FALSE, row.names=FALSE, col.names=FALSE,append=TRUE)"
    awk -v OFS='\t' '{print "${name}", \$9}' ${name}.spp.out | cat $spp_nsc_header - > ${name}_spp_nsc_mqc.tsv
    awk -v OFS='\t' '{print "${name}", \$10}' ${name}.spp.out | cat $spp_rsc_header - > ${name}_spp_rsc_mqc.tsv
    """
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                 MERGE LIBRARY PEAK ANALYSIS                         -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// Create channel linking IP bams with control bams
ch_rm_orphan_bam_macs_1
    .combine(ch_rm_orphan_bam_macs_2)
    .set { ch_rm_orphan_bam_macs_1 }

ch_design_controls_csv
    .combine(ch_rm_orphan_bam_macs_1)
    .filter { it[0] == it[5] && it[1] == it[7] }
    .join(ch_rm_orphan_flagstat_macs)
    .map { it ->  it[2..-1] }
    .into { ch_group_bam_macs;
            ch_group_bam_plotfingerprint;
            ch_group_bam_counts }

/*
 * STEP 6.1: deepTools plotFingerprint
 */
process PLOTFINGERPRINT {
    tag "${ip} vs ${control}"
    label 'process_high'
    publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotFingerprint", mode: params.publish_dir_mode

    when:
    !params.skip_plot_fingerprint

    input:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), val(ip), path(ipbam), val(control), path(controlbam), path(ipflagstat) from ch_group_bam_plotfingerprint

    output:
    path '*.raw.txt' into ch_plotfingerprint_mqc
    path '*.{txt,pdf}'

    script:
    extend = (params.single_end && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : ''
    """
    plotFingerprint \\
        --bamfiles ${ipbam[0]} ${controlbam[0]} \\
        --plotFile ${ip}.plotFingerprint.pdf \\
        $extend \\
        --labels $ip $control \\
        --outRawCounts ${ip}.plotFingerprint.raw.txt \\
        --outQualityMetrics ${ip}.plotFingerprint.qcmetrics.txt \\
        --skipZeros \\
        --JSDsample ${controlbam[0]} \\
        --numberOfProcessors $task.cpus \\
        --numberOfSamples $params.fingerprint_bins
    """
}

/*
 * STEP 6.2: Call peaks with MACS2 and calculate FRiP score
 */
process MACS2 {
    tag "${ip} vs ${control}"
    label 'process_medium'
    publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.endsWith('.tsv')) "qc/$filename"
                      else if (filename.endsWith('.igv.txt')) null
                      else filename
                }

    when:
    params.macs_gsize

    input:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), val(ip), path(ipbam), val(control), path(controlbam), path(ipflagstat) from ch_group_bam_macs
    path peak_count_header from ch_peak_count_header
    path frip_score_header from ch_frip_score_header

    output:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), val(ip), val(control), path("*.$PEAK_TYPE") into ch_macs_homer,
                                                                                                                     ch_macs_qc,
                                                                                                                     ch_macs_consensus
    path '*igv.txt' into ch_macs_igv
    path '*_mqc.tsv' into ch_macs_mqc
    path '*.{bed,xls,gappedPeak,bdg}'

    script:
    broad = params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}"
    format = params.single_end ? 'BAM' : 'BAMPE'
    pileup = params.save_macs_pileup ? '-B --SPMR' : ''
    fdr = params.macs_fdr ? "--qvalue ${params.macs_fdr}" : ''
    pvalue = params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : ''
    """
    macs2 callpeak \\
        -t ${ipbam[0]} \\
        -c ${controlbam[0]} \\
        $broad \\
        -f $format \\
        -g $params.macs_gsize \\
        -n $ip \\
        $pileup \\
        $fdr \\
        $pvalue \\
        --keep-dup all

    cat ${ip}_peaks.${PEAK_TYPE} | wc -l | awk -v OFS='\t' '{ print "${ip}", \$1 }' | cat $peak_count_header - > ${ip}_peaks.count_mqc.tsv

    READS_IN_PEAKS=\$(intersectBed -a ${ipbam[0]} -b ${ip}_peaks.${PEAK_TYPE} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}')
    grep 'mapped (' $ipflagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${ip}", a/\$1}' | cat $frip_score_header - > ${ip}_peaks.FRiP_mqc.tsv

    find * -type f -name "*.${PEAK_TYPE}" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/"{}"\\t0,0,178" \\; > ${ip}_peaks.igv.txt
    """
}

/*
 * STEP 6.3: Annotate peaks with HOMER
 */
process MACS2_ANNOTATE {
    tag "${ip} vs ${control}"
    label 'process_medium'
    publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: params.publish_dir_mode

    when:
    params.macs_gsize && !params.skip_peak_annotation

    input:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), val(ip), val(control), path(peak) from ch_macs_homer
    path fasta from ch_fasta
    path gtf from ch_gtf

    output:
    path '*.txt' into ch_macs_annotate

    script:
    """
    annotatePeaks.pl \\
        $peak \\
        $fasta \\
        -gid \\
        -gtf $gtf \\
        -cpu $task.cpus \\
        > ${ip}_peaks.annotatePeaks.txt
    """
}

/*
 * STEP 6.4: Aggregated QC plots for peaks, FRiP and peak-to-gene annotation
 */
process MACS2_QC {
    label 'process_medium'
    publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/qc", mode: params.publish_dir_mode

    when:
    params.macs_gsize && !params.skip_peak_annotation && !params.skip_peak_qc

    input:
    path peaks from ch_macs_qc.collect{ it[-1] }
    path annos from ch_macs_annotate.collect()
    path peak_annotation_header from ch_peak_annotation_header

    output:
    path '*.tsv' into ch_macs_qc_mqc
    path '*.{txt,pdf}'

    script:  // This script is bundled with the pipeline, in nf-core/chipseq/bin/
    """
    plot_macs_qc.r \\
        -i ${peaks.join(',')} \\
        -s ${peaks.join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\
        -o ./ \\
        -p macs_peak

    plot_homer_annotatepeaks.r \\
        -i ${annos.join(',')} \\
        -s ${annos.join(',').replaceAll("_peaks.annotatePeaks.txt","")} \\
        -o ./ \\
        -p macs_annotatePeaks

    cat $peak_annotation_header macs_annotatePeaks.summary.txt > macs_annotatePeaks.summary_mqc.tsv
    """
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                 CONSENSUS PEAKS ANALYSIS                            -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// Group by ip from this point and carry forward boolean variables
ch_macs_consensus
    .map { it ->  [ it[0], it[1], it[2], it[-1] ] }
    .groupTuple()
    .map { it ->  [ it[0], it[1][0], it[2][0], it[3].sort() ] }
    .set { ch_macs_consensus }

/*
 * STEP 7.1: Consensus peaks across samples, create boolean filtering file, SAF file for featureCounts and UpSetR plot for intersection
 */
process CONSENSUS_PEAKS {
    tag "${antibody}"
    label 'process_long'
    publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.endsWith('.igv.txt')) null
                      else filename
                }

    when:
    params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks

    input:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), path(peaks) from ch_macs_consensus

    output:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), path('*.bed') into ch_macs_consensus_bed
    tuple val(antibody), path('*.saf') into ch_macs_consensus_saf
    path '*.boolean.txt' into ch_macs_consensus_bool
    path '*igv.txt' into ch_macs_consensus_igv
    path '*.intersect.{txt,plot.pdf}'

    script: // scripts are bundled with the pipeline in nf-core/chipseq/bin/
    prefix = "${antibody}.consensus_peaks"
    mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',')
    collapsecols = params.narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',')
    expandparam = params.narrow_peak ? '--is_narrow_peak' : ''
    """
    sort -T '.' -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\
        | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt

    macs2_merged_expand.py ${prefix}.txt \\
        ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\
        ${prefix}.boolean.txt \\
        --min_replicates $params.min_reps_consensus \\
        $expandparam

    awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2, \$3, \$4, "0", "+" }' ${prefix}.boolean.txt > ${prefix}.bed

    echo -e "GeneID\tChr\tStart\tEnd\tStrand" > ${prefix}.saf
    awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$4, \$1, \$2, \$3,  "+" }' ${prefix}.boolean.txt >> ${prefix}.saf

    plot_peak_intersect.r -i ${prefix}.boolean.intersect.txt -o ${prefix}.boolean.intersect.plot.pdf

    find * -type f -name "${prefix}.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/"{}"\\t0,0,0" \\; > ${prefix}.bed.igv.txt
    """
}

/*
 * STEP 7.2: Annotate consensus peaks with HOMER, and add annotation to boolean output file
 */
process CONSENSUS_PEAKS_ANNOTATE {
    tag "${antibody}"
    label 'process_medium'
    publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: params.publish_dir_mode

    when:
    params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks && !params.skip_peak_annotation

    input:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), path(bed) from ch_macs_consensus_bed
    path bool from ch_macs_consensus_bool
    path fasta from ch_fasta
    path gtf from ch_gtf

    output:
    path '*.annotatePeaks.txt'

    script:
    prefix = "${antibody}.consensus_peaks"
    """
    annotatePeaks.pl \\
        $bed \\
        $fasta \\
        -gid \\
        -gtf $gtf \\
        -cpu $task.cpus \\
        > ${prefix}.annotatePeaks.txt

    cut -f2- ${prefix}.annotatePeaks.txt | awk 'NR==1; NR > 1 {print \$0 | "sort -T '.' -k1,1 -k2,2n"}' | cut -f6- > tmp.txt
    paste $bool tmp.txt > ${prefix}.boolean.annotatePeaks.txt
    """
}

// Get BAM and SAF files for each ip
ch_group_bam_counts
    .map { it -> [ it[3], [ it[0], it[1], it[2] ] ] }
    .join(ch_rm_orphan_name_bam_counts)
    .map { it -> [ it[1][0], it[1][1], it[1][2], it[2] ] }
    .groupTuple()
    .map { it -> [ it[0], it[1][0], it[2][0], it[3].flatten().sort() ] }
    .join(ch_macs_consensus_saf)
    .set { ch_group_bam_counts }

/*
 * STEP 7.3: Count reads in consensus peaks with featureCounts
 */
process CONSENSUS_PEAKS_COUNTS {
    tag "${antibody}"
    label 'process_medium'
    publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: params.publish_dir_mode

    when:
    params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks

    input:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), path(bams), path(saf) from ch_group_bam_counts

    output:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), path('*featureCounts.txt') into ch_macs_consensus_counts
    path '*featureCounts.txt.summary' into ch_macs_consensus_counts_mqc

    script:
    prefix = "${antibody}.consensus_peaks"
    bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort()
    pe_params = params.single_end ? '' : '-p --donotsort'
    """
    featureCounts \\
        -F SAF \\
        -O \\
        --fracOverlap 0.2 \\
        -T $task.cpus \\
        $pe_params \\
        -a $saf \\
        -o ${prefix}.featureCounts.txt \\
        ${bam_files.join(' ')}
    """
}

/*
 * STEP 7.4: Differential analysis with DESeq2
 */
process CONSENSUS_PEAKS_DESEQ2 {
    tag "${antibody}"
    label 'process_medium'
    publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/deseq2", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.endsWith('.igv.txt')) null
                      else filename
                }

    when:
    params.macs_gsize && replicatesExist && multipleGroups && !params.skip_consensus_peaks && !params.skip_diff_analysis

    input:
    tuple val(antibody), val(replicatesExist), val(multipleGroups), path(counts) from ch_macs_consensus_counts
    path deseq2_pca_header from ch_deseq2_pca_header
    path deseq2_clustering_header from ch_deseq2_clustering_header

    output:
    path '*.tsv' into ch_macs_consensus_deseq_mqc
    path '*igv.txt' into ch_macs_consensus_deseq_comp_igv
    path '*.{RData,results.txt,pdf,log}'
    path 'sizeFactors'
    path '*vs*/*.{pdf,txt}'
    path '*vs*/*.bed'

    script:
    prefix = "${antibody}.consensus_peaks"
    bam_ext = params.single_end ? '.mLb.clN.sorted.bam' : '.mLb.clN.bam'
    vst = params.deseq2_vst ? '--vst TRUE' : ''
    """
    featurecounts_deseq2.r \\
        --featurecount_file $counts \\
        --bam_suffix '$bam_ext' \\
        --outdir ./ \\
        --outprefix $prefix \\
        --outsuffix '' \\
        --cores $task.cpus \\
        $vst

    sed 's/deseq2_pca/deseq2_pca_${task.index}/g' <$deseq2_pca_header >tmp.txt
    sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt
    cat tmp.txt ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv

    sed 's/deseq2_clustering/deseq2_clustering_${task.index}/g' <$deseq2_clustering_header >tmp.txt
    sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt
    cat tmp.txt ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv

    find * -type f -name "*.FDR0.05.results.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/deseq2/"{}"\\t255,0,0" \\; > ${prefix}.igv.txt
    """
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                             IGV                                     -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * STEP 8: Create IGV session file
 */
process IGV {
    publishDir "${params.outdir}/igv/${PEAK_TYPE}", mode: params.publish_dir_mode

    when:
    !params.skip_igv

    input:
    path fasta from ch_fasta
    path bigwigs from ch_bigwig_igv.collect().ifEmpty([])
    path peaks from ch_macs_igv.collect().ifEmpty([])
    path consensus_peaks from ch_macs_consensus_igv.collect().ifEmpty([])
    path differential_peaks from ch_macs_consensus_deseq_comp_igv.collect().ifEmpty([])

    output:
    path '*.{txt,xml}'

    script: // scripts are bundled with the pipeline in nf-core/chipseq/bin/
    """
    cat *.txt > igv_files.txt
    igv_files_to_session.py igv_session.xml igv_files.txt ../../genome/${fasta.getName()} --path_prefix '../../'
    """
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                          MULTIQC                                    -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * Parse software version numbers
 */
process get_software_versions {
    publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.indexOf('.csv') > 0) filename
                      else null
                }

    output:
    path 'software_versions_mqc.yaml' into ch_software_versions_mqc
    path 'software_versions.csv'

    script:
    """
    echo $workflow.manifest.version > v_pipeline.txt
    echo $workflow.nextflow.version > v_nextflow.txt
    fastqc --version > v_fastqc.txt
    trim_galore --version > v_trim_galore.txt
    echo \$(bwa 2>&1) > v_bwa.txt
    samtools --version > v_samtools.txt
    bedtools --version > v_bedtools.txt
    echo \$(bamtools --version 2>&1) > v_bamtools.txt
    echo \$(plotFingerprint --version 2>&1) > v_deeptools.txt || true
    picard MarkDuplicates --version &> v_picard.txt  || true
    echo \$(R --version 2>&1) > v_R.txt
    python -c "import pysam; print(pysam.__version__)" > v_pysam.txt
    echo \$(macs2 --version 2>&1) > v_macs2.txt
    touch v_homer.txt
    echo \$(featureCounts -v 2>&1) > v_featurecounts.txt
    preseq &> v_preseq.txt
    multiqc --version > v_multiqc.txt
    scrape_software_versions.py &> software_versions_mqc.yaml
    """
}

Channel.from(summary.collect{ [it.key, it.value] })
    .map { k,v -> "<dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }
    .reduce { a, b -> return [a, b].join("\n            ") }
    .map { x -> """
    id: 'nf-core-chipseq-summary'
    description: " - this information is collected when the pipeline is started."
    section_name: 'nf-core/chipseq Workflow Summary'
    section_href: 'https://github.com/nf-core/chipseq'
    plot_type: 'html'
    data: |
        <dl class=\"dl-horizontal\">
            $x
        </dl>
    """.stripIndent() }
    .set { ch_workflow_summary }

/*
 * STEP 9: MultiQC
 */
process MULTIQC {
    publishDir "${params.outdir}/multiqc/${PEAK_TYPE}", mode: params.publish_dir_mode

    when:
    !params.skip_multiqc

    input:
    path (multiqc_config) from ch_multiqc_config
    path (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([])

    path ('software_versions/*') from ch_software_versions_mqc.collect()
    path workflow_summary from ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')

    path ('fastqc/*') from ch_fastqc_reports_mqc.collect().ifEmpty([])
    path ('trimgalore/*') from ch_trimgalore_results_mqc.collect().ifEmpty([])
    path ('trimgalore/fastqc/*') from ch_trimgalore_fastqc_reports_mqc.collect().ifEmpty([])

    path ('alignment/library/*') from ch_sort_bam_flagstat_mqc.collect()
    path ('alignment/mergedLibrary/*') from ch_merge_bam_stats_mqc.collect()
    path ('alignment/mergedLibrary/*') from ch_rm_orphan_flagstat_mqc.collect{it[1]}
    path ('alignment/mergedLibrary/*') from ch_rm_orphan_stats_mqc.collect()
    path ('alignment/mergedLibrary/picard_metrics/*') from ch_merge_bam_metrics_mqc.collect()
    path ('alignment/mergedLibrary/picard_metrics/*') from ch_collectmetrics_mqc.collect()

    path ('macs/*') from ch_macs_mqc.collect().ifEmpty([])
    path ('macs/*') from ch_macs_qc_mqc.collect().ifEmpty([])
    path ('macs/consensus/*') from ch_macs_consensus_counts_mqc.collect().ifEmpty([])
    path ('macs/consensus/*') from ch_macs_consensus_deseq_mqc.collect().ifEmpty([])

    path ('preseq/*') from ch_preseq_mqc.collect().ifEmpty([])
    path ('deeptools/*') from ch_plotfingerprint_mqc.collect().ifEmpty([])
    path ('deeptools/*') from ch_plotprofile_mqc.collect().ifEmpty([])
    path ('phantompeakqualtools/*') from ch_spp_out_mqc.collect().ifEmpty([])
    path ('phantompeakqualtools/*') from ch_spp_csv_mqc.collect().ifEmpty([])

    output:
    path '*multiqc_report.html' into ch_multiqc_report
    path '*_data'

    script:
    rtitle = custom_runName ? "--title \"$custom_runName\"" : ''
    rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
    custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : ''
    """
    multiqc . -f $rtitle $rfilename $custom_config_file
    """
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                       REPORTS/DOCUMENTATION                         -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/*
 * STEP 10: Output description HTML
 */
process output_documentation {
    publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode

    input:
    path output_docs from ch_output_docs
    path images from ch_output_docs_images

    output:
    path 'results_description.html'

    script:
    """
    markdown_to_html.py $output_docs -o results_description.html
    """
}

/*
 * Completion e-mail notification
 */
workflow.onComplete {

    // Set up the e-mail variables
    def subject = "[nf-core/chipseq] Successful: $workflow.runName"
    if (!workflow.success) {
        subject = "[nf-core/chipseq] FAILED: $workflow.runName"
    }
    def email_fields = [:]
    email_fields['version'] = workflow.manifest.version
    email_fields['runName'] = custom_runName ?: workflow.runName
    email_fields['success'] = workflow.success
    email_fields['dateComplete'] = workflow.complete
    email_fields['duration'] = workflow.duration
    email_fields['exitStatus'] = workflow.exitStatus
    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
    email_fields['errorReport'] = (workflow.errorReport ?: 'None')
    email_fields['commandLine'] = workflow.commandLine
    email_fields['projectDir'] = workflow.projectDir
    email_fields['summary'] = summary
    email_fields['summary']['Date Started'] = workflow.start
    email_fields['summary']['Date Completed'] = workflow.complete
    email_fields['summary']['Pipeline script file path'] = workflow.scriptFile
    email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId
    if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository
    if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId
    if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision
    email_fields['summary']['Nextflow Version'] = workflow.nextflow.version
    email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
    email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp

    // On success try attach the multiqc report
    def mqc_report = null
    try {
        if (workflow.success) {
            mqc_report = ch_multiqc_report.getVal()
            if (mqc_report.getClass() == ArrayList) {
                log.warn "[nf-core/chipseq] Found multiple reports from process 'multiqc', will use only one"
                mqc_report = mqc_report[0]
            }
        }
    } catch (all) {
        log.warn "[nf-core/chipseq] Could not attach MultiQC report to summary email"
    }

    // Check if we are only sending emails on failure
    email_address = params.email
    if (!params.email && params.email_on_fail && !workflow.success) {
        email_address = params.email_on_fail
    }

    // Render the TXT template
    def engine = new groovy.text.GStringTemplateEngine()
    def tf = new File("$baseDir/assets/email_template.txt")
    def txt_template = engine.createTemplate(tf).make(email_fields)
    def email_txt = txt_template.toString()

    // Render the HTML template
    def hf = new File("$baseDir/assets/email_template.html")
    def html_template = engine.createTemplate(hf).make(email_fields)
    def email_html = html_template.toString()

    // Render the sendmail template
    def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ]
    def sf = new File("$baseDir/assets/sendmail_template.txt")
    def sendmail_template = engine.createTemplate(sf).make(smail_fields)
    def sendmail_html = sendmail_template.toString()

    // Send the HTML e-mail
    if (email_address) {
        try {
            if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') }
            // Try to send HTML e-mail using sendmail
            [ 'sendmail', '-t' ].execute() << sendmail_html
            log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (sendmail)"
        } catch (all) {
            // Catch failures and try with plaintext
            [ 'mail', '-s', subject, email_address ].execute() << email_txt
            log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (mail)"
        }
    }

    // Write summary e-mail HTML to a file
    def output_d = new File("${params.outdir}/pipeline_info/")
    if (!output_d.exists()) {
        output_d.mkdirs()
    }
    def output_hf = new File(output_d, "pipeline_report.html")
    output_hf.withWriter { w -> w << email_html }
    def output_tf = new File(output_d, "pipeline_report.txt")
    output_tf.withWriter { w -> w << email_txt }

    c_green = params.monochrome_logs ? '' : "\033[0;32m";
    c_purple = params.monochrome_logs ? '' : "\033[0;35m";
    c_red = params.monochrome_logs ? '' : "\033[0;31m";
    c_reset = params.monochrome_logs ? '' : "\033[0m";

    if (workflow.stats.ignoredCount > 0 && workflow.success) {
        log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-"
        log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-"
        log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-"
    }

    if (workflow.success) {
        log.info "-${c_purple}[nf-core/chipseq]${c_green} Pipeline completed successfully${c_reset}-"
    } else {
        checkHostname()
        log.info "-${c_purple}[nf-core/chipseq]${c_red} Pipeline completed with errors${c_reset}-"
    }

}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                       NF-CORE HEADER                                -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

def nfcoreHeader() {
    // Log colors ANSI codes
    c_black = params.monochrome_logs ? '' : "\033[0;30m";
    c_blue = params.monochrome_logs ? '' : "\033[0;34m";
    c_cyan = params.monochrome_logs ? '' : "\033[0;36m";
    c_dim = params.monochrome_logs ? '' : "\033[2m";
    c_green = params.monochrome_logs ? '' : "\033[0;32m";
    c_purple = params.monochrome_logs ? '' : "\033[0;35m";
    c_reset = params.monochrome_logs ? '' : "\033[0m";
    c_white = params.monochrome_logs ? '' : "\033[0;37m";
    c_yellow = params.monochrome_logs ? '' : "\033[0;33m";

    return """    -${c_dim}--------------------------------------------------${c_reset}-
                                            ${c_green},--.${c_black}/${c_green},-.${c_reset}
    ${c_blue}        ___     __   __   __   ___     ${c_green}/,-._.--~\'${c_reset}
    ${c_blue}  |\\ | |__  __ /  ` /  \\ |__) |__         ${c_yellow}}  {${c_reset}
    ${c_blue}  | \\| |       \\__, \\__/ |  \\ |___     ${c_green}\\`-._,-`-,${c_reset}
                                            ${c_green}`._,._,\'${c_reset}
    ${c_purple}  nf-core/chipseq v${workflow.manifest.version}${c_reset}
    -${c_dim}--------------------------------------------------${c_reset}-
    """.stripIndent()
}

def checkHostname() {
    def c_reset = params.monochrome_logs ? '' : "\033[0m"
    def c_white = params.monochrome_logs ? '' : "\033[0;37m"
    def c_red = params.monochrome_logs ? '' : "\033[1;91m"
    def c_yellow_bold = params.monochrome_logs ? '' : "\033[1;93m"
    if (params.hostnames) {
        def hostname = "hostname".execute().text.trim()
        params.hostnames.each { prof, hnames ->
            hnames.each { hname ->
                if (hostname.contains(hname) && !workflow.profile.contains(prof)) {
                    log.error "====================================================\n" +
                            "  ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" +
                            "  but your machine hostname is ${c_white}'$hostname'${c_reset}\n" +
                            "  ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" +
                            "============================================================"
                }
            }
        }
    }
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/* --                                                                     -- */
/* --                        END OF PIPELINE                              -- */
/* --                                                                     -- */
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////