process.config

/*
 * -------------------------------------------------
 *  Nextflow process config file
 * -------------------------------------------------
 * Defines the CPU, memory and time requirements of the
 * different processes, as well as the execution tracking
 * reports (timeline, report, trace and DAG)
 */

// Upper bounds on the resources a single task may request.
// checkMax() compares requested values against these limits.
params.maxCpus   = 24
params.maxMemory = 150.GB
params.maxTime   = 48.h

process {
  // Defaults for processes without labels. Memory and time grow with the
  // attempt number; every value is capped by checkMax().
  cpus = { checkMax( 1, 'cpus' ) }
  memory = { checkMax( 3.GB * task.attempt, 'memory' ) }
  time = { checkMax( 24.h * task.attempt, 'time' ) }

  // Retry strategy: wait 2^attempt * 30 (Groovy sleep() takes milliseconds)
  // before asking for a retry, at most twice.
  withLabel: retry {
    errorStrategy = { sleep(Math.pow(2, task.attempt) * 30 as long); return 'retry' }
    maxRetries = 2
  }

  // Customise requirements for specific processes.
  // bclautoconf: the first attempt uses the estimates returned by
  // autoconfTime()/autoconfMem(); later attempts use fixed values.
  // Both are passed through checkMax() so that they never exceed
  // params.maxTime / params.maxMemory, like every other directive here.
  withLabel: bclautoconf {
    cpus = { checkMax( 8 * task.attempt, 'cpus' ) }
    time = { checkMax( task.attempt > 1 ? 48.h : autoconfTime(), 'time' ) }
    memory = { checkMax( task.attempt > 1 ? 100.GB : autoconfMem(), 'memory' ) }
  }

  // CPU tiers: base value multiplied by the attempt number
  withLabel: minCpu {
    cpus = { checkMax( 1 * task.attempt, 'cpus' ) }
  }
  withLabel: lowCpu {
    cpus = { checkMax( 2 * task.attempt, 'cpus' ) }
  }
  withLabel: medCpu {
    cpus = { checkMax( 4 * task.attempt, 'cpus' ) }
  }
  withLabel: highCpu {
    cpus = { checkMax( 8 * task.attempt, 'cpus' ) }
  }
  withLabel: extraCpu {
    cpus = { checkMax( 16 * task.attempt, 'cpus' ) }
  }

  // Memory tiers: base value multiplied by the attempt number
  withLabel: minMem {
    memory = { checkMax( 3.GB * task.attempt, 'memory' ) }
  }
  withLabel: lowMem {
    memory = { checkMax( 4.GB * task.attempt, 'memory' ) }
  }
  withLabel: medMem {
    memory = { checkMax( 8.GB * task.attempt, 'memory' ) }
  }
  withLabel: highMem {
    memory = { checkMax( 16.GB * task.attempt, 'memory' ) }
  }
  withLabel: extraMem {
    memory = { checkMax( 40.GB * task.attempt, 'memory' ) }
  }
  withLabel: gigaMem {
    memory = { checkMax( 100.GB * task.attempt, 'memory' ) }
  }
}

// Run task scripts with bash in strict mode (-e, -u, -o pipefail) so that
// a failure of any command in a pipe is reported as the task exit status
process {
  shell = ['/bin/bash', '-euo', 'pipefail']
}

// Execution reports (settings taken from nf-core).
// Every report is written below ${params.summaryDir}/trace.
timeline.enabled = true
timeline.file = "${params.summaryDir}/trace/timeline.html"

report.enabled = true
report.file = "${params.summaryDir}/trace/report.html"

trace.enabled = true
trace.raw = true
trace.fields = 'process,task_id,hash,native_id,module,container,tag,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,attempt,workdir,scratch,queue,cpus,memory,disk,time,env'
trace.file = "${params.summaryDir}/trace/trace.txt"

dag.enabled = true
dag.file = "${params.summaryDir}/trace/DAG.pdf"

// Function to ensure that resource requirements don't go beyond
// a maximum limit.
//   obj  : the requested value (compared against params.maxMemory,
//          params.maxTime or params.maxCpus depending on 'type')
//   type : 'memory', 'time' or 'cpus'
// Returns the limit when 'obj' exceeds it, otherwise 'obj'. When the
// comparison itself fails (e.g. the limit cannot be converted), an error
// is printed and 'obj' is returned unchanged. An unrecognised 'type' is
// passed through unchanged instead of silently yielding null.
def checkMax(obj, type) {
  if(type == 'memory'){
    try {
      def limit = params.maxMemory as nextflow.util.MemoryUnit
      // Only the sign of compareTo() is contractual, so test '> 0' rather than '== 1'
      return obj.compareTo(limit) > 0 ? limit : obj
    } catch (all) {
      println "   ### ERROR ###   Max memory '${params.maxMemory}' is not valid! Using default value: $obj"
      return obj
    }
  } else if(type == 'time'){
    try {
      def limit = params.maxTime as nextflow.util.Duration
      return obj.compareTo(limit) > 0 ? limit : obj
    } catch (all) {
      println "   ### ERROR ###   Max time '${params.maxTime}' is not valid! Using default value: $obj"
      return obj
    }
  } else if(type == 'cpus'){
    try {
      return Math.min( obj, params.maxCpus as int )
    } catch (all) {
      println "   ### ERROR ###   Max cpus '${params.maxCpus}' is not valid! Using default value: $obj"
      return obj
    }
  }
  // Unknown resource type: there is no limit to compare against
  return obj
}

// Functions to compute the resource requirements (time and memory) from the run data


// Estimates the time to request from the size of ${params.bclDir}/Data and
// the sequencer type (first character of params.run: "D", "N" or "L").
// The model works on a log scale; its result is exponentiated, rounded up
// and used as a number of minutes, with a floor of 5 minutes.
// Returns params.maxTime (after printing an error) whenever the estimate
// cannot be computed: command failure, unparsable size, data larger than
// 850 GiB or unsupported sequencer type.
def autoconfTime() {
  try {
    //Data size: apparent size in bytes of the Data directory.
    //'du -s' on the directory itself avoids filtering the full listing with
    //a grep pattern built from the path (which breaks on a trailing slash
    //or on regex metacharacters in the path).
    def cmd = ["/bin/bash", "-c", "du -sb \"${params.bclDir}/Data\" | awk '{print \$1}'"]
    def proc = cmd.execute()
    def stdout = new StringBuilder()
    def stderr = new StringBuilder()
    //Drain both streams while waiting so the child can never block on a full pipe
    proc.waitForProcessOutput(stdout, stderr)
    if(proc.exitValue() != 0){
      throw new RuntimeException("Error executing command: ${cmd}. Exit value: ${proc.exitValue()}")
    }
    def sizeText = stdout.toString().trim()
    if(sizeText.isEmpty()){
      throw new RuntimeException("Error in auto-configuration step: unable to get the size of ${params.bclDir}/Data. ${stderr.toString().trim()}")
    }
    def dataG = sizeText.toDouble() / Math.pow(1024, 3)
    if(dataG > 850.0){
      throw new RuntimeException("Error in auto-configuration step: the model has not been trained on such data size ${dataG}. Default ressources values will be used")
    }

    //Sequencing type: indicator variables of the model ("L" is the baseline)
    double novaseq = 0.0d
    double nextseq = 0.0d
    def seqType = params.run[0]
    if (seqType.equals("D")) {
      novaseq = 1.0d
    } else if (seqType.equals("N")) {
      nextseq = 1.0d
    } else if (!seqType.equals("L")) {
      throw new RuntimeException("Error in auto-configuration step: the sequencer model ${seqType} is not supported by the prediction model. Default ressources values will be used.")
    }

    //Prediction
    double logSize = Math.log(dataG)
    double baseTime = -3.48570 + (0.95993 * logSize) + (0.86745 * nextseq) + (0.65005 * novaseq)
    //Upper value of the estimate (presumably two standard errors - TODO confirm)
    double baseTimeUppr = baseTime + (2 * 0.1628978)
    double minutes = Math.exp(baseTimeUppr)
    def calculatedTime
    if(minutes < 5.0){
      calculatedTime = 5.m
    }
    else {
      calculatedTime = (Math.ceil(minutes).toInteger()).m
    }
    return calculatedTime
  } catch (Exception e) {
    println " ### ERROR ### Error in autoconfTime: ${e.message}"
    return params.maxTime
  }
}

// Matrix product a * b.
//   a : matrix of size aRows x aCols
//   b : matrix of size bRows x bCols, with bRows == aCols
// Returns a new aRows x bCols matrix.
// Throws a RuntimeException when the inner dimensions do not match.
def produitM(double[][] a, double[][] b){
  int aRows = a.length
  int aCols = a[0].length
  int bRows = b.length
  int bCols = b[0].length
  if(aCols != bRows){
    throw new RuntimeException("Error in auto-configuration step: Matrix dimensions does not fit. Default ressources values will be used")
  }
  double[][] resM = new double[aRows][bCols]

  for(int row = 0; row < aRows; row++){
    for(int col = 0; col < bCols; col++){
      double acc = 0.0d
      for(int k = 0; k < aCols; k++){
        // element (row, col) is the dot product of row 'row' of a and column 'col' of b
        acc += a[row][k] * b[k][col]
      }
      resM[row][col] = acc
    }
  }
  return resM
}

// Transpose of a matrix: element (r, c) of 'a' becomes element (c, r)
// of the returned matrix. The input is left untouched.
def transposeM(double[][] a){
  int nRows = a.length
  int nCols = a[0].length
  double[][] flipped = new double[nCols][nRows]

  for(r in 0..<nRows){
    for(c in 0..<nCols){
      flipped[c][r] = a[r][c]
    }
  }
  return flipped
}

// Interval around 'obs' of half-width tValue * sqrt(var + coeff' * vcov * coeff).
//   obs    : centre of the interval
//   coeff  : matrix multiplied as coeff' * vcov * coeff; only element [0][0]
//            of that product is used (callers are expected to pass a column vector)
//   vcov   : matrix placed in the middle of the product
//   var    : value added to the [0][0] element of the product
//   tValue : multiplier applied to the square root
// Returns a two-element array: [lower bound, upper bound].
def predictionInterval(double obs, double[][] coeff, double[][] vcov, double var, double tValue){
  double[][] coeffT = transposeM(coeff)
  double[][] quadForm = produitM(produitM(coeffT, vcov), coeff)
  double predVar = var + quadForm[0][0]
  double halfWidth = tValue * Math.sqrt(predVar)

  double[] bounds = new double[2]
  bounds[0] = obs - halfWidth
  bounds[1] = obs + halfWidth
  return bounds
}

// Estimates the memory to request from the number of BCL files found in
// params.bclDir, the sequencer type (first character of params.run: "D",
// "N" or "L") and whether the line following "[Data]" in params.sampleSheet
// contains "Lane".
// The model works on a log scale; the upper bound returned by
// predictionInterval() is exponentiated, rounded up and used as a number
// of GB, with a floor of 4 GB.
// Returns params.maxMemory (after printing an error) whenever the estimate
// cannot be computed: command failure, unsupported sequencer type or no
// BCL file found.
def autoconfMem() {
  try {
    // Runs a command, draining stdout and stderr while waiting so the child
    // can never block on a full pipe; returns the trimmed standard output.
    def runCommand = { List command ->
      def proc = command.execute()
      def stdout = new StringBuilder()
      def stderr = new StringBuilder()
      proc.waitForProcessOutput(stdout, stderr)
      if(proc.exitValue() != 0) {
        throw new RuntimeException("Error executing command: ${command}. Exit value: ${proc.exitValue()}")
      }
      return stdout.toString().trim()
    }

    //Lane: 1 when the line after "[Data]" mentions "Lane", 0 otherwise
    def laneCmd = ["/bin/bash", "-c", "awk 'BEGIN {found=0} /\\[Data\\]/{getline nextline; if (nextline ~ /Lane/) {found=1}} END {print (found ? \"1\" : \"0\")}' ${params.sampleSheet}"]
    def lane = runCommand(laneCmd).toInteger()

    //Sequencing type: indicator variables of the model ("L" is the baseline)
    //and file name pattern of the BCL files to count
    def novaseq = 0
    def nextseq = 0
    def bclPattern
    def seqType = params.run[0]
    if (seqType.equals("D")) {
      novaseq = 1
      bclPattern = '*.cbcl'
    } else if (seqType.equals("N")) {
      nextseq = 1
      bclPattern = '*.cbcl'
    } else if (seqType.equals("L")) {
      bclPattern = '*.bcl'
    } else {
      throw new RuntimeException("Error in auto-configuration step: the sequencer model ${seqType} is not supported by the prediction model. Default ressources values will be used.")
    }

    //BCL
    def countCmd = ["/bin/bash", "-c", "find ${params.bclDir} -type f -name '${bclPattern}' | wc -l"]
    def nbBcl = runCommand(countCmd).toInteger()
    if(nbBcl < 1) {
      // log(0) is -Infinity; it would propagate to NaN and end up as a 0 GB request
      throw new RuntimeException("Error in auto-configuration step: no ${bclPattern} file found in ${params.bclDir}. Default resources values will be used")
    }

    //Prediction
    def calculatedMemory
    double[][] coeff = [
            [1],
            [Math.log(nbBcl)],
            [nextseq],
            [novaseq],
            [lane]
    ]
    def baseMemory = -2.00122 + (0.37545 * coeff[1][0]) + (3.13089 * coeff[2][0]) + (2.68955 * coeff[3][0]) + (0.15234 * coeff[4][0])
    //Prediction interval: vcovMatrix is passed as 'vcov', 0.05506076 as 'var'
    //and 1.977561 as 'tValue'; only the upper bound (index 1) is used
    double[][] vcovMatrix = [
        [0.045988236, -0.0054460728, -0.0135905683, -0.0108750897, 0.0014025667],
        [-0.005446073, 0.0006820601, 0.0013886247, 0.0010485411, -0.0001756559],
        [-0.013590568, 0.0013886247, 0.0085687684, 0.0046375149, -0.0003576226],
        [-0.010875090, 0.0010485411, 0.0046375149, 0.0048206066, -0.0009759456],
        [0.001402567, -0.0001756559, -0.0003576226, -0.0009759456, 0.0030453434]
    ]
    double baseMemoryUppr = predictionInterval(baseMemory,coeff,vcovMatrix, 0.05506076, 1.977561)[1]
    double gigabytes = Math.exp(baseMemoryUppr)
    if (gigabytes < 4.00) {
      calculatedMemory = 4.GB
    } else {
      calculatedMemory = (Math.ceil(gigabytes).toInteger()).GB
    }
    return calculatedMemory
  } catch  (Exception e) {
    println "### ERROR ### Error in autoconfMemory: ${e.message}"
    return params.maxMemory
  }
}