Merge pull request #96 from jianhong/dev

Fix multiple minor typos for review comments.
nf-core · Jul 19, 2023 · d2d17a9 · d2d17a9
2 parents 1bef3c1 + 59391ab
commit d2d17a9
Show file tree

Hide file tree

Showing 9 changed files with 25 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -19,7 +19,7 @@ The pipeline can also handle the experiment of HiChIP, ChIA-PET, and PLAC-Seq. I
 
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
 
-On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/hicar/results).
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/hicar/results).
 
 ## Pipeline summary
 

diff --git a/bin/pairsqcplot.r b/bin/pairsqcplot.r
@@ -8,7 +8,7 @@
 ######################################################
 
 args = commandArgs(TRUE)
-RE_len = args[1] # either 4 or 6
+RE_len = as.numeric(args[1]) # either 4 or 6
 if(length(args)>1) { report_dir = args[2] } else { report_dir = './report' }
 
 rainbow_w_offset <- function(L, offset = NA){
@@ -149,7 +149,11 @@ plot_orientation_proportion_vs_distance <- function(x, RE_len, xlim=c(2,5), no_x
 
 plot_orientation_log10count_vs_distance <- function(x, RE_len, xlim=c(2,5), no_xlabel=FALSE){
     ind = which(x$distance>=xlim[1] & x$distance<=xlim[2] & x$log10count.Inner!=-100 & x$log10count.Outer!=-100 & x$log10count.Right!=-100 & x$log10count.Left!=-100)
-    ylim=range(x[ind,c('log10count.Inner','log10count.Outer','log10count.Right','log10count.Left')])
+    if(length(ind)>0){
+        ylim=range(x[ind,c('log10count.Inner','log10count.Outer','log10count.Right','log10count.Left')])
+    }else{
+        ylim=range(x[,c('log10count.Inner','log10count.Outer','log10count.Right','log10count.Left')])
+    }
     matplot(x$distance,x[,c('log10count.Inner','log10count.Outer','log10count.Right','log10count.Left')],pch=19,type='o',xlab="", ylab="Contact frequency",lwd=1,lty=1,ylim=ylim, col=COLOR4(), axes=F, xlim=xlim)
     exp_axis(xlim,1)
     exp_axis(ylim,2)

diff --git a/conf/modules.config b/conf/modules.config
@@ -697,7 +697,7 @@ process {
         ]
     }
     // step2: callloops.nf
-    withName: 'HICDCPLUS_CALLLOOPS' {
+    withName: 'HICDCPLUS_CALL_LOOPS' {
         ext.args    = { "--fdr $params.hicdcplus_cutoff_fdr" }
         publishDir  = [
             path: { "${params.outdir}/interactions/hicdcplus" },

diff --git a/docs/usage.md b/docs/usage.md
@@ -52,14 +52,15 @@ TREATMENT,3,AEG588A6_S6_L003_R1_001.fastq.gz,AEG588A6_S6_L003_R2_001.fastq.gz,,
 TREATMENT,3,AEG588A6_S6_L004_R1_001.fastq.gz,AEG588A6_S6_L004_R2_001.fastq.gz,,
 ```
 
-| Column      | Description                                                                                                                                                                           |
-| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `group`     | Custom group name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `replicate` | Biological replicates of the samples.                                                                                                                                                 |
-| `fastq_1`   | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                            |
-| `fastq_2`   | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                            |
-| `md5_1`     | Checksum for fastq_1. The checksums of the files will be check to make sure the file is not truncated if provided.                                                                    |
-| `md5_2`     | Checksum for fastq_2. The checksums of the files will be check to make sure the file is not truncated if provided.                                                                    |
+| Column               | Description                                                                                                                                                                           |
+| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `group`              | Custom group name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
+| `replicate`          | Biological replicates of the samples.                                                                                                                                                 |
+| `techniquereplicate` | Technical replicates of the samples. Default is 1.                                                                                                                                    |
+| `fastq_1`            | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                            |
+| `fastq_2`            | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                            |
+| `md5_1`              | Checksum for fastq_1. The checksums of the files will be checked to make sure the file is not truncated if provided.                                                                  |
+| `md5_2`              | Checksum for fastq_2. The checksums of the files will be checked to make sure the file is not truncated if provided.                                                                  |
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
@@ -72,7 +73,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p
 On a large scale, the arrangement of chromosomes is organised into two compartments labelled A ("active") and B ("inactive").
 A/B compartment-associated regions are on the multi-Mb scale and correlate with either open and expression-active chromatin ("A" compartments) or closed and expression-inactive chromatin ("B" compartments). A compartments tend to be gene-rich, have high GC-content, contain histone markers for active transcription, and are usually located in the interior of the nucleus. The regions in compartment A tend to interact preferentially with A compartment-associated regions rather than with B compartment-associated ones. B compartments, on the other hand, tend to be gene-poor, compact, contain histone markers for gene silencing, and lie on the nuclear periphery.
 
-A topologically associating domain (TAD) is a smaller size genomic region compared to A/B compartments. It is a self-interacting genomic region. Most of the studies indicate TADs regulate gene expression by limiting the enhancer-promoter interaction to each TAD. A number of proteins are known to be associated with TAD formation. The most studied proteins are the protein CTCF and the protein complex cohesin. It has been shown that the TAD boundaries have high levels of CTCF binding and cohesin/lamina shifting edges.
+A topologically associating domain (TAD) is a smaller size genomic region compared to A/B compartments. It is a self-interacting genomic region. Most of the studies indicate TADs regulate gene expression by limiting the enhancer-promoter interaction to each TAD. A number of proteins are known to be associated with TAD formation. The most studied proteins are the protein CCCTC-binding factor (CTCF) and the protein complex cohesin. It has been shown that the TAD boundaries have high levels of CTCF binding and cohesin/lamina shifting edges.
 
 There are multiple available modules to call A/B compartments and TADs.
 

diff --git a/modules/local/hicdcplus/callloops.nf b/modules/local/hicdcplus/callloops.nf
@@ -1,4 +1,4 @@
-process HICDCPLUS_CALLLOOPS {
+process HICDCPLUS_CALL_LOOPS {
     tag "$meta.id"
     label 'process_high'
     label 'process_long'

diff --git a/modules/local/hichipper/creat_yaml.nf → modules/local/hichipper/create_yaml.nf b/modules/local/hichipper/creat_yaml.nf → modules/local/hichipper/create_yaml.nf
diff --git a/modules/local/re_cut.nf b/modules/local/re_cut.nf
@@ -16,11 +16,11 @@ process RE_CUTSITE {
 
     script:
     """
+    restriction_enzyme_cutsite.py $enzyme
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         python: \$(echo \$(python --version) | sed 's/Python //')
     END_VERSIONS
-
-    restriction_enzyme_cutsite.py $enzyme
     """
 }
diff --git a/subworkflows/local/interaction_caller/hicdcplus.nf b/subworkflows/local/interaction_caller/hicdcplus.nf
@@ -3,7 +3,7 @@
  */
 
 include { HICDCPLUS_FEATURES             } from '../../../modules/local/hicdcplus/features'
-include { HICDCPLUS_CALLLOOPS            } from '../../../modules/local/hicdcplus/callloops'
+include { HICDCPLUS_CALL_LOOPS            } from '../../../modules/local/hicdcplus/callloops'
 
 workflow HICDCPLUS {
     take:
@@ -31,8 +31,8 @@ workflow HICDCPLUS {
                                     .map{[[id:it[0].id, bin:it[2]],
                                         it[1], it[3]]}
         .combine(bedpe, by: 0)
-    ch_loop = HICDCPLUS_CALLLOOPS(ch_reads.combine(additional_param.map{[it[2]]})).interactions
-    ch_versions = ch_versions.mix(HICDCPLUS_CALLLOOPS.out.versions.ifEmpty([]))
+    ch_loop = HICDCPLUS_CALL_LOOPS(ch_reads.combine(additional_param.map{[it[2]]})).interactions
+    ch_versions = ch_versions.mix(HICDCPLUS_CALL_LOOPS.out.versions.ifEmpty([]))
 
     emit:
     interactions = ch_loop                      // channel: [ meta, bin_size, path(bedpe) ]

diff --git a/workflows/hicar.nf b/workflows/hicar.nf
@@ -644,7 +644,7 @@ workflow HICAR {
             bedpe_module_name = 'MAPS_REFORMAT'
             break
         case 'hicdcplus':
-            bedpe_module_name = 'HICDCPLUS_CALLLOOPS'
+            bedpe_module_name = 'HICDCPLUS_CALL_LOOPS'
             break
         case 'peakachu':
             bedpe_module_name = 'PEAKACHU_SCORE'