Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ciriquant quantification #172

Merged
merged 49 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
49ffa9b
Simplify ciriquant config creation
nictru Aug 23, 2024
75c7220
Implement support for single-stranded libraries
nictru Aug 23, 2024
e2f459c
Add support for bed files in ciriquant
nictru Aug 23, 2024
ad6a32d
Add support for multiple quantification methods
nictru Aug 23, 2024
44c8e63
Implement first bit of global ciriquant quantification
nictru Aug 23, 2024
71408b2
Enable ciriquant gene quantification
nictru Aug 23, 2024
38930d9
Implement ciriquant circ count extraction
nictru Aug 24, 2024
6276055
Generalize gffread module
nictru Aug 24, 2024
5a77dd4
Further generalize gffread
nictru Aug 24, 2024
8036e87
Improve gffread fasta creation
nictru Aug 24, 2024
d7ef965
Use nf-core gffread module and add pygtftk
nictru Aug 24, 2024
f4b1925
Implement gene expression extraction
nictru Aug 24, 2024
6f8771c
Implement ciriquant expression column naming
nictru Aug 24, 2024
73d7348
Implement ciriquant expression joining
nictru Aug 24, 2024
4af0de5
Improve ciriquant handling
nictru Aug 24, 2024
4b76177
Implement channel handling for ciriquant DEA
nictru Aug 24, 2024
6c7d2b6
Implement ciriquant_dea samplesheet creation
nictru Aug 24, 2024
3510258
Implement ciriquant DEA preprocessing
nictru Aug 24, 2024
b5298d9
Implement capturing of stringtie results from ciriquant
nictru Aug 24, 2024
e775670
Implement basic stringtie_prepde
nictru Aug 24, 2024
285c24c
Switch back to official ciriquant image to prevent stringtie compatib…
nictru Aug 24, 2024
f622e21
Rename ciriquant DE preparation module
nictru Aug 24, 2024
aa2ba68
Implement ciriquant de analysis
nictru Aug 24, 2024
d34e2fc
Improve ciriquant de output structure
nictru Aug 24, 2024
c57e807
Add ciriquant dockerfile
nictru Aug 24, 2024
328393b
Update ciriquant docker image
nictru Aug 24, 2024
084d3b5
Change ciriquant de output format to csv
nictru Aug 24, 2024
7c24732
Fix ciriquant de caching problems
nictru Aug 24, 2024
43eeb8f
Fix ciriquant de version capture problems
nictru Aug 24, 2024
ca318b4
Make sure ciriquant de is only executed once per pairing
nictru Aug 24, 2024
d7cd7a4
Add proper handling of ciriquant RDS
nictru Aug 24, 2024
d753d55
Update ciriquant image
nictru Aug 25, 2024
57d440c
Add retries to some memory intensive processes
nictru Aug 26, 2024
96eaaaf
Increase annotation resource label
nictru Aug 26, 2024
9315d69
Update ciriquant image
nictru Aug 26, 2024
293f6c8
Improve ciriquant output directory definition
nictru Aug 27, 2024
18ec77f
Tweak ciriquant publishdir definitions
nictru Aug 27, 2024
18d960a
Switch miRNA majority vote to polars
nictru Aug 27, 2024
0c93829
Fix some problems with updated mirna majority vote
nictru Aug 27, 2024
748dc0d
Further improve mirna majority vote performance
nictru Aug 27, 2024
70f8533
Implement custom sample joining script
nictru Aug 27, 2024
fa254bc
Fix minor bug in new sample joining module
nictru Aug 27, 2024
3cb98b1
Add default value for mature parameter
nictru Sep 24, 2024
288f06a
Fix null value in publishDir
nictru Sep 24, 2024
1e6b98d
Add quantification_tools to nextflow_schema.json
nictru Sep 24, 2024
a90fd39
Fix editorconfig
nictru Sep 25, 2024
b993d16
Move save_intermediates to less prominent spot in docs
nictru Sep 25, 2024
fdc4f3c
Simplify parameter docs structure
nictru Sep 25, 2024
ddaed0a
Fix schema error
nictru Sep 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 87 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,8 @@ process {
]
}

withName: '.*:CIRIQUANT:MAIN' {
withName: '.*:BSJ_DETECTION:CIRIQUANT:MAIN' {
ext.args = "--no-gene"
publishDir = [
path: { "${params.outdir}/bsj_detection/tools/ciriquant/intermediates/${meta.id}" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -664,6 +665,7 @@ process {

withName: 'COMBINE_COUNTS_PER_TOOL' {
ext.args = "-f 1 -t -O"
maxRetries = 3
publishDir = [
path: { "${params.outdir}/bsj_detection/tools/${meta.id}" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -787,6 +789,7 @@ process {

withName: '.*:ANNOTATE_(COMBINED|PER_SAMPLE|PER_SAMPLE_TOOL):ANNOTATE' {
ext.prefix = { "${meta.id}.annotated" }
maxRetries = 3
}

withName: '.*:ANNOTATE_COMBINED:ANNOTATE' {
Expand Down Expand Up @@ -868,6 +871,7 @@ process {
}

withName: TRANSCRIPTOME {
ext.args = "-w"
publishDir = [
path: { "${params.outdir}/quantification/transcriptome" },
mode: params.publish_dir_mode,
Expand All @@ -888,6 +892,62 @@ process {
]
}

// Publishing rules for the CIRIquant run inside the QUANTIFICATION workflow.
// Three publishDir entries sort the outputs into separate folders by glob pattern.
// Every saveAs closure skips versions.yml and keeps only the base name of the file
// (the text after the last '/'), so the per-sample sub-directories are flattened.
withName: '.*:QUANTIFICATION:CIRIQUANT:MAIN' {
publishDir = [
[
// Files matching */gene/*_out.gtf
path: { "${params.outdir}/quantification/ciriquant/results/transcripts" },
mode: params.publish_dir_mode,
pattern: "*/gene/*_out.gtf",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename.split('/').last() }
],
[
// Files matching */gene/*_genes.list
path: { "${params.outdir}/quantification/ciriquant/results/genes" },
mode: params.publish_dir_mode,
pattern: "*/gene/*_genes.list",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename.split('/').last() }
],
[
// GTF files located directly inside the first-level output directory
path: { "${params.outdir}/quantification/ciriquant/results/circs" },
mode: params.publish_dir_mode,
pattern: "*/*.gtf",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename.split('/').last() }
]
]
}

// Settings for the circRNA extraction step that follows CIRIquant quantification.
withName: '.*:QUANTIFICATION:CIRIQUANT:EXTRACT_CIRC' {
// PYGTFTK process that transforms a GTF file into circ_id, gene_id, count
// -k lists the keys to export; -s sets the output separator (an escaped tab).
ext.args = "-k circ_id,gene_id,score -s \\\t"
ext.suffix = "circ.tsv"
// Publish everything except versions.yml, keeping the original file names.
publishDir = [
path: { "${params.outdir}/quantification/ciriquant/circ" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for the gene extraction step that follows CIRIquant quantification.
withName: '.*:QUANTIFICATION:CIRIQUANT:EXTRACT_GENES' {
// awk-style arguments: tab as input and output field separator, print fields 1, 2 and 9.
// The dollar signs are escaped so Groovy does not interpolate them.
// NOTE(review): presumably consumed by an awk module reading the *_genes.list
// files; the meaning of column 9 is not visible here — confirm.
ext.args = "-v FS='\\t' -v OFS='\\t' '{ print \$1, \$2, \$9 }'"
ext.suffix = "gene.tsv"
// Publish everything except versions.yml, keeping the original file names.
publishDir = [
path: { "${params.outdir}/quantification/ciriquant/gene" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Both CIRIquant join processes (JOIN_GENE and JOIN_CIRC) publish into the same
// "combined" folder; versions.yml is never published.
withName: ".*:CIRIQUANT:JOIN_(GENE|CIRC)" {
publishDir = [
path: { "${params.outdir}/quantification/ciriquant/combined" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Extra setting that applies only to the circRNA join.
// NOTE(review): metacols presumably names the identifier columns that are not
// per-sample values; confirm against the join module that reads ext.metacols.
withName: ".*:CIRIQUANT:JOIN_CIRC" {
ext.metacols = "circ_id,gene_id"
}

withName: PSIRC_INDEX {
publishDir = [
path: { "${params.outdir}/references/index/psirc" },
Expand All @@ -896,7 +956,7 @@ process {
]
}

withName: PSIRC_QUANT {
withName: RUN_PSIRC_QUANT {
publishDir = [
path: { "${params.outdir}/quantification/samples/${meta.id}/psirc" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -928,7 +988,7 @@ process {
]
}

withName: ".*:JOIN_(GENE|TX)_(COUNTS|TPM)" {
withName: ".*:PSIRC_QUANT:JOIN_(GENE|TX)_(COUNTS|TPM)" {
ext.args = "-f 1,2 -t"
label = "process_medium"
maxRetries = 3
Expand Down Expand Up @@ -1049,4 +1109,28 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Publish the CIRIQUANT_PREPDE outputs (all files except versions.yml) under
// statistical_tests/ciriquant_de/ciriquant_prep.
withName: CIRIQUANT_PREPDE {
publishDir = [
path: { "${params.outdir}/statistical_tests/ciriquant_de/ciriquant_prep" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Publish the STRINGTIE_PREPDE outputs (all files except versions.yml) under
// statistical_tests/ciriquant_de/stringtie_prep.
withName: STRINGTIE_PREPDE {
publishDir = [
path: { "${params.outdir}/statistical_tests/ciriquant_de/stringtie_prep" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Publish the CIRIQUANT_DE outputs (all files except versions.yml) directly
// under statistical_tests/ciriquant_de.
withName: CIRIQUANT_DE {
publishDir = [
path: { "${params.outdir}/statistical_tests/ciriquant_de" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@
"git_sha": "cf3ed075695639b0a0924eb0901146df1996dc08",
"installed_by": ["modules"]
},
"gffread": {
"branch": "master",
"git_sha": "6c996d7fbe0816dcbb68ce587ad5f873313682a1",
"installed_by": ["modules"]
},
"gnu/sort": {
"branch": "master",
"git_sha": "a3cc42943548378b726610f45bb5a79ab3f0b633",
Expand Down
2 changes: 1 addition & 1 deletion modules/local/annotation/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process ANNOTATION {
tag "$meta.id"
label 'process_single'
label 'process_medium'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand Down
5 changes: 2 additions & 3 deletions modules/local/circtest/circtest/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ process CIRCTEST_CIRCTEST {
'biocontainers/mulled-v2-c79b00aa4647c739dbe7e8480789d3ba67988f2e:0' }"

input:
tuple val(meta) , path(circ_counts)
tuple val(meta2), path(gene_counts)
tuple val(meta3), path(phenotype)
tuple val(meta) , path(gene_counts), path(circ_counts)
tuple val(meta2), path(phenotype)

output:
tuple val(meta), path("${prefix}_summary.txt"), emit: summary
Expand Down
6 changes: 2 additions & 4 deletions modules/local/circtest/prepare/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@ process CIRCTEST_PREPARE {
container "biocontainers/r-base:4.2.1"

input:
tuple val(meta), path(circ_counts)
tuple val(meta2), path(gene_counts)
tuple val(meta), path(gene_counts), path(circ_counts)

output:
tuple val(meta), path('*_circs.tsv'), emit: circ_counts, optional: true
tuple val(meta), path('*_genes.tsv'), emit: gene_counts, optional: true
tuple val(meta), path('*_genes.tsv'), path('*_circs.tsv'), emit: counts, optional: true

path "versions.yml" , emit: versions

Expand Down
4 changes: 4 additions & 0 deletions modules/local/ciriquant/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Container image for the local CIRIquant modules.
# The base is a Seqera Wave community image; its name lists the bundled
# packages (bioconductor-edger, bwa, hisat2, pysam).
FROM community.wave.seqera.io/library/bioconductor-edger_bwa_hisat2_pysam_pruned:f14fb8726c7f0cf8

# Install custom fork, pinned to an exact commit so rebuilds are reproducible.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir git+https://github.com/nictru/CIRIquant.git@e4916ca7b3370cef54d76ca162be64792d8c1b16
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,21 @@ process CIRIQUANT {
tag "$meta.id"
label 'process_high'

conda "bioconda::ciriquant=1.1.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ciriquant:1.1.2--pyhdfd78af_2' :
'biocontainers/ciriquant:1.1.2--pyhdfd78af_2' }"
container "docker.io/nicotru/ciriquant:1.0.4"

input:
tuple val(meta), path(reads)
tuple val(meta2), path(gtf)
tuple val(meta3), path(fasta)
tuple val(meta4), path(bwa)
tuple val(meta5), path(hisat2)
tuple val(meta2), path(bed)
tuple val(meta3), path(gtf)
tuple val(meta4), path(fasta)
tuple val(meta5), path(bwa)
tuple val(meta6), path(hisat2)

output:
tuple val(meta), path("${prefix}/${prefix}.gtf"), emit: gtf
path("${prefix}") , emit: results
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}/${prefix}.gtf") , emit: gtf
tuple val(meta), path("${prefix}/gene/${prefix}_genes.list"), emit: gene_list, optional: true
tuple val(meta), path("${prefix}/gene/${prefix}_out.gtf") , emit: gene_gtf, optional: true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -28,6 +27,8 @@ process CIRIQUANT {
def VERSION = '2.1.0'
def strandedness = meta.strandedness ?: 'auto'
def library_type = strandedness == 'auto' ? '' : strandedness == 'unstranded' ? '-l 0' : strandedness == 'forward' ? '-l 1' : '-l 2'
def reads_string = meta.single_end ? "-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
def bed_string = bed ? "--bed ${bed}" : ''
"""
BWA=`which bwa`
HISAT2=`which hisat2`
Expand All @@ -44,10 +45,9 @@ process CIRIQUANT {

CIRIquant \\
-t ${task.cpus} \\
-1 ${reads[0]} \\
-2 ${reads[1]} \\
${reads_string} \\
${bed_string} \\
--config config.yml \\
--no-gene \\
-o ${prefix} \\
-p ${prefix} \\
${library_type} \\
Expand Down
35 changes: 35 additions & 0 deletions modules/local/ciriquant/de/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Runs CIRIquant's CIRI_DE_replicate on one prepared comparison.
//
// Input : meta map plus three files passed to --lib, --bsj and --gene.
// Output: circ - "<prefix>.circ.csv" written via --out
//         gene - "<prefix>.gene.csv" written via --out2
//         versions.yml with the CIRIquant version reported by the container.
// Extra command-line options can be supplied through ext.args in the config.
process CIRIQUANT_DE {
    tag "$meta.id"
    label 'process_high'

    container "docker.io/nicotru/ciriquant:1.0.4"

    input:
    tuple val(meta), path(library), path(expression), path(gene)

    output:
    tuple val(meta), path("${circ_path}"), emit: circ
    tuple val(meta), path("${gene_path}"), emit: gene
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Optional extra arguments; empty by default so the command is unchanged.
    def args = task.ext.args ?: ''
    // prefix, circ_path and gene_path are intentionally not declared with `def`:
    // the output block above references them.
    prefix = task.ext.prefix ?: "${meta.id}"
    circ_path = "${prefix}.circ.csv"
    gene_path = "${prefix}.gene.csv"
    """
    CIRI_DE_replicate \\
        --lib ${library} \\
        --bsj ${expression} \\
        --gene ${gene} \\
        --out ${circ_path} \\
        --out2 ${gene_path} \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ciriquant: \$(echo \$(CIRIquant --version 2>&1) | sed 's/CIRIquant //g' )
    END_VERSIONS
    """
}
42 changes: 42 additions & 0 deletions modules/local/ciriquant/prepde/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Prepares the inputs for CIRIquant differential expression.
//
// Input : meta map, a list of sample names, the matching CIRIquant GTF files
//         and the matching condition labels (same order in all three).
// Steps : writes a tab-separated sample sheet (sample, gtf, condition) to
//         samples.txt and runs prep_CIRIquant on it.
// Output: library, annotation, expression and gene ("<prefix>_ratio.tsv")
//         tables, plus versions.yml.
process CIRIQUANT_PREPDE {
    tag "$meta.id"
    label 'process_high'

    container "docker.io/nicotru/ciriquant:1.0.4"

    input:
    tuple val(meta), val(samples), path(gtfs), val(conditions)

    output:
    tuple val(meta), path("${prefix}_library.tsv")   , emit: library
    tuple val(meta), path("${prefix}_annotation.tsv"), emit: annotation
    tuple val(meta), path("${prefix}_expression.tsv"), emit: expression
    tuple val(meta), path("${prefix}_ratio.tsv")     , emit: gene
    path "versions.yml"                              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Not declared with `def`: the output block above references prefix.
    prefix = task.ext.prefix ?: "${meta.id}"
    // When exactly one file is staged, Nextflow passes a bare path instead of a
    // list, which transpose() cannot handle; wrap it so both cases work.
    def gtf_list = gtfs instanceof Path ? [gtfs] : gtfs
    // One line per sample. The doubled backslashes keep \t and \n as literal
    // escape sequences so that `echo -e` expands them in the shell.
    def samplesheet = [samples, gtf_list, conditions]
        .transpose()
        .collect { sample, gtf, condition -> "${sample}\\t${gtf}\\t${condition}" }
        .join('\\n')
    """
    echo -e "${samplesheet}" > samples.txt

    prep_CIRIquant -i samples.txt \\
        --lib ${prefix}_library.tsv \\
        --circ ${prefix}_annotation.tsv \\
        --bsj ${prefix}_expression.tsv \\
        --ratio ${prefix}_ratio.tsv \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ciriquant: \$(echo \$(CIRIquant --version 2>&1) | sed 's/CIRIquant //g' )
    END_VERSIONS
    """
}
7 changes: 7 additions & 0 deletions modules/local/majority_vote/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the MAJORITY_VOTE module.
# The name previously read "annotation", copied from another module.
name: majority_vote
channels:
  - conda-forge
  - bioconda
dependencies:
  # Exact versions are pinned for reproducibility.
  - conda-forge::polars=1.5.0
  - conda-forge::pyyaml=6.0.2
8 changes: 4 additions & 4 deletions modules/local/majority_vote/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ process MAJORITY_VOTE {
tag "$meta.id"
label 'process_medium'

conda "bioconda::pandas=1.5.2"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'biocontainers/pandas:1.5.2' }"
'oras://community.wave.seqera.io/library/polars_pyyaml:962a0cf7480258c7' :
'community.wave.seqera.io/library/polars_pyyaml:ad93db0d7bcd508e' }"

input:
tuple val(meta), path(bindingsites)


output:
tuple val(meta), path("${meta.id}.majority.tsv"), emit: tsv
tuple val(meta), path("${meta.id}.targets.tsv") , emit: targets
Expand All @@ -20,6 +19,7 @@ process MAJORITY_VOTE {
task.ext.when == null || task.ext.when

script:
min_tools = params.mirna_tool_filter
template 'majority.py'

stub:
Expand Down
Loading
Loading