Skip to content

Commit

Permalink
Merge branch 'main' into retreat-brainstorming
Browse files Browse the repository at this point in the history
  • Loading branch information
edmundmiller authored Mar 12, 2024
2 parents d105f7f + 186265e commit 7a64f75
Show file tree
Hide file tree
Showing 36 changed files with 962 additions and 10 deletions.
1 change: 1 addition & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
params.input = "${projectDir}/assets/test.csv"
47 changes: 45 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,51 @@ include { fromSamplesheet } from 'plugin/nf-validation'

include { BOWTIE_BUILD } from "./modules/nf-core/bowtie/build/main"
include { BOWTIE2_BUILD } from "./modules/nf-core/bowtie2/build/main"
// RNASEQ
include { STAR_GENOMEGENERATE } from "./modules/nf-core/star/genomegenerate/main"
include { HISAT2_EXTRACTSPLICESITES } from "./modules/nf-core/hisat2/extractsplicesites"
include { HISAT2_BUILD } from "./modules/nf-core/hisat2/build"
include { RSEM_PREPAREREFERENCE as MAKE_TRANSCRIPTS_FASTA } from "./modules/nf-core/rsem/preparereference"
include { SALMON_INDEX } from "./modules/nf-core/salmon/index"
include { KALLISTO_INDEX } from "./modules/nf-core/kallisto/index"
include { RSEM_PREPAREREFERENCE as RSEM_PREPAREREFERENCE_GENOME } from "./modules/nf-core/rsem/preparereference"

// Builds the RNA-seq reference assets (aligner indices plus a transcript FASTA)
// for each reference record received on `reference`.
workflow RNASEQ {
    take:
    reference // one record per reference: meta, fasta, gtf, bed, readme, mito, size

    main:
    // Fan each record out into per-file channels that all carry the same meta.
    // A `bed` branch is declared too, although nothing below consumes it.
    ch_ref = reference.multiMap { meta, fasta, gtf, bed, readme, mito, size ->
        fasta: tuple(meta, fasta)
        gtf: tuple(meta, gtf)
        bed: tuple(meta, bed)
    }

    // STAR genome index
    STAR_GENOMEGENERATE ( ch_ref.fasta, ch_ref.gtf )

    // HISAT2 index: the splice-site file is re-wrapped with an empty meta map
    // before being handed to the build step.
    HISAT2_EXTRACTSPLICESITES ( ch_ref.gtf )
    ch_splice_txt = HISAT2_EXTRACTSPLICESITES.out.txt.map { row -> [ [:], row[1] ] }
    HISAT2_BUILD ( ch_ref.fasta, ch_ref.gtf, ch_splice_txt )

    // Transcript FASTA, shared by the Salmon and Kallisto index builds
    MAKE_TRANSCRIPTS_FASTA ( ch_ref.fasta, ch_ref.gtf )
    ch_tx_fasta = MAKE_TRANSCRIPTS_FASTA.out.transcript_fasta

    SALMON_INDEX ( ch_ref.fasta, ch_tx_fasta )

    KALLISTO_INDEX ( ch_tx_fasta.map { tx -> [ [:], tx ] } )

    // RSEM reference built from the genome FASTA + GTF
    RSEM_PREPAREREFERENCE_GENOME ( ch_ref.fasta, ch_ref.gtf )

    emit:
    star_index = STAR_GENOMEGENERATE.out.index
    hisat2_index = HISAT2_BUILD.out.index
    transcript_fasta = ch_tx_fasta
    salmon_index = SALMON_INDEX.out.index
    kallisto_index = KALLISTO_INDEX.out.index
    rsem_index = RSEM_PREPAREREFERENCE_GENOME.out.index
}

// TODO workflow SAREK {

workflow INDEX {
take:
Expand All @@ -19,17 +63,16 @@ workflow INDEX {

BOWTIE_BUILD ( input.fasta )
BOWTIE2_BUILD ( input.fasta )
STAR_GENOMEGENERATE ( input.fasta, input.gtf )

emit:
// bowtie_index = BOWTIE_BUILD.out.index
bowtie2_index = BOWTIE2_BUILD.out.index
star_index = STAR_GENOMEGENERATE.out.index
}


// Entry workflow: read the samplesheet once and feed the same channel to both
// index-building sub-workflows.
workflow {
    // "input" is the name handed to nf-validation's fromSamplesheet
    ch_samplesheet = Channel.fromSamplesheet("input")

    INDEX ( ch_samplesheet )
    RNASEQ ( ch_samplesheet )
}
27 changes: 27 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,33 @@
"git_sha": "1fea64f5132a813ec97c1c6d3a74e0aee7142b6d",
"installed_by": ["modules"]
},
"hisat2/build": {
"branch": "master",
"git_sha": "400037f54de4b0c42712ec5a499d9fd9e66250d1",
"installed_by": ["modules"]
},
"hisat2/extractsplicesites": {
"branch": "master",
"git_sha": "400037f54de4b0c42712ec5a499d9fd9e66250d1",
"installed_by": ["modules"]
},
"kallisto/index": {
"branch": "master",
"git_sha": "de5811dd9ca15af1e131806001bcaae909e42021",
"installed_by": ["modules"]
},
"rsem/preparereference": {
"branch": "master",
"git_sha": "301b088c7e9e00c4c80686411383f07173b54d69",
"installed_by": ["modules"],
"patch": "modules/nf-core/rsem/preparereference/rsem-preparereference.diff"
},
"salmon/index": {
"branch": "master",
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
"installed_by": ["modules"],
"patch": "modules/nf-core/salmon/index/salmon-index.diff"
},
"star/genomegenerate": {
"branch": "master",
"git_sha": "0e98289b5bec6e3f8f588a8a9d05e8aacc1179a0",
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/hisat2/build/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: hisat2_build
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::hisat2=2.2.1
64 changes: 64 additions & 0 deletions modules/nf-core/hisat2/build/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Runs `hisat2-build` on `fasta`, writing the index under a new `hisat2/`
// directory (emitted as `index`) together with a `versions.yml` file.
// Splice sites and exons are only passed to the build when the memory
// assigned to the task reaches the `params.hisat2_build_memory` threshold.
process HISAT2_BUILD {
tag "$fasta"
label 'process_high'
label 'process_high_memory'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' :
'biocontainers/hisat2:2.2.1--h1b792b2_3' }"

// meta2 and meta3 are accepted but never referenced in this process body;
// only `meta` is propagated to the output.
input:
tuple val(meta), path(fasta)
tuple val(meta2), path(gtf)
tuple val(meta3), path(splicesites)

output:
tuple val(meta), path("hisat2") , emit: index
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
// Memory assigned to the task in GB; stays 0 when task.memory is not set.
def avail_mem = 0
if (!task.memory) {
log.info "[HISAT2 index build] Available memory not known - defaulting to 0. Specify process memory requirements to change this."
} else {
log.info "[HISAT2 index build] Available memory: ${task.memory}"
avail_mem = task.memory.toGiga()
}

// Optional command fragments; they remain empty strings unless the memory
// check below passes.
def ss = ''
def exon = ''
def extract_exons = ''
// Threshold in GB taken from params.hisat2_build_memory; 0 when the parameter is unset.
def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0
// With both values at their 0 defaults this comparison is true, so splice
// sites and exons are used whenever gtf / splicesites are provided.
if (avail_mem >= hisat2_build_memory) {
log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index"
extract_exons = gtf ? "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" : ""
ss = splicesites ? "--ss $splicesites" : ""
exon = gtf ? "--exon ${gtf.baseName}.exons.txt" : ""
} else {
log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index."
log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check."
}
def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
// The index files are written with the prefix hisat2/<fasta basename>.
"""
mkdir hisat2
$extract_exons
hisat2-build \\
-p $task.cpus \\
$ss \\
$exon \\
$args \\
$fasta \\
hisat2/${fasta.baseName}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
hisat2: $VERSION
END_VERSIONS
"""
}
61 changes: 61 additions & 0 deletions modules/nf-core/hisat2/build/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
name: hisat2_build
description: Builds HISAT2 index for reference genome
keywords:
- build
- index
- fasta
- genome
- reference
tools:
- hisat2:
description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome.
homepage: https://daehwankimlab.github.io/hisat2/
documentation: https://daehwankimlab.github.io/hisat2/manual/
doi: "10.1038/s41587-019-0201-4"
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fasta:
type: file
description: Reference fasta file
pattern: "*.{fa,fasta,fna}"
- meta2:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- gtf:
type: file
description: Reference gtf annotation file
pattern: "*.{gtf}"
- meta3:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- splicesites:
type: file
description: Splice sites extracted from the gtf file
pattern: "*.{txt}"
output:
- meta:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- index:
type: file
description: HISAT2 genome index file
pattern: "*.ht2"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@ntoda03"
maintainers:
- "@ntoda03"
53 changes: 53 additions & 0 deletions modules/nf-core/hisat2/build/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// nf-test spec for the HISAT2_BUILD process defined in ../main.nf.
nextflow_process {

name "Test Process HISAT2_BUILD"
script "../main.nf"
process "HISAT2_BUILD"
tag "modules"
tag "modules_nfcore"
tag "hisat2"
tag "hisat2/build"
tag "hisat2/extractsplicesites"

test("Should run without failures") {

// Run HISAT2_EXTRACTSPLICESITES first so its `txt` output can be used as
// the third input of the process under test.
setup {
run("HISAT2_EXTRACTSPLICESITES") {
script "../../extractsplicesites/main.nf"
process {
"""
input[0] = Channel.of([
[id:'genome'],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true)
])
"""
}
}
}

// Inputs: [0] genome FASTA, [1] genome GTF, [2] splice sites from the setup step.
when {
params {
outdir = "$outputDir"
}
process {
"""
input[0] = Channel.of([
[id:'genome'],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
])
input[1] = Channel.of([ [id:'genome'],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true)
])
input[2] = HISAT2_EXTRACTSPLICESITES.out.txt
"""
}
}

// The process must succeed and all of its outputs must match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}
}
49 changes: 49 additions & 0 deletions modules/nf-core/hisat2/build/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"Should run without failures": {
"content": [
{
"0": [
[
{
"id": "genome"
},
[
"genome.1.ht2:md5,057cfa8a22b97ee9cff4c8d342498803",
"genome.2.ht2:md5,47b153cd1319abc88dda532462651fcf",
"genome.3.ht2:md5,4ed93abba181d8dfab2e303e33114777",
"genome.4.ht2:md5,c25be5f8b0378abf7a58c8a880b87626",
"genome.5.ht2:md5,91198831aaba993acac1734138c5f173",
"genome.6.ht2:md5,265e1284ce85686516fae5d35540994a",
"genome.7.ht2:md5,9013eccd91ad614d7893c739275a394f",
"genome.8.ht2:md5,33cdeccccebe80329f1fdbee7f5874cb"
]
]
],
"1": [
"versions.yml:md5,e36ef3cd73d19ccf2378c9358fe942c0"
],
"index": [
[
{
"id": "genome"
},
[
"genome.1.ht2:md5,057cfa8a22b97ee9cff4c8d342498803",
"genome.2.ht2:md5,47b153cd1319abc88dda532462651fcf",
"genome.3.ht2:md5,4ed93abba181d8dfab2e303e33114777",
"genome.4.ht2:md5,c25be5f8b0378abf7a58c8a880b87626",
"genome.5.ht2:md5,91198831aaba993acac1734138c5f173",
"genome.6.ht2:md5,265e1284ce85686516fae5d35540994a",
"genome.7.ht2:md5,9013eccd91ad614d7893c739275a394f",
"genome.8.ht2:md5,33cdeccccebe80329f1fdbee7f5874cb"
]
]
],
"versions": [
"versions.yml:md5,e36ef3cd73d19ccf2378c9358fe942c0"
]
}
],
"timestamp": "2023-10-16T14:42:22.381609786"
}
}
3 changes: 3 additions & 0 deletions modules/nf-core/hisat2/build/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
hisat2/build:
- modules/nf-core/hisat2/build/**
- modules/nf-core/hisat2/extractsplicesites/**
7 changes: 7 additions & 0 deletions modules/nf-core/hisat2/extractsplicesites/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: hisat2_extractsplicesites
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::hisat2=2.2.1
31 changes: 31 additions & 0 deletions modules/nf-core/hisat2/extractsplicesites/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Runs `hisat2_extract_splice_sites.py` on `gtf` and redirects its stdout to
// `<gtf basename>.splice_sites.txt` (emitted as `txt`), alongside a
// `versions.yml` file.
process HISAT2_EXTRACTSPLICESITES {
tag "$gtf"
label 'process_medium'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' :
'biocontainers/hisat2:2.2.1--h1b792b2_3' }"

input:
tuple val(meta), path(gtf)

output:
tuple val(meta), path("*.splice_sites.txt"), emit: txt
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
// NOTE(review): `args` is read from task.ext.args but is never interpolated
// into the command below, so setting ext.args has no effect on this process.
def args = task.ext.args ?: ''
def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
hisat2_extract_splice_sites.py $gtf > ${gtf.baseName}.splice_sites.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
hisat2: $VERSION
END_VERSIONS
"""
}
Loading

0 comments on commit 7a64f75

Please sign in to comment.