Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix unique lanes for freebayes, groupKey, add new indices, copy meta maps #549

Merged
merged 39 commits into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
20b9cf1
Publish pileuptables to tumor_vs_normal results dir
FriederikeHanssen May 16, 2022
7476ec1
fix output path
FriederikeHanssen May 16, 2022
f59312f
update modules
FriederikeHanssen May 16, 2022
def2fc7
Add freebayes paired
FriederikeHanssen May 16, 2022
61f5f4b
Update subway map with freebayes & cnvkit
FriederikeHanssen May 16, 2022
7738eeb
Add freebayes paired
FriederikeHanssen May 16, 2022
1fe6a20
Update subway map with freebayes & cnvkit
FriederikeHanssen May 16, 2022
5c33893
Merge remote-tracking branch 'origin/freebayes' into freebayes
FriederikeHanssen May 16, 2022
dc15c76
Publish pileuptables to tumor_vs_normal results dir
FriederikeHanssen May 16, 2022
fbc06a6
fix output path
FriederikeHanssen May 16, 2022
95773f3
Empty channel for optional output
FriederikeHanssen May 16, 2022
a48d95e
Merge remote-tracking branch 'origin/freebayes' into freebayes
FriederikeHanssen May 16, 2022
e097905
Test out readgroupID with sample & add freebayes tests that run through
FriederikeHanssen May 16, 2022
2c380fe
Merge remote-tracking branch 'origin/dev' into freebayes
FriederikeHanssen May 17, 2022
6de25cf
Fix paired test with new lane number testing for uniqueness
FriederikeHanssen May 17, 2022
5e07918
add bcftools sort
FriederikeHanssen May 17, 2022
0faa9bf
test with intervals
FriederikeHanssen May 17, 2022
b9a727c
Use new_meta for all groupKeys
FriederikeHanssen May 17, 2022
2861cee
only publish after sorting
FriederikeHanssen May 17, 2022
a424889
Conditional output naming for paired or single
FriederikeHanssen May 17, 2022
eb5b628
add flowcell code
FriederikeHanssen May 18, 2022
33abb06
remove println statements
FriederikeHanssen May 18, 2022
ec4f519
replace clone everywhere, the weird meta error is back
FriederikeHanssen May 19, 2022
8064849
fix some id tags
FriederikeHanssen May 19, 2022
705103c
fix some id tags
FriederikeHanssen May 19, 2022
f9e3eb9
Fix new_meta
FriederikeHanssen May 19, 2022
b2e2db0
Update changelog
FriederikeHanssen May 19, 2022
581f8f8
Update changelog
FriederikeHanssen May 19, 2022
ab99229
Groupkey issue, workflow stalling
FriederikeHanssen May 19, 2022
7bff603
revert groupkey changes, save for new PR
FriederikeHanssen May 19, 2022
3b5a95a
Actually use groupKey
FriederikeHanssen May 20, 2022
bb22bf3
typo
FriederikeHanssen May 20, 2022
6b17c4b
copy meta also for variantcaller to make it work
FriederikeHanssen May 20, 2022
d8c4eac
typo
FriederikeHanssen May 20, 2022
df2fe42
fix meta for manta
FriederikeHanssen May 20, 2022
3874667
typo strelka sw
FriederikeHanssen May 21, 2022
4bc38ea
variantcaller copy for mutect2
FriederikeHanssen May 21, 2022
d832dd2
rename hashtable to dragmap
FriederikeHanssen May 23, 2022
cf11408
add pon default file to igenomes
FriederikeHanssen May 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#513](https://github.com/nf-core/sarek/pull/513), [#527](https://github.com/nf-core/sarek/pull/527) - CNV is back
- [#529](https://github.com/nf-core/sarek/pull/529) - Do not save `versions.yml` files
- [#524](https://github.com/nf-core/sarek/pull/524) - Fix intervals usage by counting the actual list of scatter/gather files produced and not overall number of intervals
- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes: issue [#311](https://github.com/nf-core/sarek/issues/311)

### Deprecated

Expand Down
26 changes: 21 additions & 5 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -475,13 +475,9 @@ process{
// For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE
// (exit code 141). Rerunning the process will usually work.
errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'}
ext.args = { params.no_intervals ? "-n" : "" }
}
withName : 'TABIX_VC_.*' {
ext.when = { params.no_intervals }
ext.args = { params.no_intervals ? "-n" : "" } //Why ConcatVCF is never run when no_intervals is set..
}
withName : 'BGZIP_VC_.*' {
ext.when = { !params.no_intervals }
publishDir = [
enabled: false
]
Expand Down Expand Up @@ -525,15 +521,23 @@ process{
]
}
// Settings applied to every process whose name matches 'FREEBAYES':
// output prefix, command-line arguments, run condition, and publishing.
withName: 'FREEBAYES' {
// Output files are named "<meta.id>.freebayes" (closure is evaluated per task).
ext.prefix = {"${meta.id}.freebayes"} //To make sure no naming conflicts ensue with module BCFTOOLS_SORT & the naming being correct in the output folder
// Extra command-line arguments; presumably consumed by the module as task.ext.args -- confirm in the module's main.nf.
ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1'
// Run only when params.tools is set and contains 'freebayes'.
ext.when = { params.tools && params.tools.contains('freebayes') }
// Raw FreeBayes output is not published from this process.
publishDir = [
enabled: false
]
}

// Publishing rules for BCFTOOLS_SORT output.
// NOTE(review): the target path is hard-coded to the "freebayes" sub-directory,
// so this selector assumes BCFTOOLS_SORT is only used on FreeBayes output -- confirm.
withName: 'BCFTOOLS_SORT' {
publishDir = [
mode: params.publish_dir_mode,
// Destination: <outdir>/variant_calling/<meta.id>/freebayes
path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" },
// Only files matching "*vcf.gz" are considered for publishing.
pattern: "*vcf.gz",
// Returns null when meta.num_intervals > 1; otherwise the file name is kept unchanged.
// Per the Nextflow publishDir docs a null result from saveAs skips publishing that file.
saveAs: { meta.num_intervals > 1 ? null : it }
]
}

withName : 'TABIX_VC_FREEBAYES' {
publishDir = [
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -900,6 +904,18 @@ process{
}
}

//FREEBAYES
// FreeBayes arguments for the process addressed by its fully qualified name,
// i.e. FREEBAYES as invoked through RUN_FREEBAYES_SOMATIC inside PAIR_VARIANT_CALLING.
// NOTE(review): presumably meant to take precedence over the ext.args of a generic
// 'FREEBAYES' selector for this invocation -- confirm selector priority.
// The value is one double-quoted string continued across lines with trailing
// backslashes; do not insert comments between the continued lines.
withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' {
ext.args = "--pooled-continuous \
--pooled-discrete \
--genotype-qualities \
--report-genotype-likelihood-max \
--allele-balance-priors-off \
--min-alternate-fraction 0.03 \
--min-repeat-entropy 1 \
--min-alternate-count 2 "
}

//MANTA
withName: 'CONCAT_MANTA_SOMATIC' {
ext.prefix = {"${meta.id}.somatic_sv"}
Expand Down
Binary file modified docs/images/sarek_subway.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
372 changes: 178 additions & 194 deletions docs/images/sarek_subway.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 4 additions & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
"ascat": {
"git_sha": "f0800157544a82ae222931764483331a81812012"
},
"bcftools/sort": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"bcftools/stats": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
Expand Down Expand Up @@ -130,7 +133,7 @@
"git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7"
},
"gatk4/markduplicates": {
"git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7"
"git_sha": "df2620cfc7e4c21b14ed03c1c928f09fbabf83c4"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was that expected to be different?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, module update. Someone changed the default resource usage, which makes sense IMO

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, i see 👍

},
"gatk4/markduplicatesspark": {
"git_sha": "e04970b7d249365cafa5a52912f9a28840481c05"
Expand Down
35 changes: 35 additions & 0 deletions modules/nf-core/modules/bcftools/sort/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions modules/nf-core/modules/bcftools/sort/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion modules/nf-core/modules/gatk4/markduplicates/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 6 additions & 10 deletions subworkflows/local/germline_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,32 +38,28 @@ workflow GERMLINE_VARIANT_CALLING {
// Remap channel with intervals
cram_recalibrated_intervals = cram_recalibrated.combine(intervals)
.map{ meta, cram, crai, intervals, num_intervals ->
new_meta = meta.clone()

// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
new_meta.num_intervals = num_intervals
new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName

//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals

[new_meta, cram, crai, intervals_new]
[[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
cram, crai, intervals_new]
}

// Remap channel with gzipped intervals + indexes
cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi)
.map{ meta, cram, crai, bed_tbi, num_intervals ->
new_meta = meta.clone()

// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName
new_meta.num_intervals = num_intervals
new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName

//If no interval file provided (0) then add empty list
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]

[new_meta, cram, crai, bed_new, tbi_new]
[[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
cram, crai, bed_new, tbi_new]
}

// DEEPVARIANT
Expand Down
32 changes: 19 additions & 13 deletions subworkflows/local/pair_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main'
include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main'
include { RUN_CONTROLFREEC_SOMATIC } from '../nf-core/variantcalling/controlfreec/somatic/main.nf'
include { RUN_FREEBAYES as RUN_FREEBAYES_SOMATIC } from '../nf-core/variantcalling/freebayes/main.nf'
include { RUN_MANTA_SOMATIC } from '../nf-core/variantcalling/manta/somatic/main.nf'
include { RUN_STRELKA_SOMATIC } from '../nf-core/variantcalling/strelka/somatic/main.nf'

Expand Down Expand Up @@ -34,6 +35,7 @@ workflow PAIR_VARIANT_CALLING {
ch_versions = Channel.empty()

//TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
freebayes_vcf = Channel.empty()
manta_vcf = Channel.empty()
strelka_vcf = Channel.empty()
msisensorpro_output = Channel.empty()
Expand All @@ -42,32 +44,28 @@ workflow PAIR_VARIANT_CALLING {
// Remap channel with intervals
cram_pair_intervals = cram_pair.combine(intervals)
.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals, num_intervals ->
new_meta = meta.clone()

// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
new_meta.id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + intervals.baseName
new_meta.num_intervals = num_intervals
new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + intervals.baseName

//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals

[new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new]
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals],
normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new]
}

// Remap channel with gzipped intervals + indexes
cram_pair_intervals_gz_tbi = cram_pair.combine(intervals_bed_gz_tbi)
.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed_tbi, num_intervals ->
new_meta = meta.clone()

// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
new_meta.id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName
new_meta.num_intervals = num_intervals
new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName

//If no interval file provided (0) then add empty list
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]

[new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new]
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals],
normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new]
}

if (tools.contains('controlfreec')){
Expand All @@ -93,6 +91,13 @@ workflow PAIR_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_CONTROLFREEC_SOMATIC.out.versions)
}

if (tools.contains('freebayes')){
RUN_FREEBAYES_SOMATIC(cram_pair_intervals, fasta, fasta_fai, intervals_bed_combine_gz)

freebayes_vcf = RUN_FREEBAYES_SOMATIC.out.freebayes_vcf
ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions)
}

if (tools.contains('manta')) {
RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi,
fasta,
Expand All @@ -114,14 +119,14 @@ workflow PAIR_VARIANT_CALLING {
.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, bed_tbi, num_intervals ->

// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
new_meta.id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName
new_meta.num_intervals = num_intervals
new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName

//If no interval file provided (0) then add empty list
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]

[new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new]
[[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals],
normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new]
}
} else {
cram_pair_strelka = cram_pair_intervals_gz_tbi.map{
Expand Down Expand Up @@ -172,6 +177,7 @@ workflow PAIR_VARIANT_CALLING {
// }

emit:
freebayes_vcf
manta_vcf
msisensorpro_output
mutect2_vcf
Expand Down
17 changes: 6 additions & 11 deletions subworkflows/local/tumor_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// Should be only run on patients without normal sample
//

//include { RUN_CONTROLFREEC } from '../nf-core/variantcalling/controlfreec/main.nf'
include { RUN_FREEBAYES } from '../nf-core/variantcalling/freebayes/main.nf'
include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main'
include { RUN_MANTA_TUMORONLY } from '../nf-core/variantcalling/manta/tumoronly/main.nf'
Expand Down Expand Up @@ -44,32 +43,28 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
// Remap channel with intervals
cram_recalibrated_intervals = cram_recalibrated.combine(intervals)
.map{ meta, cram, crai, intervals, num_intervals ->
new_meta = meta.clone()

// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
new_meta.num_intervals = num_intervals
new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName

//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals

[new_meta, cram, crai, intervals_new]
[[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
cram, crai, intervals_new]
}

// Remap channel with gzipped intervals + indexes
cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi)
.map{ meta, cram, crai, bed_tbi, num_intervals ->
new_meta = meta.clone()

// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName
new_meta.num_intervals = num_intervals
new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName

//If no interval file provided (0) then add empty list
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]

[new_meta, cram, crai, bed_new, tbi_new]
[[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
cram, crai, bed_new, tbi_new]
}

if(tools.contains('controlfreec')){
Expand Down
Loading