Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keep unresolved Manta calls #723

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion wdl/GatherBatchEvidence.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,8 @@ workflow GatherBatchEvidence {
File? BAF_stats = MatrixQC.BAF_stats
File? Matrix_QC_plot = MatrixQC.QC_plot

Array[File]? manta_tloc = TinyResolve.tloc_manta_vcf
File? manta_tloc_tar = TinyResolve.manta_tloc_tar
File? manta_unresolved_tar = TinyResolve.manta_unresolved_tar

File? metrics_file_batchevidence = GatherBatchEvidenceMetrics.metrics_file
}
Expand Down
106 changes: 103 additions & 3 deletions wdl/TinyResolve.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ workflow TinyResolve {
Int samples_per_shard = 25
String sv_pipeline_docker
String linux_docker
Boolean rm_cpx_type = true
RuntimeAttr? runtime_attr_resolve
RuntimeAttr? runtime_attr_untar
RuntimeAttr? runtime_attr_concattars
}

scatter (disc in discfile) {
Expand Down Expand Up @@ -73,18 +75,29 @@ workflow TinyResolve {
input:
raw_vcfs=GetShardVcfs.shard_items,
samples=GetShardSamples.shard_items,
shard_number = i,
sv_pipeline_docker = sv_pipeline_docker,
cytoband=cytoband,
cytoband_idx=cytoband_idx,
rm_cpx_type = rm_cpx_type,
discfile=GetShardDiscfiles.shard_items,
discfile_idx=GetShardDiscfileIndexes.shard_items,
mei_bed=mei_bed,
runtime_attr_override=runtime_attr_resolve
}
}

call ConcatTars {
input:
manta_tloc_tars=ResolveManta.manta_tloc_tar,
manta_unresolved_tars=ResolveManta.manta_unresolved_tar,
linux_docker=linux_docker,
runtime_attr_override=runtime_attr_concattars
}

output {
Array[File] tloc_manta_vcf = flatten(ResolveManta.tloc_vcf)
File manta_tloc_tar = ConcatTars.manta_tloc_tar
File manta_unresolved_tar = ConcatTars.manta_unresolved_tar
}
}

Expand All @@ -94,11 +107,13 @@ task ResolveManta {
Array[File] raw_vcfs
Array[String] samples
File cytoband_idx
Int shard_number
Array[File] discfile
Array[File] discfile_idx
File cytoband
File mei_bed
String sv_pipeline_docker
Boolean rm_cpx_type
RuntimeAttr? runtime_attr_override
}

Expand Down Expand Up @@ -127,12 +142,36 @@ task ResolveManta {
pe=${discfiles[$i]}
sample_no=`printf %03d $i`
bash /opt/sv-pipeline/00_preprocessing/scripts/mantatloccheck.sh $vcf $pe ${sample_id} ~{mei_bed} ~{cytoband}
mv ${sample_id}.manta.complex.vcf.gz tloc_${sample_no}.${sample_id}.manta.complex.vcf.gz
bcftools sort --output manta.unresolved.vcf.tmp --output-type v manta.unresolved.vcf
mv manta.unresolved.vcf.tmp manta.unresolved.vcf
if [[ ~{true='true' false='false' rm_cpx_type} = 'true' ]]; then
bcftools annotate --include 'INFO/SVTYPE != "CPX"' --keep-sites \
--remove 'INFO/CPX_TYPE' \
--output "tloc_${sample_no}.${sample_id}.manta.complex.vcf.gz" \
--output-type z \
"${sample_id}.manta.complex.vcf.gz"
rm "${sample_id}.manta.complex.vcf.gz"
bcftools annotate --include 'INFO/SVTYPE != "CPX"' --keep-sites \
--remove 'INFO/CPX_TYPE' \
--output "${sample_no}.${sample_id}.manta.unresolved.vcf.gz" \
--output-type z \
manta.unresolved.vcf
rm manta.unresolved.vcf
else
mv ${sample_id}.manta.complex.vcf.gz tloc_${sample_no}.${sample_id}.manta.complex.vcf.gz
bgzip manta.unresolved.vcf
mv manta.unresolved.vcf.gz ${sample_no}.${sample_id}.manta.unresolved.vcf.gz
fi
done
find . -type f -name '*.complex.vcf.gz' \
| tar --create --file='manta_tloc_~{shard_number}.tar' --files-from=-
find . -type f -name '*.unresolved.vcf.gz' \
| tar --create --file='manta_unresolved_~{shard_number}.tar' --files-from=-
>>>

output {
Array[File] tloc_vcf = glob("tloc_*.vcf.gz")
File manta_tloc_tar = 'manta_tloc_${shard_number}.tar'
File manta_unresolved_tar = 'manta_unresolved_${shard_number}.tar'
}

runtime {
Expand All @@ -145,3 +184,64 @@ task ResolveManta {
maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
}
}

# Merge the per-shard Manta tarballs into one gzipped tarball per category.
#
# Inputs:
#   manta_tloc_tars        - tar archives of "*.vcf.gz" files, one per shard
#   manta_unresolved_tars  - tar archives of "*.vcf.gz" files, one per shard
#   linux_docker           - image used to run the command (needs bash, tar, find)
#   runtime_attr_override  - optional overrides for the default runtime attributes
#
# Outputs:
#   manta_tloc_tar         - manta_tloc.tar.gz, all "*.vcf.gz" members of the tloc shards
#   manta_unresolved_tar   - manta_unresolved.tar.gz, all "*.vcf.gz" members of the
#                            unresolved shards
task ConcatTars {
  input {
    Array[File] manta_tloc_tars
    Array[File] manta_unresolved_tars
    String linux_docker
    RuntimeAttr? runtime_attr_override
  }

  # Disk is sized at three times the input size plus 10 GB of fixed headroom; the
  # command holds the input tars, the extracted files and the output tar at once.
  RuntimeAttr default_attr = object {
    cpu_cores: 1,
    mem_gb: 1.0,
    disk_gb: ceil(10 + 3 * (size(manta_tloc_tars, "GB") + size(manta_unresolved_tars, "GB"))),
    boot_disk_gb: 10,
    preemptible_tries: 3,
    max_retries: 1
  }
  RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])

  command <<<
    set -o errexit
    set -o pipefail
    set -o nounset

    # Extract every tar listed in the file $1 into a scratch directory named $2,
    # re-pack all extracted *.vcf.gz files into $2.tar.gz, then remove the scratch
    # directory so both categories free their disk space the same way.
    repack_tars() {
      local tar_list="$1"
      local label="$2"
      local shard_tar

      mkdir "${label}"
      # The list file is not guaranteed to end with a newline on every WDL engine.
      # Without the "|| [[ -n ... ]]" guard a plain "while read" loop would skip
      # the final (unterminated) entry and silently drop the last shard.
      while read -r shard_tar || [[ -n "${shard_tar}" ]]; do
        tar --extract --file "${shard_tar}" --directory "${label}"
      done < "${tar_list}"

      # Build the manifest with paths relative to the scratch directory so the
      # archive members carry no scratch-directory prefix. The manifest itself
      # does not match '*.vcf.gz' and therefore never ends up in the archive.
      (cd "${label}" && find . -type f -name '*.vcf.gz' > manifest.list)
      tar --create --gzip --file "${label}.tar.gz" --directory "${label}" \
        --files-from "${label}/manifest.list"
      rm -rf "${label}"
    }

    repack_tars '~{write_lines(manta_tloc_tars)}' manta_tloc
    repack_tars '~{write_lines(manta_unresolved_tars)}' manta_unresolved
  >>>

  runtime {
    cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
    memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
    disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
    bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
    docker: linux_docker
    preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
    maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
  }

  output {
    File manta_tloc_tar = 'manta_tloc.tar.gz'
    File manta_unresolved_tar = 'manta_unresolved.tar.gz'
  }
}
Loading