Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: SVdb merge SV and CNV #886

Merged
merged 13 commits into from
Mar 15, 2022
4 changes: 2 additions & 2 deletions BALSAMIC/constants/workflow_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
"mutation": "somatic",
"type": "SV",
"analysis_type": ["paired", "single"],
"sequencing_type": ["wgs", "targeted"],
"sequencing_type": ["targeted", "wgs"],
"workflow_solution": ["BALSAMIC"],
},
"ascat": {
Expand All @@ -89,7 +89,7 @@
"mutation": "somatic",
"type": "SV",
"analysis_type": ["paired", "single"],
"sequencing_type": ["wgs", "targeted"],
"sequencing_type": ["targeted", "wgs"],
"workflow_solution": ["BALSAMIC"],
},
}
Expand Down
2 changes: 1 addition & 1 deletion BALSAMIC/containers/varcall_py36/varcall_py36.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ dependencies:
- bioconda::gatk=3.8
- bioconda::vardict=2019.06.04=pl526_0
- bioconda::vardict-java=1.7
- bioconda::svdb=2.5.0
- bioconda::svdb=2.5.1
- conda-forge::libiconv
- conda-forge::r-base=3.6.3
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,12 @@ python {params.merge_ascat_output_script} {output.ascat_output_pdf} {input.sampl

rule svdb_merge_tumor_normal:
input:
manta_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".manta.vcf.gz",
delly_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".delly.vcf.gz",
vcf = expand(
vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
caller=somatic_caller_sv) +
expand(
vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
caller=somatic_caller_cnv)
output:
svdb_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz",
namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map",
Expand All @@ -222,16 +226,18 @@ rule svdb_merge_tumor_normal:
tumor = get_sample_type(config["samples"], "tumor"),
normal = get_sample_type(config["samples"], "normal"),
case_name = config["analysis"]["case_id"],
vcf= lambda wildcards, input:[input[index] + ":" + svdb_callers_prio[index] for index in range(0,len(input))],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this work?

def construct_vcf(w, i, sv):
  outp=list()
  for index in range(0,len(i)):
     outp.append(f"i[index]:sv[index]")
  return outp

Copy link
Contributor

@hassanfa hassanfa Mar 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or maybe not. wildcard is the limiting factor, and can cause an issue. Ignore this :-)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or maybe not. wildcard is the limiting factor, and can cause an issue. Ignore this :-)

Also, input is not a list and will not iterate in for....

svdb_priority= ",".join(svdb_callers_prio)
threads:
get_threads(cluster_config, "svdb_merge_tumor_normal")
message:
"Merging Manta and Delly results for PASS variants using svdb for sample '{params.case_name}' "
shell:
"""
svdb --merge --no_intra --bnd_distance 5000 --overlap 0.80 \
--vcf {input.manta_vcf}:manta {input.delly_vcf}:delly \
--priority manta,delly | \
bgzip -l 9 -c > {output.svdb_vcf}
--vcf {params.vcf} \
--priority {params.svdb_priority} | \
bgzip -l 9 -c > {output.svdb_vcf};

echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,12 @@ tabix -p vcf -f {output.vcf};

rule svdb_merge_tumor_only:
input:
manta_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".manta.vcf.gz",
delly_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".delly.vcf.gz",
vcf = expand(
vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
caller=somatic_caller_sv) +
expand(
vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
caller=somatic_caller_cnv)
output:
svdb_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz",
namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map",
Expand All @@ -118,16 +122,17 @@ rule svdb_merge_tumor_only:
params:
tumor = get_sample_type(config["samples"], "tumor"),
case_name = config["analysis"]["case_id"],
vcf= lambda wildcards, input:[input[index] + ":" + svdb_callers_prio[index] for index in range(0,len(input))],
svdb_priority= ",".join(svdb_callers_prio)
threads:
get_threads(cluster_config, "svdb_merge_tumor_only")
message:
"Merging Manta and Delly results for PASS variants using svdb for sample '{params.case_name}' "
shell:
"""
svdb --merge --no_intra --bnd_distance 5000 --overlap 0.80 \
--vcf {input.manta_vcf}:manta {input.delly_vcf}:delly \
--priority manta,delly | \
bgzip -l 9 -c > {output.svdb_vcf}

--vcf {params.vcf} \
--priority {params.svdb_priority} | \
bgzip -l 9 -c > {output.svdb_vcf};
echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap};
"""
6 changes: 3 additions & 3 deletions BALSAMIC/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,16 +176,16 @@ def sequencing_type_literal(cls, value) -> str:
class VCFModel(BaseModel):
"""Contains VCF config"""

manta: VarcallerAttribute
cnvkit: VarcallerAttribute
vardict: VarcallerAttribute
tnscope: VarcallerAttribute
dnascope: VarcallerAttribute
tnhaplotyper: VarcallerAttribute
manta_germline: VarcallerAttribute
haplotypecaller: VarcallerAttribute
TNscope_umi: VarcallerAttribute
manta_germline: VarcallerAttribute
manta: VarcallerAttribute
delly: VarcallerAttribute
cnvkit: VarcallerAttribute
ascat: VarcallerAttribute
svdb: VarcallerAttribute

Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/utils/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def get_variant_callers(
WorkflowRunError if values are not valid
"""

valid_variant_callers = set()
valid_variant_callers = list()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing from list to set might cause redundant entries :-) Make sure you remove them and order it properly now.

Copy link
Collaborator Author

@khurrammaqbool khurrammaqbool Mar 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into this and it is not generating redundant entries. The workflow does not show any changes. Using set() generates random order and this has significant impact on the results.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be ambiguous if/when orders change. Just something to keep it in mind.

if mutation_type not in MUTATION_TYPE:
raise WorkflowRunError(f"{mutation_type} is not a valid mutation type.")

Expand All @@ -99,7 +99,7 @@ def get_variant_callers(
and workflow_solution in variant_caller_params.get("workflow_solution")
and sequencing_type in variant_caller_params.get("sequencing_type")
):
valid_variant_callers.add(variant_caller_name)
valid_variant_callers.append(variant_caller_name)
return list(valid_variant_callers)


Expand Down
18 changes: 18 additions & 0 deletions BALSAMIC/workflows/balsamic.smk
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ os.environ['TMPDIR'] = get_result_dir(config)
# Extract variant callers for the workflow
germline_caller = []
somatic_caller = []
somatic_caller_cnv = []
somatic_caller_sv = []
for m in MUTATION_TYPE:
germline_caller_balsamic = get_variant_callers(config=config,
analysis_type=config['analysis']['analysis_type'],
Expand Down Expand Up @@ -170,6 +172,22 @@ for m in MUTATION_TYPE:
mutation_class="somatic")
somatic_caller = somatic_caller + somatic_caller_sentieon_umi + somatic_caller_balsamic + somatic_caller_sentieon

somatic_caller_sv = get_variant_callers(config=config,
analysis_type=config['analysis']['analysis_type'],
workflow_solution="BALSAMIC",
mutation_type="SV",
sequencing_type=config["analysis"]["sequencing_type"],
mutation_class="somatic")

somatic_caller_cnv = get_variant_callers(config=config,
analysis_type=config['analysis']['analysis_type'],
workflow_solution="BALSAMIC",
mutation_type="CNV",
sequencing_type=config["analysis"]["sequencing_type"],
mutation_class="somatic")
somatic_caller_sv.remove("svdb")
svdb_callers_prio = somatic_caller_sv + somatic_caller_cnv

# Collect only snv callers for calculating tmb
somatic_caller_tmb = []
for ws in ["BALSAMIC","Sentieon","Sentieon_umi"]:
Expand Down
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Added:
* SVdb to WGS workflow #871
* Docker container for vcf2cytosure #858
* SVdb to TGA workflow #871

* SVdb merge SV and CNV #871

Changed:
^^^^^^^^
Expand Down