Clinical-Genomics · khurrammaqbool · Mar 15, 2022 · Mar 10, 2022 · Mar 14, 2022 · Mar 14, 2022
@@ -75,7 +75,7 @@
         "mutation": "somatic",
         "type": "SV",
         "analysis_type": ["paired", "single"],
-        "sequencing_type": ["wgs", "targeted"],
+        "sequencing_type": ["targeted", "wgs"],
         "workflow_solution": ["BALSAMIC"],
     },
     "ascat": {
@@ -89,7 +89,7 @@
         "mutation": "somatic",
         "type": "SV",
         "analysis_type": ["paired", "single"],
-        "sequencing_type": ["wgs", "targeted"],
+        "sequencing_type": ["targeted", "wgs"],
         "workflow_solution": ["BALSAMIC"],
     },
 }

@@ -9,6 +9,6 @@ dependencies:
   - bioconda::gatk=3.8
   - bioconda::vardict=2019.06.04=pl526_0
   - bioconda::vardict-java=1.7
-  - bioconda::svdb=2.5.0
+  - bioconda::svdb=2.5.1
   - conda-forge::libiconv
   - conda-forge::r-base=3.6.3
@@ -209,8 +209,12 @@ python {params.merge_ascat_output_script} {output.ascat_output_pdf} {input.sampl
 
 rule svdb_merge_tumor_normal:
     input:
-        manta_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".manta.vcf.gz",
-        delly_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".delly.vcf.gz",
+        vcf = expand(
+                vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
+                caller=somatic_caller_sv) +
+              expand(
+                vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
+                caller=somatic_caller_cnv)
     output:
         svdb_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz",
         namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map",
@@ -222,16 +226,18 @@ rule svdb_merge_tumor_normal:
         tumor = get_sample_type(config["samples"], "tumor"),
         normal = get_sample_type(config["samples"], "normal"),
         case_name = config["analysis"]["case_id"],
+        vcf= lambda wildcards, input:[input[index] + ":" + svdb_callers_prio[index] for index in range(0,len(input))],
+        svdb_priority= ",".join(svdb_callers_prio)
     threads:
         get_threads(cluster_config, "svdb_merge_tumor_normal")
     message:
         "Merging Manta and Delly results for PASS variants using svdb for sample '{params.case_name}' "
     shell:
         """
 svdb --merge --no_intra --bnd_distance 5000 --overlap 0.80 \
---vcf {input.manta_vcf}:manta {input.delly_vcf}:delly \
---priority manta,delly | \
-bgzip -l 9 -c  > {output.svdb_vcf}
+--vcf {params.vcf} \
+--priority {params.svdb_priority} | \
+bgzip -l 9 -c  > {output.svdb_vcf};
 
 echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap};
         """
@@ -106,8 +106,12 @@ tabix -p vcf -f {output.vcf};
 
 rule svdb_merge_tumor_only:
     input:
-        manta_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".manta.vcf.gz",
-        delly_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".delly.vcf.gz",
+        vcf = expand(
+                vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
+                caller=somatic_caller_sv) +
+              expand(
+                vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
+                caller=somatic_caller_cnv)
     output:
         svdb_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz",
         namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map",
@@ -118,16 +122,17 @@ rule svdb_merge_tumor_only:
     params:
         tumor = get_sample_type(config["samples"], "tumor"),
         case_name = config["analysis"]["case_id"],
+        vcf= lambda wildcards, input:[input[index] + ":" + svdb_callers_prio[index] for index in range(0,len(input))],
+        svdb_priority= ",".join(svdb_callers_prio)
     threads:
         get_threads(cluster_config, "svdb_merge_tumor_only")
     message:
         "Merging Manta and Delly results for PASS variants using svdb for sample '{params.case_name}' "
     shell:
         """
 svdb --merge --no_intra --bnd_distance 5000 --overlap 0.80 \
---vcf {input.manta_vcf}:manta {input.delly_vcf}:delly \
---priority manta,delly | \
-bgzip -l 9 -c > {output.svdb_vcf}
-
+--vcf {params.vcf} \
+--priority {params.svdb_priority} | \
+bgzip -l 9 -c > {output.svdb_vcf};
 echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap};
         """
@@ -176,16 +176,16 @@ def sequencing_type_literal(cls, value) -> str:
 class VCFModel(BaseModel):
     """Contains VCF config"""
 
-    manta: VarcallerAttribute
-    cnvkit: VarcallerAttribute
     vardict: VarcallerAttribute
     tnscope: VarcallerAttribute
     dnascope: VarcallerAttribute
     tnhaplotyper: VarcallerAttribute
-    manta_germline: VarcallerAttribute
     haplotypecaller: VarcallerAttribute
     TNscope_umi: VarcallerAttribute
+    manta_germline: VarcallerAttribute
+    manta: VarcallerAttribute
     delly: VarcallerAttribute
+    cnvkit: VarcallerAttribute
     ascat: VarcallerAttribute
     svdb: VarcallerAttribute
 

@@ -75,7 +75,7 @@ def get_variant_callers(
         WorkflowRunError if values are not valid
     """
 
-    valid_variant_callers = set()
+    valid_variant_callers = list()
     if mutation_type not in MUTATION_TYPE:
         raise WorkflowRunError(f"{mutation_type} is not a valid mutation type.")
 
@@ -99,7 +99,7 @@ def get_variant_callers(
             and workflow_solution in variant_caller_params.get("workflow_solution")
             and sequencing_type in variant_caller_params.get("sequencing_type")
         ):
-            valid_variant_callers.add(variant_caller_name)
+            valid_variant_callers.append(variant_caller_name)
     return list(valid_variant_callers)
 
 

@@ -130,6 +130,8 @@ os.environ['TMPDIR'] = get_result_dir(config)
 # Extract variant callers for the workflow
 germline_caller = []
 somatic_caller = []
+somatic_caller_cnv = []
+somatic_caller_sv = []
 for m in MUTATION_TYPE:
     germline_caller_balsamic = get_variant_callers(config=config,
                                             analysis_type=config['analysis']['analysis_type'],
@@ -170,6 +172,22 @@ for m in MUTATION_TYPE:
                                              mutation_class="somatic")
     somatic_caller = somatic_caller + somatic_caller_sentieon_umi + somatic_caller_balsamic + somatic_caller_sentieon
 
+# Callers whose VCFs svdb merges: SV callers first, then CNV callers.
+somatic_caller_sv = get_variant_callers(config=config,
+                                        analysis_type=config['analysis']['analysis_type'],
+                                        workflow_solution="BALSAMIC", mutation_type="SV",
+                                        sequencing_type=config["analysis"]["sequencing_type"],
+                                        mutation_class="somatic")
+somatic_caller_cnv = get_variant_callers(config=config,
+                                         analysis_type=config['analysis']['analysis_type'],
+                                         workflow_solution="BALSAMIC", mutation_type="CNV",
+                                         sequencing_type=config["analysis"]["sequencing_type"],
+                                         mutation_class="somatic")
+# svdb is the merge output, not an input; filtering instead of list.remove()
+# avoids a ValueError when "svdb" is not among the returned SV callers.
+somatic_caller_sv = [caller for caller in somatic_caller_sv if caller != "svdb"]
+svdb_callers_prio = somatic_caller_sv + somatic_caller_cnv
+
 # Collect only snv callers for calculating tmb
 somatic_caller_tmb = []
 for ws in ["BALSAMIC","Sentieon","Sentieon_umi"]:

@@ -12,7 +12,7 @@ Added:
 * SVdb to WGS workflow #871
 * Docker container for vcf2cytosure #858
 * SVdb to TGA workflow #871 
-
+* SVdb merging of SV and CNV calls #871
 
 Changed:
 ^^^^^^^^