fix: Update column names to "is_discordant_replicate"

- Rename the column in test_sample_qc_table.py and in the _add_analytic_exclusion function. - This aligns with the data that the function _add_subject_representative receives from the concordance summary, ensuring consistency with the input data. - The column is renamed to "Expected Replicate Discordance" later, in subsequent processing.
NCI-CGR · Sep 6, 2024 · 3eff185 · 3eff185
1 parent 408550c
commit 3eff185
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 16 deletions.
diff --git a/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py b/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
@@ -192,9 +192,7 @@ def main(
     )
 
     add_qc_columns(
-        sample_qc,
-        remove_contam,
-        remove_rep_discordant,
+        sample_qc, remove_contam, remove_rep_discordant,
     )
 
     sample_qc = sample_qc.rename(
@@ -413,8 +411,7 @@ def _read_contam(file_name: Optional[Path], Sample_IDs: pd.Index) -> pd.DataFram
 
     if file_name is None:
         return pd.DataFrame(
-            index=Sample_IDs,
-            columns=["Contamination_Rate", "is_contaminated"],
+            index=Sample_IDs, columns=["Contamination_Rate", "is_contaminated"],
         ).astype({"Contamination_Rate": "float", "is_contaminated": "boolean"})
 
     return (
@@ -457,16 +454,12 @@ def _read_intensity(file_name: Optional[Path], Sample_IDs: pd.Index) -> pd.Serie
 
 
 def add_qc_columns(
-    sample_qc: pd.DataFrame,
-    remove_contam: bool,
-    remove_rep_discordant: bool,
+    sample_qc: pd.DataFrame, remove_contam: bool, remove_rep_discordant: bool,
 ) -> pd.DataFrame:
     add_call_rate_flags(sample_qc)
     _add_identifiler(sample_qc)
     _add_analytic_exclusion(
-        sample_qc,
-        remove_contam,
-        remove_rep_discordant,
+        sample_qc, remove_contam, remove_rep_discordant,
     )
     _add_subject_representative(sample_qc)
     _add_subject_dropped_from_study(sample_qc)
@@ -512,9 +505,7 @@ def reason_string(row: pd.Series) -> str:
 
 
 def _add_analytic_exclusion(
-    sample_qc: pd.DataFrame,
-    remove_contam: bool,
-    remove_rep_discordant: bool,
+    sample_qc: pd.DataFrame, remove_contam: bool, remove_rep_discordant: bool,
 ) -> pd.DataFrame:
     """Adds a flag to remove samples based on provided conditions.
 
@@ -534,7 +525,7 @@ def _add_analytic_exclusion(
         exclusion_criteria["is_contaminated"] = "Contamination"
 
     if remove_rep_discordant:
-        exclusion_criteria["Expected Replicate Discordance"] = "Replicate Discordance"
+        exclusion_criteria["is_discordant_replicate"] = "Replicate Discordance"
 
     sample_qc["analytic_exclusion"] = sample_qc.reindex(exclusion_criteria.keys(), axis=1).any(
         axis=1

diff --git a/tests/workflow/scripts/test_sample_qc_table.py b/tests/workflow/scripts/test_sample_qc_table.py
@@ -254,7 +254,7 @@ def fake_sample_qc() -> pd.DataFrame:
         "is_cr1_filtered",
         "is_cr2_filtered",
         "is_contaminated",
-        "Expected Replicate Discordance",
+        "is_discordant_replicate",
     ]
     data = [
         ("SP00001", "SB00001", False, False, 0.99, False, False, False, False),