Skip to content

Commit

Permalink
fix: Update column names to "is_discordant_replicate"
Browse files Browse the repository at this point in the history
- In test_sample_qc_table.py and the _add_analytic_exclusion function.
- This aligns with the data that the function _add_subject_representative
  receives from the concordance summary.
- This ensures consistency with the input data. The column is renamed to
  "Expected Replicate Discordance" in subsequent processing.
  • Loading branch information
jaamarks committed Sep 6, 2024
1 parent 408550c commit 3eff185
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 16 deletions.
21 changes: 6 additions & 15 deletions src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,7 @@ def main(
)

add_qc_columns(
sample_qc,
remove_contam,
remove_rep_discordant,
sample_qc, remove_contam, remove_rep_discordant,
)

sample_qc = sample_qc.rename(
Expand Down Expand Up @@ -413,8 +411,7 @@ def _read_contam(file_name: Optional[Path], Sample_IDs: pd.Index) -> pd.DataFram

if file_name is None:
return pd.DataFrame(
index=Sample_IDs,
columns=["Contamination_Rate", "is_contaminated"],
index=Sample_IDs, columns=["Contamination_Rate", "is_contaminated"],
).astype({"Contamination_Rate": "float", "is_contaminated": "boolean"})

return (
Expand Down Expand Up @@ -457,16 +454,12 @@ def _read_intensity(file_name: Optional[Path], Sample_IDs: pd.Index) -> pd.Serie


def add_qc_columns(
sample_qc: pd.DataFrame,
remove_contam: bool,
remove_rep_discordant: bool,
sample_qc: pd.DataFrame, remove_contam: bool, remove_rep_discordant: bool,
) -> pd.DataFrame:
add_call_rate_flags(sample_qc)
_add_identifiler(sample_qc)
_add_analytic_exclusion(
sample_qc,
remove_contam,
remove_rep_discordant,
sample_qc, remove_contam, remove_rep_discordant,
)
_add_subject_representative(sample_qc)
_add_subject_dropped_from_study(sample_qc)
Expand Down Expand Up @@ -512,9 +505,7 @@ def reason_string(row: pd.Series) -> str:


def _add_analytic_exclusion(
sample_qc: pd.DataFrame,
remove_contam: bool,
remove_rep_discordant: bool,
sample_qc: pd.DataFrame, remove_contam: bool, remove_rep_discordant: bool,
) -> pd.DataFrame:
"""Adds a flag to remove samples based on provided conditions.
Expand All @@ -534,7 +525,7 @@ def _add_analytic_exclusion(
exclusion_criteria["is_contaminated"] = "Contamination"

if remove_rep_discordant:
exclusion_criteria["Expected Replicate Discordance"] = "Replicate Discordance"
exclusion_criteria["is_discordant_replicate"] = "Replicate Discordance"

sample_qc["analytic_exclusion"] = sample_qc.reindex(exclusion_criteria.keys(), axis=1).any(
axis=1
Expand Down
2 changes: 1 addition & 1 deletion tests/workflow/scripts/test_sample_qc_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def fake_sample_qc() -> pd.DataFrame:
"is_cr1_filtered",
"is_cr2_filtered",
"is_contaminated",
"Expected Replicate Discordance",
"is_discordant_replicate",
]
data = [
("SP00001", "SB00001", False, False, 0.99, False, False, False, False),
Expand Down

0 comments on commit 3eff185

Please sign in to comment.