Skip to content

Commit

Permalink
Update LD clumping to take loci with best p-value per bin. (#539)
Browse files Browse the repository at this point in the history
Previously we kept the locus with the largest absolute beta in each bin;
now we keep the locus with the best (lowest) p-value in each bin.
  • Loading branch information
cristinaetrv committed Jul 19, 2024
1 parent 97a22af commit 2abeffb
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions python/python/bystro/prs/preprocess_for_prs.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,11 +344,16 @@ def select_max_effect_per_bin(scores_overlap_abs_val: pd.DataFrame) -> pd.DataFr
]


def select_min_pval_per_bin(scores_overlap_abs_val: pd.DataFrame) -> pd.DataFrame:
    """Select the row with the smallest p-value for each bin.

    Rows are grouped by the ("CHR", "bin") pair and, within each group, the
    row whose "P" value is lowest is kept. Rows with a missing "P" are ignored,
    so a bin that has no usable p-value at all contributes no row instead of
    making the lookup fail.

    Args:
        scores_overlap_abs_val: Frame containing at least the "CHR", "bin" and
            "P" columns. The parameter name is kept for backward compatibility;
            no absolute-effect column is read here.

    Returns:
        A frame with one row per (CHR, bin) group that has at least one
        non-missing "P", keeping the original columns and index labels.
    """
    # A group whose p-values are all missing has no minimum: idxmin yields NaN
    # (or raises, depending on the pandas version) and the .loc lookup below
    # would then fail. Filtering first skips such bins.
    scores_with_pval = scores_overlap_abs_val.dropna(subset=["P"])
    best_row_labels = scores_with_pval.groupby(["CHR", "bin"])["P"].idxmin()
    # NOTE(review): .loc selects by index label, so this assumes the labels are
    # unique — confirm against the caller.
    return scores_with_pval.loc[best_row_labels]


def clean_scores_for_analysis(
max_effect_per_bin: pd.DataFrame, column_to_drop: str
min_pval_per_bin: pd.DataFrame, column_to_drop: str
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""Drop extra columns and prepare df with final set of loci for dosage matrix filtering."""
scores_overlap_adjusted = max_effect_per_bin.drop(columns=["bin", "abs_effect_weight"])
scores_overlap_adjusted = min_pval_per_bin.drop(columns=["bin"])
scores_overlap_adjusted = scores_overlap_adjusted.set_index("SNPID")
format_col_index = scores_overlap_adjusted.columns.get_loc(column_to_drop)
# If there is more than 1 column found, our "columns_to_keep" function will not work
Expand All @@ -370,9 +375,8 @@ def ld_clump(scores_overlap: pd.DataFrame, map_path: str) -> tuple[pd.DataFrame,
)
scores_overlap.insert(0, "allele_comparison", allele_comparison_results)
scores_overlap_w_bins = assign_bins(scores_overlap, bin_mappings)
scores_overlap_abs_val = calculate_abs_effect_weights(scores_overlap_w_bins)
max_effect_per_bin = select_max_effect_per_bin(scores_overlap_abs_val)
return clean_scores_for_analysis(max_effect_per_bin, "ID_effect_as_ref")
min_pval_per_bin = select_min_pval_per_bin(scores_overlap_w_bins)
return clean_scores_for_analysis(min_pval_per_bin, "ID_effect_as_ref")


def finalize_dosage_after_c_t(
Expand Down

0 comments on commit 2abeffb

Please sign in to comment.