Skip to content

Commit

Permalink
Implements a faster _sort_ids within read_genome from `parsers.plink`.
Browse files Browse the repository at this point in the history
  • Loading branch information
rajwanir2 committed Nov 27, 2024
1 parent 48dd35f commit 789d401
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/cgr_gwas_qc/parsers/plink.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pandas as pd

from cgr_gwas_qc.typing import PathLike
Expand Down Expand Up @@ -113,20 +114,19 @@ def read_genome(filename: PathLike) -> pd.DataFrame:
- https://www.cog-genomics.org/plink/1.9/formats#genome
"""

def _sort_ids(x: pd.Series):
def _sort_ids(x: pd.DataFrame):
"""Sort IDs alphanumerically."""
x["ID1"], x["ID2"] = sorted([x.IID1, x.IID2])
x.IID1, x.IID2 = np.where(x.IID1 < x.IID2, [x.IID1, x.IID2], [x.IID2, x.IID1])
x.rename(columns={"IID1": "ID1", "IID2": "ID2"}, inplace=True)
return x

return (
return _sort_ids(
pd.read_csv(
filename,
delim_whitespace=True,
dtype={"FID1": "string", "IID1": "string", "FID2": "string", "IID2": "string"},
)
.apply(_sort_ids, axis=1)
.drop(["IID1", "IID2", "FID1", "FID2"], axis=1)
)
).drop(["FID1", "FID2"], axis=1)


def read_imiss(filename: PathLike) -> pd.DataFrame:
Expand Down

0 comments on commit 789d401

Please sign in to comment.