Skip to content

Commit

Permalink
Made some more preliminary changes; not done yet.
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcdermott committed Jul 19, 2024
1 parent 99f3d5d commit 927fb25
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 133 deletions.
26 changes: 13 additions & 13 deletions MIMIC-IV_Example/configs/event_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ hosp/diagnoses_icd:
_metadata:
d_icd_diagnoses:
description: "long_title"
parent_code: "ICD${icd_version}CM/${icd_code}" # Single strings are templates of columns.
parent_code: "ICD{icd_version}CM/{icd_code}" # Single strings are templates of columns.

hosp/drgcodes:
drg:
Expand Down Expand Up @@ -105,7 +105,7 @@ hosp/labevents:
d_labitems_to_loinc:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"

hosp/omr:
omr:
Expand Down Expand Up @@ -170,8 +170,8 @@ hosp/procedures_icd:
d_icd_procedures:
description: "long_title"
parent_code: # List of objects are string labels mapping to filters to be evaluated.
- "ICD${icd_version}Proc/${icd_code}": { icd_version: 9 }
- "ICD${icd_version}PCS/${icd_code}": { icd_version: 10 }
- "ICD{icd_version}Proc/{icd_code}": { icd_version: 9 }
- "ICD{icd_version}PCS/{icd_code}": { icd_version: 10 }

hosp/transfers:
transfer:
Expand Down Expand Up @@ -217,12 +217,12 @@ icu/chartevents:
meas_chartevents_main:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"
# TODO: I don't know if this is necessary...
d_labitems_to_loinc:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"

icu/procedureevents:
start:
Expand All @@ -238,11 +238,11 @@ icu/procedureevents:
proc_datetimeevents:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"
proc_itemid:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"
end:
code:
- PROCEDURE
Expand All @@ -256,11 +256,11 @@ icu/procedureevents:
proc_datetimeevents:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"
proc_itemid:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"

icu/inputevents:
input_start:
Expand All @@ -280,7 +280,7 @@ icu/inputevents:
inputevents_to_rxnorm:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"
rateuom: null # A null column means this column is needed in pulling from the metadata.
input_end:
code:
Expand All @@ -300,7 +300,7 @@ icu/inputevents:
inputevents_to_rxnorm:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"
patient_weight:
code:
- PATIENT_WEIGHT_AT_INFUSION
Expand All @@ -324,5 +324,5 @@ icu/outputevents:
outputevents_to_rxnorm:
description: ["omop_concept_name", "label"] # List of strings are columns to be collated
itemid: "itemid (omop_source_code)"
parent_code: "${omop_vocabulary_id}/${omop_concept_code}"
parent_code: "{omop_vocabulary_id}/{omop_concept_code}"
valueuom: unitname
55 changes: 32 additions & 23 deletions src/MEDS_polars_functions/extract/convert_to_sharded_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,31 @@ def in_format(fmt: str, ts_name: str) -> pl.Expr:
return pl.col(ts_name).str.strptime(pl.Datetime, fmt, strict=False)


def get_code_expr(code_field: str | list | ListConfig) -> tuple[pl.Expr, pl.Expr | None, set[str]]:
"""TODO."""
if isinstance(code_field, str):
code_field = [code_field]

code_exprs = []
code_null_filter_expr = None
needed_cols = set()
for i, code in enumerate(code_field):
match code:
case str() if is_col_field(code):
code_col = parse_col_field(code)
needed_cols.add(code_col)
code_exprs.append(pl.col(code_col).cast(pl.Utf8).fill_null("UNK"))
if i == 0:
code_null_filter_expr = pl.col(code_col).is_not_null()
case str():
code_exprs.append(pl.lit(code, dtype=pl.Utf8))
case _:
raise ValueError(f"Invalid code literal: {code}")
code_expr = reduce(lambda a, b: a + pl.lit("//") + b, code_exprs).cast(pl.Categorical)

return code_expr, code_null_filter_expr, needed_cols


def extract_event(df: pl.LazyFrame, event_cfg: dict[str, str | None]) -> pl.LazyFrame:
"""Extracts a single event dataframe from the raw data.
Expand Down Expand Up @@ -293,7 +318,6 @@ def extract_event(df: pl.LazyFrame, event_cfg: dict[str, str | None]) -> pl.Lazy
...
ValueError: Source column 'discharge_time' for event column foobar is not numeric or categorical! Cannot be used as an event col.
""" # noqa: E501
df = df
event_exprs = {"patient_id": pl.col("patient_id")}

if "code" not in event_cfg:
Expand All @@ -309,29 +333,14 @@ def extract_event(df: pl.LazyFrame, event_cfg: dict[str, str | None]) -> pl.Lazy
if "patient_id" in event_cfg:
raise KeyError("Event column name 'patient_id' cannot be overridden.")

codes = event_cfg.pop("code")
if not isinstance(codes, (list, ListConfig)):
logger.debug(
f"Event code '{codes}' is a {type(codes)}, not a list. Automatically converting to a list."
)
codes = [codes]
code_expr, code_null_filter_expr, needed_cols = get_code_expr(event_cfg.pop("code"))

code_exprs = []
code_null_filter_expr = None
for i, code in enumerate(codes):
match code:
case str() if is_col_field(code) and parse_col_field(code) in df.schema:
code_col = parse_col_field(code)
logger.info(f"Extracting code column {code_col}")
code_exprs.append(pl.col(code_col).cast(pl.Utf8).fill_null("UNK"))
if i == 0:
code_null_filter_expr = pl.col(code_col).is_not_null()
case str():
logger.info(f"Adding code literate {code}")
code_exprs.append(pl.lit(code, dtype=pl.Utf8))
case _:
raise ValueError(f"Invalid code literal: {code}")
event_exprs["code"] = reduce(lambda a, b: a + pl.lit("//") + b, code_exprs).cast(pl.Categorical)
for col in needed_cols:
if col not in df.schema:
raise KeyError(f"Source column '{col}' for event column code not found in DataFrame schema.")
logger.info(f"Extracting code column {code_col}")

event_exprs["code"] = code_expr

ts = event_cfg.pop("timestamp")
ts_format = event_cfg.pop("timestamp_format", None)
Expand Down
Loading

0 comments on commit 927fb25

Please sign in to comment.