Skip to content

Commit

Permalink
Got pre_MEDS.py working (for MIMIC-IV, not eICU) for arbitrary supported input file encodings so that unzipping works.
Browse files Browse the repository at this point in the history
Also made the bash arg (I think) work. Also removed rootutils to help address #114
  • Loading branch information
mmcdermott committed Aug 4, 2024
1 parent c623603 commit 6eaecb6
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 9 deletions.
2 changes: 1 addition & 1 deletion MIMIC-IV_Example/joint_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ else
fi

if [ "$DO_UNZIP" == "true" ]; then
echo "Unzipping gunzip csv files."
echo "Unzipping csv files."
for file in "${MIMICIV_RAW_DIR}"/*/*.csv.gz; do gzip -d --force "$file"; done
else
echo "Skipping unzipping."
Expand Down
10 changes: 6 additions & 4 deletions MIMIC-IV_Example/pre_MEDS.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
#!/usr/bin/env python

"""Performs pre-MEDS data wrangling for MIMIC-IV."""
import rootutils

root = rootutils.setup_root(__file__, dotenv=True, pythonpath=True, cwd=True)

from datetime import datetime
from functools import partial
Expand Down Expand Up @@ -77,7 +74,12 @@ def main(cfg: DictConfig):
for in_fp in all_fps:
pfx = get_shard_prefix(raw_cohort_dir, in_fp)

fp, read_fn = get_supported_fp(raw_cohort_dir, pfx)
try:
fp, read_fn = get_supported_fp(raw_cohort_dir, pfx)
except FileNotFoundError:
logger.info(f"Skipping {pfx} @ {str(in_fp.resolve())} as no compatible dataframe file was found.")
continue

if fp.suffix in [".csv", ".csv.gz"]:
read_fn = partial(read_fn, infer_schema_length=100000)

Expand Down
3 changes: 0 additions & 3 deletions eICU_Example/pre_MEDS.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
See the docstring of `main` for more information.
"""
import rootutils

root = rootutils.setup_root(__file__, dotenv=True, pythonpath=True, cwd=True)

import gzip
from collections.abc import Callable
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ dependencies = [
[tool.setuptools_scm]

[project.optional-dependencies]
examples = ["rootutils"]
dev = ["pre-commit"]
tests = ["pytest", "pytest-cov", "rootutils"]
local_parallelism = ["hydra-joblib-launcher"]
Expand Down

0 comments on commit 6eaecb6

Please sign in to comment.