From 6eaecb65fac8276328a9226305dee94549f0cd79 Mon Sep 17 00:00:00 2001 From: Matthew McDermott Date: Sun, 4 Aug 2024 15:33:33 -0400 Subject: [PATCH] Got pre_MEDS.py working (for MIMIC-IV, not eICU) for arbitrary supported input file encodings so that unzipping works. Also made the bash arg (I think) work. Also removed rootutils to help address #114 --- MIMIC-IV_Example/joint_script.sh | 2 +- MIMIC-IV_Example/pre_MEDS.py | 10 ++++++---- eICU_Example/pre_MEDS.py | 3 --- pyproject.toml | 1 - 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/MIMIC-IV_Example/joint_script.sh b/MIMIC-IV_Example/joint_script.sh index 2b4ea56..a98fee7 100755 --- a/MIMIC-IV_Example/joint_script.sh +++ b/MIMIC-IV_Example/joint_script.sh @@ -66,7 +66,7 @@ else fi if [ "$DO_UNZIP" == "true" ]; then - echo "Unzipping gunzip csv files." + echo "Unzipping csv files." for file in "${MIMICIV_RAW_DIR}"/*/*.csv.gz; do gzip -d --force "$file"; done else echo "Skipping unzipping." diff --git a/MIMIC-IV_Example/pre_MEDS.py b/MIMIC-IV_Example/pre_MEDS.py index ed317b6..1530a8b 100755 --- a/MIMIC-IV_Example/pre_MEDS.py +++ b/MIMIC-IV_Example/pre_MEDS.py @@ -1,9 +1,6 @@ #!/usr/bin/env python """Performs pre-MEDS data wrangling for MIMIC-IV.""" -import rootutils - -root = rootutils.setup_root(__file__, dotenv=True, pythonpath=True, cwd=True) from datetime import datetime from functools import partial @@ -77,7 +74,12 @@ def main(cfg: DictConfig): for in_fp in all_fps: pfx = get_shard_prefix(raw_cohort_dir, in_fp) - fp, read_fn = get_supported_fp(raw_cohort_dir, pfx) + try: + fp, read_fn = get_supported_fp(raw_cohort_dir, pfx) + except FileNotFoundError: + logger.info(f"Skipping {pfx} @ {str(in_fp.resolve())} as no compatible dataframe file was found.") + continue + if fp.suffix in [".csv", ".csv.gz"]: read_fn = partial(read_fn, infer_schema_length=100000) diff --git a/eICU_Example/pre_MEDS.py b/eICU_Example/pre_MEDS.py index 317ecb5..5ebe058 100755 --- a/eICU_Example/pre_MEDS.py +++ b/eICU_Example/pre_MEDS.py @@ -4,9 +4,6 @@ See the docstring of `main` for more information. """ -import rootutils - -root = rootutils.setup_root(__file__, dotenv=True, pythonpath=True, cwd=True) import gzip from collections.abc import Callable diff --git a/pyproject.toml b/pyproject.toml index 5bea0a8..e512e7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ dependencies = [ [tool.setuptools_scm] [project.optional-dependencies] -examples = ["rootutils"] dev = ["pre-commit"] tests = ["pytest", "pytest-cov", "rootutils"] local_parallelism = ["hydra-joblib-launcher"]