-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from mmcdermott/multirun
Multirun capabilities and other improvements
- Loading branch information
Showing
31 changed files
with
2,646 additions
and
177 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/usr/bin/env bash

# Abort the whole run as soon as any command (including a pipeline stage script) exits non-zero.
set -e
|
||
#######################################
# Print the usage message for this script and terminate the script.
# Arguments:
#   $1 - optional exit status; defaults to 1, the status used when no
#        argument is given.
# Outputs:
#   Writes the usage text to stdout.
# Returns:
#   Never returns; exits the calling shell.
#######################################
function display_help() {
  printf '%s\n' \
    "Usage: $0 <MIMICIV_RAW_DIR> <MIMICIV_PREMEDS_DIR> <MIMICIV_MEDS_DIR> <N_PARALLEL_WORKERS>" \
    "" \
    "This script processes MIMIC-IV data through several steps, handling raw data conversion," \
    "sharding events, splitting patients, converting to sharded events, and merging into a MEDS cohort." \
    "" \
    "Arguments:" \
    " MIMICIV_RAW_DIR Directory containing raw MIMIC-IV data files." \
    " MIMICIV_PREMEDS_DIR Output directory for pre-MEDS data." \
    " MIMICIV_MEDS_DIR Output directory for processed MEDS data." \
    " N_PARALLEL_WORKERS Number of parallel workers for processing." \
    "" \
    "Options:" \
    " -h, --help Display this help message and exit."
  exit "${1:-1}"
}
|
||
# Show the usage text and stop when help is requested as the first argument.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  display_help
fi

# The four positional arguments are mandatory. Anything after them is kept in
# "$@" and passed on to the extraction scripts invoked later in this script.
if (( $# < 4 )); then
  echo "Error: Incorrect number of arguments provided." >&2
  display_help
fi

MIMICIV_RAW_DIR="$1"
MIMICIV_PREMEDS_DIR="$2"
MIMICIV_MEDS_DIR="$3"
N_PARALLEL_WORKERS="$4"

# Drop the four positionals so "$@" now holds only the extra arguments.
shift 4
|
||
# Overrides passed to every extraction stage, in the order the stages expect.
stage_args=(
  input_dir="$MIMICIV_PREMEDS_DIR"
  cohort_dir="$MIMICIV_MEDS_DIR"
  event_conversion_config_fp=./MIMIC-IV_Example/configs/event_configs.yaml
)

# Extra arguments for the stages that are launched as a Hydra multirun with the
# joblib launcher, one run per worker index in [0, N_PARALLEL_WORKERS).
multirun_args=(
  --multirun
  worker="range(0,$N_PARALLEL_WORKERS)"
  hydra/launcher=joblib
)

echo "Running pre-MEDS conversion."
./MIMIC-IV_Example/pre_MEDS.py raw_cohort_dir="$MIMICIV_RAW_DIR" output_dir="$MIMICIV_PREMEDS_DIR"

echo "Running shard_events.py with $N_PARALLEL_WORKERS workers in parallel"
./scripts/extraction/shard_events.py "${multirun_args[@]}" "${stage_args[@]}" "$@"

# This stage is invoked once, without the multirun arguments.
echo "Splitting patients in serial"
./scripts/extraction/split_and_shard_patients.py "${stage_args[@]}" "$@"

echo "Converting to sharded events with $N_PARALLEL_WORKERS workers in parallel"
./scripts/extraction/convert_to_sharded_events.py "${multirun_args[@]}" "${stage_args[@]}" "$@"

echo "Merging to a MEDS cohort with $N_PARALLEL_WORKERS workers in parallel"
./scripts/extraction/merge_to_MEDS_cohort.py "${multirun_args[@]}" "${stage_args[@]}" "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#!/usr/bin/env bash

# Abort the whole run as soon as any command (including a pipeline stage script) exits non-zero.
set -e
|
||
#######################################
# Print the usage message for this script and terminate the script.
# Arguments:
#   $1 - optional exit status; defaults to 1, the status used when no
#        argument is given.
# Outputs:
#   Writes the usage text to stdout.
# Returns:
#   Never returns; exits the calling shell.
#######################################
function display_help() {
  printf '%s\n' \
    "Usage: $0 <MIMICIV_RAW_DIR> <MIMICIV_PREMEDS_DIR> <MIMICIV_MEDS_DIR> <N_PARALLEL_WORKERS>" \
    "" \
    "This script processes MIMIC-IV data through several steps, handling raw data conversion," \
    "sharding events, splitting patients, converting to sharded events, and merging into a MEDS cohort." \
    "This script uses slurm to process the data in parallel via the 'submitit' Hydra launcher." \
    "" \
    "Arguments:" \
    " MIMICIV_RAW_DIR Directory containing raw MIMIC-IV data files." \
    " MIMICIV_PREMEDS_DIR Output directory for pre-MEDS data." \
    " MIMICIV_MEDS_DIR Output directory for processed MEDS data." \
    " N_PARALLEL_WORKERS Number of parallel workers for processing." \
    "" \
    "Options:" \
    " -h, --help Display this help message and exit."
  exit "${1:-1}"
}
|
||
# Show the usage text and stop when help is requested as the first argument.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  display_help
fi

# Exactly four positional arguments are accepted; extra arguments are rejected.
if (( $# != 4 )); then
  echo "Error: Incorrect number of arguments provided." >&2
  display_help
fi

# Exported so that child processes started by this script see them in their environment.
export MIMICIV_RAW_DIR="$1"
export MIMICIV_PREMEDS_DIR="$2"
export MIMICIV_MEDS_DIR="$3"
export N_PARALLEL_WORKERS="$4"

# Given the exact-count check above, this leaves "$@" empty.
shift 4
|
||
# Note we use `--multirun` throughout here to ensure the submitit launcher is used for every stage, so that
# this doesn't fall back on running anything locally in a setting where only slurm worker nodes have
# sufficient computational resources to run the actual jobs.
|
||
# echo "Running pre-MEDS conversion on one worker." | ||
# ./MIMIC-IV_Example/pre_MEDS.py \ | ||
# --multirun \ | ||
# worker="range(0,1)" \ | ||
# hydra/launcher=submitit_slurm \ | ||
# hydra.launcher.timeout_min=60 \ | ||
# hydra.launcher.cpus_per_task=10 \ | ||
# hydra.launcher.mem_gb=50 \ | ||
# hydra.launcher.partition="short" \ | ||
# raw_cohort_dir="$MIMICIV_RAW_DIR" \ | ||
# output_dir="$MIMICIV_PREMEDS_DIR" | ||
|
||
echo "Trying submitit launching with $N_PARALLEL_WORKERS jobs."

# Launch shard_events.py as a Hydra multirun through the submitit slurm
# launcher, one run per worker index in [0, N_PARALLEL_WORKERS).
# The hydra.job.env_copy override is quoted so the shell does not treat the
# square brackets as a glob pattern.
./scripts/extraction/shard_events.py \
  --multirun \
  worker="range(0,$N_PARALLEL_WORKERS)" \
  hydra/launcher=submitit_slurm \
  hydra.launcher.timeout_min=60 \
  hydra.launcher.cpus_per_task=10 \
  hydra.launcher.mem_gb=50 \
  hydra.launcher.partition="short" \
  "hydra.job.env_copy=[PATH]" \
  input_dir="$MIMICIV_PREMEDS_DIR" \
  cohort_dir="$MIMICIV_MEDS_DIR" \
  event_conversion_config_fp=./MIMIC-IV_Example/configs/event_configs.yaml \
  stage=shard_events
|
||
# echo "Splitting patients on one worker" | ||
# ./scripts/extraction/split_and_shard_patients.py \ | ||
# --multirun \ | ||
# worker="range(0,1)" \ | ||
# hydra/launcher=submitit_slurm \ | ||
# hydra.launcher.timeout_min=60 \ | ||
# hydra.launcher.cpus_per_task=10 \ | ||
# hydra.launcher.mem_gb=50 \ | ||
# hydra.launcher.partition="short" \ | ||
# input_dir="$MIMICIV_PREMEDS_DIR" \ | ||
# cohort_dir="$MIMICIV_MEDS_DIR" \ | ||
# event_conversion_config_fp=./MIMIC-IV_Example/configs/event_configs.yaml "$@" | ||
# | ||
# echo "Converting to sharded events with $N_PARALLEL_WORKERS workers in parallel" | ||
# ./scripts/extraction/convert_to_sharded_events.py \ | ||
# --multirun \ | ||
# worker="range(0,$N_PARALLEL_WORKERS)" \ | ||
# hydra/launcher=submitit_slurm \ | ||
# hydra.launcher.timeout_min=60 \ | ||
# hydra.launcher.cpus_per_task=10 \ | ||
# hydra.launcher.mem_gb=50 \ | ||
# hydra.launcher.partition="short" \ | ||
# input_dir="$MIMICIV_PREMEDS_DIR" \ | ||
# cohort_dir="$MIMICIV_MEDS_DIR" \ | ||
# event_conversion_config_fp=./MIMIC-IV_Example/configs/event_configs.yaml "$@" | ||
# | ||
# echo "Merging to a MEDS cohort with $N_PARALLEL_WORKERS workers in parallel" | ||
# ./scripts/extraction/merge_to_MEDS_cohort.py \ | ||
# --multirun \ | ||
# worker="range(0,$N_PARALLEL_WORKERS)" \ | ||
# hydra/launcher=submitit_slurm \ | ||
# hydra.launcher.timeout_min=60 \ | ||
# hydra.launcher.cpus_per_task=10 \ | ||
# hydra.launcher.mem_gb=50 \ | ||
# hydra.launcher.partition="short" \ | ||
# input_dir="$MIMICIV_PREMEDS_DIR" \ | ||
# cohort_dir="$MIMICIV_MEDS_DIR" \ | ||
# event_conversion_config_fp=./MIMIC-IV_Example/configs/event_configs.yaml "$@" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/usr/bin/env bash
#SBATCH -c 10                           # Request 10 CPU cores
#SBATCH -t 0-03:00                      # Runtime in D-HH:MM format
#SBATCH -p short                        # Partition to run in
#SBATCH --mem=300GB                     # Total memory for the job (all cores)
#SBATCH -o MIMIC_IV_MEDS_%j_sbatch.out  # File to which STDOUT will be written, including job ID (%j)
#SBATCH -e MIMIC_IV_MEDS_%j_sbatch.err  # File to which STDERR will be written, including job ID (%j)

# Runs ./MIMIC-IV_Example/joint_script.sh under `mprof` and `time`, forwarding
# every argument unchanged. The third argument is also used here as the
# directory under which the profiling and timing logs are stored.

cd /n/data1/hms/dbmi/zaklab/mmd/MEDS_polars_functions || exit

# Fail fast when the third argument is missing; otherwise LOG_DIR would
# collapse to "/.logs" and the script would try to create it at the filesystem root.
MIMICIV_MEDS_DIR="${3:?third argument (MEDS output directory) is required}"

LOG_DIR="$MIMICIV_MEDS_DIR/.logs"

echo "Running with saving to $LOG_DIR"

mkdir -p "$LOG_DIR"

# Because `time` follows a variable assignment, bash does not treat it as the
# shell keyword; it is looked up as an ordinary command. The stderr redirect
# applies to that whole command, so timings.txt receives everything written to
# stderr by it and by the processes it starts.
PATH="/home/mbm47/.conda/envs/MEDS_pipelines/bin:$PATH" \
  time mprof run --include-children --exit-code --output "$LOG_DIR/mprofile.dat" \
  ./MIMIC-IV_Example/joint_script.sh "$@" 2> "$LOG_DIR/timings.txt"
Oops, something went wrong.