Skip to content

Commit

Permalink
Update schema.py
Browse files Browse the repository at this point in the history
  • Loading branch information
EthanSteinberg authored Jul 19, 2024
1 parent 28b21b0 commit 6544c3d
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion src/meds/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
# glob("data/**/*.parquet") is the recommended way for obtaining all patient event files.
# - dataset_metadata.json
# Dataset-level metadata containing information about the ETL used, data version, etc.
# - code_metadata.parquet
# - (Optional) code_metadata.parquet
# Code-level metadata containing information about the code descriptions, standard mappings, etc.
# - (Optional) patient_split.csv
# A specification of the patient splits that should be used: each row pairs a patient_id with a split name (e.g. "train", "tuning" or "test").

############################################################

Expand Down Expand Up @@ -78,6 +80,22 @@ def patient_events_schema(custom_per_event_properties=[]):
"categorical_value" : Optional[str],
}, total=False)


############################################################

# The patient split schema.
#
# Defines the named splits and the two-column table layout that pairs a
# patient identifier with a split name.

# String labels for the three named splits.
train_split = "train"
tuning_split = "tuning"
test_split = "test"

# Column layout: a 64-bit integer `patient_id` and a string `split`.
# NOTE(review): presumably `split` holds one of the labels above; nothing in
# the schema itself enforces that -- confirm against the writers/readers.
patient_split = pa.schema(
    [
        pa.field("patient_id", pa.int64()),
        pa.field("split", pa.string()),
    ]
)

############################################################

# The dataset metadata schema.
Expand Down

0 comments on commit 6544c3d

Please sign in to comment.