diff --git a/README.md b/README.md index d0c3c82..ebd5e9c 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ In addition, it can contain any number of custom properties to further enrich ob function below generates a pyarrow schema for a given set of custom properties. ```python -def data_schema(custom_properties=[]): +def data(custom_properties=[]): return pa.schema( [ ("patient_id", pa.int64()), @@ -181,8 +181,8 @@ DatasetMetadata = TypedDict( #### The code metadata schema. ```python -def code_metadata_schema(custom_per_code_properties=[]): - code_metadata = pa.schema( +def code_metadata(custom_per_code_properties=[]): + return pa.schema( [ ("code", pa.string()), ("description", pa.string()), @@ -190,8 +190,6 @@ def code_metadata_schema(custom_per_code_properties=[]): ] + custom_per_code_properties ) - return code_metadata - # Python type for the above schema CodeMetadata = TypedDict("CodeMetadata", {"code": str, "description": str, "parent_codes": List[str]}, total=False) diff --git a/src/meds/__init__.py b/src/meds/__init__.py index eb6ebd6..8853647 100644 --- a/src/meds/__init__.py +++ b/src/meds/__init__.py @@ -1,14 +1,14 @@ from meds._version import __version__ # noqa from .schema import ( - data_schema, label, Label, train_split, tuning_split, held_out_split, patient_split, code_metadata, + data, label, Label, train_split, tuning_split, held_out_split, patient_split, code_metadata, dataset_metadata, CodeMetadata, DatasetMetadata, birth_code, death_code ) # List all objects that we want to export _exported_objects = { - 'data_schema': data_schema, + 'data': data, 'label': label, 'Label': Label, 'train_split': train_split, diff --git a/src/meds/schema.py b/src/meds/schema.py index 03b4d73..3756283 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -26,7 +26,7 @@ birth_code = "MEDS_BIRTH" death_code = "MEDS_DEATH" -def data_schema(custom_properties=[]): +def data(custom_properties=[]): return pa.schema( [ ("patient_id", pa.int64()), @@ -126,8 +126,8 @@ def data_schema(custom_properties=[]): # The code metadata schema. # This is a parquet schema. -def code_metadata_schema(custom_per_code_properties=[]): - code_metadata = pa.schema( +def code_metadata(custom_per_code_properties=[]): + return pa.schema( [ ("code", pa.string()), ("description", pa.string()), @@ -135,8 +135,6 @@ def code_metadata_schema(custom_per_code_properties=[]): ] + custom_per_code_properties ) - return code_metadata - # Python type for the above schema CodeMetadata = TypedDict("CodeMetadata", {"code": str, "description": str, "parent_codes": List[str]}, total=False)