Skip to content

Commit

Permalink
Standardized schema naming convention and fixed another typo.
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcdermott committed Jul 30, 2024
1 parent 986b296 commit 34465fe
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 12 deletions.
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ In addition, it can contain any number of custom properties to further enrich ob
function below generates a pyarrow schema for a given set of custom properties.

```python
def data_schema(custom_properties=[]):
def data(custom_properties=[]):
return pa.schema(
[
("patient_id", pa.int64()),
Expand Down Expand Up @@ -181,17 +181,15 @@ DatasetMetadata = TypedDict(
#### The code metadata schema.

```python
def code_metadata_schema(custom_per_code_properties=[]):
code_metadata = pa.schema(
def code_metadata(custom_per_code_properties=[]):
return pa.schema(
[
("code", pa.string()),
("description", pa.string()),
("parent_codes", pa.list_(pa.string())),
] + custom_per_code_properties
)

return code_metadata

# Python type for the above schema

CodeMetadata = TypedDict("CodeMetadata", {"code": str, "description": str, "parent_codes": List[str]}, total=False)
Expand Down
4 changes: 2 additions & 2 deletions src/meds/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from meds._version import __version__ # noqa

from .schema import (
data_schema, label, Label, train_split, tuning_split, held_out_split, patient_split, code_metadata,
data, label, Label, train_split, tuning_split, held_out_split, patient_split, code_metadata,
dataset_metadata, CodeMetadata, DatasetMetadata, birth_code, death_code
)


# List all objects that we want to export
_exported_objects = {
'data_schema': data_schema,
'data': data,
'label': label,
'Label': Label,
'train_split': train_split,
Expand Down
8 changes: 3 additions & 5 deletions src/meds/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
birth_code = "MEDS_BIRTH"
death_code = "MEDS_DEATH"

def data_schema(custom_properties=[]):
def data(custom_properties=[]):
return pa.schema(
[
("patient_id", pa.int64()),
Expand Down Expand Up @@ -126,17 +126,15 @@ def data_schema(custom_properties=[]):
# The code metadata schema.
# This is a parquet schema.

def code_metadata_schema(custom_per_code_properties=[]):
code_metadata = pa.schema(
def code_metadata(custom_per_code_properties=[]):
return pa.schema(
[
("code", pa.string()),
("description", pa.string()),
("parent_codes", pa.list_(pa.string())),
] + custom_per_code_properties
)

return code_metadata

# Python type for the above schema

CodeMetadata = TypedDict("CodeMetadata", {"code": str, "description": str, "parent_codes": List[str]}, total=False)

0 comments on commit 34465fe

Please sign in to comment.