From 0cfcc816576dff74cd2a41be89818fa351cf880b Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Sun, 28 Apr 2024 15:15:05 +0000 Subject: [PATCH 01/10] Start testing everything --- src/meds/schema.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/meds/schema.py b/src/meds/schema.py index 3689941..095b0b8 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -22,24 +22,21 @@ death_code = "SNOMED/419620001" -def patient_schema(per_event_metadata_schema=pa.null()): +def patient_schema(per_event_properties_schema=pa.null()): # Return a patient schema with a particular per event metadata subschema - measurement = pa.struct( + event = pa.struct( [ + ("time", pa.timestamp("us")), ("code", pa.string()), ("text_value", pa.string()), ("numeric_value", pa.float32()), - ("datetime_value", pa.timestamp("us")), - ("metadata", per_event_metadata_schema), + ("properties", per_event_properties_schema), ] ) - event = pa.struct([("time", pa.timestamp("us")), ("measurements", pa.list_(measurement))]) - patient = pa.schema( [ ("patient_id", pa.int64()), - ("static_measurements", pa.list_(measurement)), ("events", pa.list_(event)), # Require ordered by time ] ) @@ -49,20 +46,18 @@ def patient_schema(per_event_metadata_schema=pa.null()): # Python types for the above schema -Measurement = TypedDict( - "Measurement", +Event = TypedDict( + "Event", { + "time": datetime.datetime, "code": str, "text_value": NotRequired[str], "numeric_value": NotRequired[float], - "datetime_value": NotRequired[datetime.datetime], - "metadata": NotRequired[Any], + "properties": NotRequired[Any], }, ) -Event = TypedDict("Event", {"time": datetime.datetime, "measurements": List[Measurement]}) - -Patient = TypedDict("Patient", {"patient_id": int, "static_measurements": List[Measurement], "events": List[Event]}) +Patient = TypedDict("Patient", {"patient_id": int, "events": List[Event]}) ############################################################ From 21410c219d8792fab605055149fd308593df98cc Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Sun, 28 Apr 2024 15:17:18 +0000 Subject: [PATCH 02/10] Add static event time --- src/meds/schema.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/meds/schema.py b/src/meds/schema.py index 095b0b8..2ce0ba7 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -21,6 +21,9 @@ birth_code = "SNOMED/184099003" death_code = "SNOMED/419620001" +# We define static events as always occurring on January 1st, 1 AD +static_event_time = datetime.datetime(1, 1, 1) + def patient_schema(per_event_properties_schema=pa.null()): # Return a patient schema with a particular per event metadata subschema From 4ed6bc7d5c641df0c56243be653bdad7cc030829 Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Tue, 30 Apr 2024 10:01:36 -0700 Subject: [PATCH 03/10] Update schema.py --- src/meds/schema.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/meds/schema.py b/src/meds/schema.py index 2ce0ba7..257649e 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -24,23 +24,22 @@ # We define static events as always occurring on January 1st, 1 AD static_event_time = datetime.datetime(1, 1, 1) - def patient_schema(per_event_properties_schema=pa.null()): # Return a patient schema with a particular per event metadata subschema event = pa.struct( [ - ("time", pa.timestamp("us")), - ("code", pa.string()), - ("text_value", pa.string()), - ("numeric_value", pa.float32()), - ("properties", per_event_properties_schema), + pa.field("time", pa.timestamp("us"), nullable=False), + pa.field("code", pa.string(), nullable=False), + pa.field("text_value", pa.string()), + pa.field("numeric_value", pa.float32()), + pa.field("properties", per_event_properties_schema), ] ) patient = pa.schema( [ - ("patient_id", pa.int64()), - ("events", pa.list_(event)), # Require ordered by time + pa.field("patient_id", pa.int64(), nullable=False), + pa.field("events", pa.list_(event), nullable=False), # Require ordered by time ] ) From ae31fdebd640ae5d45749b3f453c827a6e20fdfb Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Tue, 30 Apr 2024 10:05:06 -0700 Subject: [PATCH 04/10] Update schema.py --- src/meds/schema.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/meds/schema.py b/src/meds/schema.py index 257649e..03f978a 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -32,6 +32,7 @@ def patient_schema(per_event_properties_schema=pa.null()): pa.field("code", pa.string(), nullable=False), pa.field("text_value", pa.string()), pa.field("numeric_value", pa.float32()), + pa.field("datetime_value", pa.timestamp("us")), pa.field("properties", per_event_properties_schema), ] ) @@ -55,6 +56,7 @@ def patient_schema(per_event_properties_schema=pa.null()): "code": str, "text_value": NotRequired[str], "numeric_value": NotRequired[float], + "datetime_value": NotRequired[datetime.datetime], "properties": NotRequired[Any], }, ) From f116c573560cd43575fe26aec0f37bd93c6fdb2f Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Tue, 30 Apr 2024 20:28:16 -0700 Subject: [PATCH 05/10] Update schema.py --- src/meds/schema.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/meds/schema.py b/src/meds/schema.py index 03f978a..78a8cb2 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -21,14 +21,11 @@ birth_code = "SNOMED/184099003" death_code = "SNOMED/419620001" -# We define static events as always occurring on January 1st, 1 AD -static_event_time = datetime.datetime(1, 1, 1) - def patient_schema(per_event_properties_schema=pa.null()): # Return a patient schema with a particular per event metadata subschema event = pa.struct( [ - pa.field("time", pa.timestamp("us"), nullable=False), + pa.field("time", pa.timestamp("us")), # Static events will have a null timestamp pa.field("code", pa.string(), nullable=False), pa.field("text_value", pa.string()), pa.field("numeric_value", pa.float32()), @@ -40,7 +37,7 @@ def patient_schema(per_event_properties_schema=pa.null()): patient = pa.schema( [ pa.field("patient_id", pa.int64(), nullable=False), - pa.field("events", pa.list_(event), nullable=False), # Require ordered by time + pa.field("events", pa.list_(event), nullable=False), # Require ordered by time, nulls must be first ] ) From 042d7e61d696549aa5dfe63dfa791db4a7e635af Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Tue, 30 Apr 2024 20:48:04 -0700 Subject: [PATCH 06/10] Update test_schema.py --- tests/test_schema.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/test_schema.py b/tests/test_schema.py index ce27f9b..ccdf31b 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -15,22 +15,13 @@ def test_patient_schema(): patient_data = [ { "patient_id": 123, - "static_measurements": [{ - "code": "some_static_code", - "text_value": "example", - "numeric_value": 1.0, - "datetime_value": datetime.datetime(2019, 1, 1, 0, 0, 0), - "metadata": None, - }], "events": [{ # Nested list for events "time": datetime.datetime(2020, 1, 1, 12, 0, 0), - "measurements": [{ # Nested list for measurements - "code": "some_code", - "text_value": "Example", - "numeric_value": 10.0, - "datetime_value": datetime.datetime(2020, 1, 1, 12, 0, 0), - "metadata": None - }] + "code": "some_code", + "text_value": "Example", + "numeric_value": 10.0, + "datetime_value": datetime.datetime(2020, 1, 1, 12, 0, 0), + "properties": None }] } ] From dee42903d4289c28479a4cfc427537a8c9140c6a Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Tue, 30 Apr 2024 20:48:46 -0700 Subject: [PATCH 07/10] Update README.md --- README.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/README.md b/README.md index b97deac..b84b352 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,11 @@ The Python type signature for the schema is as follows: Patient = TypedDict('Patient', { 'patient_id': int, - 'static_measurements': List[Measurement], 'events': List[Event], }) Event = TypedDict('Event',{ - 'time': datetime.datetime, - 'measurements': List[Measurement], -}) - -Measurement = TypedDict('Measurement', { + 'time': NotRequired[datetime.datetime], 'code': str, 'text_value': NotRequired[str], 'numeric_value': NotRequired[float], From e33037640d2c6ab2c4700526d1c2f35f79051a59 Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Tue, 30 Apr 2024 20:49:03 -0700 Subject: [PATCH 08/10] Update __init__.py --- src/meds/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/meds/__init__.py b/src/meds/__init__.py index 27701a2..2c76ef6 100644 --- a/src/meds/__init__.py +++ b/src/meds/__init__.py @@ -1,6 +1,6 @@ from meds._version import __version__ # noqa -from .schema import (patient_schema, Measurement, Event, Patient, label, Label, +from .schema import (patient_schema, Event, Patient, label, Label, code_metadata_entry, code_metadata, dataset_metadata, CodeMetadataEntry, CodeMetadata, DatasetMetadata, birth_code, death_code) @@ -9,7 +9,6 @@ # List all objects that we want to export _exported_objects = { 'patient_schema': patient_schema, - 'Measurement': Measurement, 'Event': Event, 'Patient': Patient, 'label': label, From c1ebdc86884126a02a0103baac32d7cab614d09e Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Tue, 30 Apr 2024 20:49:21 -0700 Subject: [PATCH 09/10] Update schema.py --- src/meds/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/meds/schema.py b/src/meds/schema.py index 78a8cb2..8bdd748 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -49,7 +49,7 @@ def patient_schema(per_event_properties_schema=pa.null()): Event = TypedDict( "Event", { - "time": datetime.datetime, + "time": NotRequired[datetime.datetime], "code": str, "text_value": NotRequired[str], "numeric_value": NotRequired[float], From 08b25625ace4d922b93483306532dde97ad39fa4 Mon Sep 17 00:00:00 2001 From: Ethan Steinberg Date: Sat, 4 May 2024 14:43:27 -0700 Subject: [PATCH 10/10] Fix null issue --- .gitignore | 1 + src/meds/schema.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 68bc17f..e84b398 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # Byte-compiled / optimized / DLL files +_version.py __pycache__/ *.py[cod] *$py.class diff --git a/src/meds/schema.py b/src/meds/schema.py index 8bdd748..02e7f48 100644 --- a/src/meds/schema.py +++ b/src/meds/schema.py @@ -25,19 +25,19 @@ def patient_schema(per_event_properties_schema=pa.null()): # Return a patient schema with a particular per event metadata subschema event = pa.struct( [ - pa.field("time", pa.timestamp("us")), # Static events will have a null timestamp - pa.field("code", pa.string(), nullable=False), - pa.field("text_value", pa.string()), - pa.field("numeric_value", pa.float32()), - pa.field("datetime_value", pa.timestamp("us")), - pa.field("properties", per_event_properties_schema), + ("time", pa.timestamp("us")), # Static events will have a null timestamp + ("code", pa.string()), + ("text_value", pa.string()), + ("numeric_value", pa.float32()), + ("datetime_value", pa.timestamp("us")), + ("properties", per_event_properties_schema), ] ) patient = pa.schema( [ - pa.field("patient_id", pa.int64(), nullable=False), - pa.field("events", pa.list_(event), nullable=False), # Require ordered by time, nulls must be first + ("patient_id", pa.int64()), + ("events", pa.list_(event)), # Require ordered by time, nulls must be first ] )