Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#2714 prepare to define full day zero schema #2782

Merged
merged 8 commits into from
Jul 26, 2022
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[
{
"key": "_id",
"type": "string",
"data_dictionary_text": "A unique identifier for this case, in the form of a mongoDB object identifier (24 characters 0-9a-f).",
"required": false
},
{
"key": "confirmationDate",
"type": "date",
"data_dictionary_text": "The date on which the case was confirmed. There will also be a confirmation event but the date is stored denormalised for efficiency.",
"required": true
},
{
"key": "caseReference",
"type": "CaseReference",
"data_dictionary_text": "Information about the source and status of this case.",
"required": true
},
{
"key": "caseExclusion",
"type": "CaseExclusion",
"data_dictionary_text": "If this case is excluded from the line list, information about when and why it was excluded.",
"required": false
},
{
"key": "location",
"type": "geofeature",
"data_dictionary_text": "The location associated with this case.",
"required": false
}
]
108 changes: 11 additions & 97 deletions data-serving/reusable-data-service/data_service/model/case.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import dataclasses
import datetime
import importlib.resources
import json
import flask.json

from collections.abc import Callable
from operator import attrgetter
from typing import Any, List

from data_service.model.case_exclusion_metadata import CaseExclusionMetadata
from data_service.model.case_reference import CaseReference
Expand All @@ -16,105 +14,20 @@
ConflictError,
DependencyFailedError,
PreconditionUnsatisfiedError,
ValidationError,
)


@dataclasses.dataclass()
class DayZeroCase(Document):
    """The "day-zero" data schema for Global.health.

    This is the minimum information we want to record about an individual
    case for the line list at the start of an outbreak.

    All fields are keyword-only and excluded from the generated initialiser,
    so an instance is created empty and populated afterwards (for example by
    from_dict). Call validate() to find out whether a populated instance is
    in a consistent state; custom validation logic also lives there."""

    _: dataclasses.KW_ONLY
    # _id is an opaque identifier as far as the model is concerned: the store
    # may use whatever format it needs to identify a stored case. It stays
    # None for cases that have been created but not yet saved into a store.
    _id: str = dataclasses.field(init=False, default=None)
    # confirmationDate deliberately has no default, so the attribute does not
    # exist until it is assigned; validate() checks for that.
    confirmationDate: datetime.date = dataclasses.field(init=False)
    caseReference: CaseReference = dataclasses.field(init=False, default=None)
    caseExclusion: CaseExclusionMetadata = dataclasses.field(init=False, default=None)
    location: Feature = dataclasses.field(init=False, default=None)

    # Unannotated, so this is a plain class attribute rather than a dataclass field.
    custom_fields = []

    @classmethod
    def from_json(cls, obj: str) -> type:
        """Parse a JSON string and build an instance of this class from it."""
        return cls.from_dict(json.loads(obj))

    @classmethod
    def from_dict(cls, dictionary: dict[str, Any]) -> type:
        """Build and validate an instance from a dictionary of field values.

        Date and location fields, the case reference, the case exclusion and
        the _id are each converted from their dictionary representation; any
        other key is stored on the instance unchanged. Raises whatever
        validate() raises if the resulting case is inconsistent."""
        instance = cls()
        for name, raw in dictionary.items():
            if name in cls.date_fields():
                converted = cls.interpret_date(raw)
            elif name in cls.location_fields():
                converted = Feature.from_dict(raw)
            elif name == "caseReference":
                converted = None if raw is None else CaseReference.from_dict(raw)
            elif name == "caseExclusion":
                converted = (
                    None if raw is None else CaseExclusionMetadata.from_dict(raw)
                )
            elif name == "_id":
                # a dict here came from a BSON objectID representation
                converted = raw["$oid"] if isinstance(raw, dict) else raw
            else:
                converted = raw
            setattr(instance, name, converted)
        instance.validate()
        return instance

    def validate(self):
        """Raise ValidationError unless I am in a consistent state.

        The confirmation date and case reference must both be present and
        not None, the case reference must itself validate, and every custom
        field marked as required must have a value."""
        if not hasattr(self, "confirmationDate"):
            raise ValidationError("Confirmation Date is mandatory")
        if self.confirmationDate is None:
            raise ValidationError("Confirmation Date must have a value")
        if not hasattr(self, "caseReference"):
            raise ValidationError("Case Reference is mandatory")
        if self.caseReference is None:
            raise ValidationError("Case Reference must have a value")
        self.caseReference.validate()
        # Only required fields have their attribute read, and the search
        # stops at the first one that has no value.
        unset = next(
            (
                spec
                for spec in self.custom_fields
                if spec.required is True and attrgetter(spec.key)(self) is None
            ),
            None,
        )
        if unset is not None:
            raise ValidationError(f"{unset.key} must have a value")


observers = []

# Actually we want to capture extra fields which can be specified dynamically:
# so Case is the class that you should use.


def make_custom_case_class(name: str, fields=[], field_models=[]) -> type:
def make_custom_case_class(name: str, field_models=[]) -> type:
"""Generate a class extending the DayZeroCase class with additional fields.
fields is a list of dataclass fields that should be added to the generated class.
field_models is a list of model objects describing the fields for the data dictionary
and for validation."""
# FIXME generate the fields list from the field_models
global Case
fields = [f.dataclasses_tuple() for f in field_models]
try:
new_case_class = dataclasses.make_dataclass(name, fields, bases=(DayZeroCase,))
new_case_class = dataclasses.make_dataclass(name, fields, bases=(Document,))
except TypeError as e:
raise DependencyFailedError(*(e.args))
new_case_class.custom_fields = field_models
Expand Down Expand Up @@ -154,13 +67,16 @@ def remove_case_class_observer(observer: Callable[[type], None]) -> None:
def reset_custom_case_fields() -> None:
"""When you want to get back to where you started, for example to load the field definitions from
storage or if you're writing tests that modify the Case class."""
make_custom_case_class("Case", [], [])
day_zero_field_definitions = json.loads(
importlib.resources.read_text("data_service", "day_zero_fields.json")
)
day_zero_fields = [Field.from_dict(f) for f in day_zero_field_definitions]
make_custom_case_class("Case", day_zero_fields)


def add_field_to_case_class(field_model: Field) -> None:
existing_fields = dataclasses.fields(Case)
field_models = Case.custom_fields
if field_model.key in [f.name for f in existing_fields]:
if field_model.key in [f.key for f in field_models]:
raise ConflictError(f"field {field_model.key} already exists")
if field_model.type not in Field.acceptable_types:
raise PreconditionUnsatisfiedError(
Expand All @@ -170,11 +86,9 @@ def add_field_to_case_class(field_model: Field) -> None:
raise PreconditionUnsatisfiedError(
f"field {field_model.key} is required so it must have a default value"
)
fields_list = [(f.name, f.type, f) for f in existing_fields]
fields_list.append(field_model.dataclasses_tuple())
field_models.append(field_model)
# re-invent the Case class
make_custom_case_class("Case", fields_list, field_models)
make_custom_case_class("Case", field_models)


# let's start with a clean slate on first load
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class CaseReference(Document):

def validate(self):
"""Check whether I am consistent. Raise ValueError if not."""
super().validate()
if not hasattr(self, "sourceId"):
raise ValueError("Source ID is mandatory")
elif self.sourceId is None:
Expand Down
50 changes: 48 additions & 2 deletions data-serving/reusable-data-service/data_service/model/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
import datetime
import io
import operator
import json
import flask.json

from data_service.model.document_update import DocumentUpdate
from data_service.model.geojson import Feature
from data_service.util.errors import ValidationError
from data_service.util.json_encoder import JSONEncoder

from typing import List
from typing import Any, List


@dataclasses.dataclass
class Document:
"""The base class for anything that's going into the database."""

custom_fields = []

def to_dict(self):
    """Return my dataclass fields as a plain dictionary."""
    as_mapping = dataclasses.asdict(self)
    return as_mapping
Expand All @@ -33,9 +38,13 @@ def date_fields(cls) -> list[str]:
def location_fields(cls) -> list[str]:
return cls.fields_of_class(Feature)

@classmethod
def document_fields(cls) -> list[str]:
    """Names of my fields that fields_of_class reports for the Document type."""
    nested_document_type = Document
    return cls.fields_of_class(nested_document_type)

@classmethod
def fields_of_class(cls, a_class: type) -> list[str]:
    """Return the names of my dataclass fields declared as a_class or a subclass of it.

    Subclasses are matched so that asking for a base type (for example
    Document) also finds fields declared with one of its subclasses.

    Fields whose annotation is not an actual class (a typing generic such as
    List[str], or a string forward reference) can never match and are skipped.

    Parameters:
        a_class: the class to look for among the declared field types.

    Returns:
        The matching field names, in dataclass field order.
    """

    def declared_as(candidate) -> bool:
        # issubclass() raises TypeError when its first argument is not a
        # class; such an annotation simply does not match.
        try:
            return issubclass(candidate, a_class)
        except TypeError:
            return False

    return [f.name for f in dataclasses.fields(cls) if declared_as(f.type)]

@staticmethod
def interpret_date(maybe_date) -> datetime.date:
Expand Down Expand Up @@ -152,6 +161,43 @@ def apply_update(self, update: DocumentUpdate):
for key in update.unsets_iter():
self._internal_set_value(key, None)

@classmethod
def from_json(cls, obj: str) -> type:
    """Parse a JSON string and build an instance of this class from the result."""
    return cls.from_dict(json.loads(obj))

@classmethod
def from_dict(cls, dictionary: dict[str, Any]) -> type:
    """Build and validate an instance of this class from a dictionary.

    Each key is set as an attribute on a freshly created instance. Values
    for date fields go through interpret_date, values for location fields
    through Feature.from_dict, and values for nested document fields through
    the from_dict of that field's type (None is kept as None). Everything
    else is stored unchanged. The instance is validated before it is
    returned, so whatever validate() raises propagates to the caller."""
    instance = cls()
    for name, raw in dictionary.items():
        if name in cls.date_fields():
            converted = cls.interpret_date(raw)
        elif name in cls.location_fields():
            converted = Feature.from_dict(raw)
        elif name in cls.document_fields():
            nested_type = cls.field_type_for_key_path(name)
            converted = None if raw is None else nested_type.from_dict(raw)
        else:
            converted = raw
        setattr(instance, name, converted)
    instance.validate()
    return instance

def validate(self):
    """Raise ValidationError unless I am in a consistent state.

    Every entry in custom_fields that is marked required must have a value
    that is not None, and every nested document field that has a value is
    asked to validate itself."""
    for spec in self.custom_fields:
        read = operator.attrgetter(spec.key)
        # The attribute is only read when a check actually needs it, so an
        # optional, non-document field that was never set is not touched.
        if spec.required is True and read(self) is None:
            raise ValidationError(f"{spec.key} must have a value")
        if spec.key not in self.document_fields():
            continue
        nested = read(self)
        if nested is not None:
            nested.validate()

def _internal_set_value(self, key, value):
self._internal_ensure_containers_exist(key)
container, prop = self._internal_object_and_property_for_key_path(key)
Expand Down
23 changes: 22 additions & 1 deletion data-serving/reusable-data-service/data_service/model/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
from datetime import date
from typing import Optional, Union

from data_service.model.case_exclusion_metadata import CaseExclusionMetadata
from data_service.model.case_reference import CaseReference
from data_service.model.document import Document
from data_service.model.geojson import Feature
from data_service.util.errors import PreconditionUnsatisfiedError


Expand All @@ -20,7 +23,15 @@ class Field(Document):
STRING = "string"
DATE = "date"
INTEGER = "integer"
type_map = {STRING: str, DATE: date, INTEGER: int}
LOCATION = "geofeature"
type_map = {
STRING: str,
DATE: date,
INTEGER: int,
LOCATION: Feature,
"CaseReference": CaseReference,
"CaseExclusion": CaseExclusionMetadata,
}
acceptable_types = type_map.keys()

@classmethod
Expand All @@ -30,6 +41,16 @@ def model_type(cls, name: str) -> type:
except KeyError:
raise PreconditionUnsatisfiedError(f"cannot use type {name} in a Field")

@classmethod
def from_dict(cls, dictionary):
    """Build a Field from its dictionary description.

    The key, type, data dictionary text, required flag and default are read
    from the dictionary and passed positionally, in that order, to the
    class; any entry missing from the dictionary is passed as None."""
    ordered_keys = ("key", "type", "data_dictionary_text", "required", "default")
    return cls(*(dictionary.get(name) for name in ordered_keys))

def python_type(self) -> type:
    """Return what model_type gives for my declared type name."""
    declared_name = self.type
    return self.model_type(declared_name)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def custom_field_names(cls) -> List[str]:
@classmethod
def custom_none_field_values(cls) -> List[str]:
    """Provide an application-specific report of this class's fields and values for CSV export.

    Returns one empty string per entry in cls.field_getters. Nothing is
    written to stdout; the debug print that used to run on every call has
    been removed.
    """
    return [""] * len(cls.field_getters)

def custom_field_values(self) -> List[str]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from bson.errors import InvalidId
from bson.json_util import dumps
from bson.objectid import ObjectId
from typing import List, Tuple
from typing import Any, List, Tuple


Case = None
Expand Down Expand Up @@ -60,7 +60,7 @@ def case_by_id(self, id: str):
case = self.get_case_collection().find_one({"_id": ObjectId(id)})
if case is not None:
# case includes BSON fields like ObjectID - convert into JSON for use by the app
return Case.from_json(dumps(case))
return Case.from_dict(MongoStore.mongo_document_to_json(case))
else:
return None
except InvalidId:
Expand All @@ -70,7 +70,7 @@ def fetch_cases(self, page: int, limit: int, filter: Filter):
cases = self.get_case_collection().find(
filter.to_mongo_query(), skip=(page - 1) * limit, limit=limit
)
return [Case.from_json(dumps(c)) for c in cases]
return [Case.from_dict(MongoStore.mongo_document_to_json(c)) for c in cases]

def count_cases(self, filter: Filter) -> int:
if isinstance(filter, Anything):
Expand Down Expand Up @@ -130,7 +130,7 @@ def excluded_cases(self, source_id: str, filter: Filter) -> List[Case]:
]
}
)
return [Case.from_json(dumps(c)) for c in cases]
return [Case.from_dict(MongoStore.mongo_document_to_json(c)) for c in cases]

def update_case(self, id: str, update: DocumentUpdate):
if len(update) == 0:
Expand Down Expand Up @@ -194,7 +194,10 @@ def identified_case_iterator(self, caseIds: List[str]):
@staticmethod
def case_model_iterator(mongo_iterator):
"""Turn an iterator of mongo results into an iterator of cases."""
return map(lambda c: Case.from_json(dumps(c)), mongo_iterator)
return map(
lambda c: Case.from_dict(MongoStore.mongo_document_to_json(c)),
mongo_iterator,
)

@staticmethod
def setup():
Expand Down Expand Up @@ -232,6 +235,16 @@ def case_to_bson_compatible_dict(case: Case):
] = MongoStore.case_exclusion_to_bson_compatible_dict(case.caseExclusion)
return bson_case

@staticmethod
def mongo_document_to_json(doc) -> Any:
    """Smooth over bson-specific details of a pymongo document before the app sees it.

    The document is serialised with dumps and parsed back with loads; if the
    resulting "_id" is a dictionary it is replaced by its "$oid" entry."""
    # Because bson.json_util doesn't expose dump we have to round-trip through a string
    plain = loads(dumps(doc))
    identifier = plain["_id"]
    if isinstance(identifier, dict):
        plain["_id"] = identifier["$oid"]
    return plain

@staticmethod
def case_exclusion_to_bson_compatible_dict(exclusion: CaseExclusionMetadata):
"""Turn a case exclusion document into a representation that mongo will accept."""
Expand Down
Loading