globaldothealth · iamleeg · Jul 7, 2022 · Jul 6, 2022 · Jul 6, 2022 · Jul 6, 2022
diff --git a/data-serving/reusable-data-service/data_service/__init__.py b/data-serving/reusable-data-service/data_service/__init__.py
@@ -1,7 +1,3 @@
 __version__ = "0.1.0"
 
-from .model.case import Case
-from .model.case_reference import CaseReference
-from .controller.case_controller import CaseController
-from .stores.mongo_store import MongoStore
-from .main import app, main, set_up_controllers
+from .main import app, main
diff --git a/data-serving/reusable-data-service/data_service/controller/case_controller.py b/data-serving/reusable-data-service/data_service/controller/case_controller.py
@@ -1,9 +1,11 @@
 from flask import jsonify
 from datetime import date
-from typing import List
+from typing import List, Optional
 
 from data_service.model.case import Case
+from data_service.model.case_exclusion_metadata import CaseExclusionMetadata
 from data_service.model.case_page import CasePage
+from data_service.model.case_reference import CaseReference
 from data_service.model.case_upsert_outcome import CaseUpsertOutcome
 from data_service.model.filter import (
     Anything,
@@ -148,6 +150,44 @@ def generate_output():
 
         return generate_output
 
+    def batch_status_change(
+        self,
+        status: str,
+        note: Optional[str] = None,
+        case_ids: Optional[List[str]] = None,
+        filter: Optional[str] = None,
+    ):
+        """Update all of the cases identified in case_ids to have the supplied curation status.
+        Raises PreconditionUnsatisfiedError or ValidationError on invalid input."""
+        statuses = CaseReference.valid_statuses()
+        if not status in statuses:
+            raise PreconditionUnsatisfiedError(f"status {status} not one of {statuses}")
+        if filter is not None and case_ids is not None:
+            raise PreconditionUnsatisfiedError(
+                "Do not supply both a filter and a list of IDs"
+            )
+        if status == "EXCLUDED" and note is None:
+            raise ValidationError(f"Excluding cases must be documented in a note")
+
+        def update_status(id: str, status: str, note: str):
+            if status == "EXCLUDED":
+                caseExclusion = CaseExclusionMetadata()
+                caseExclusion.note = note
+            else:
+                caseExclusion = None
+            self.store.update_case_status(id, status, caseExclusion)
+
+        if case_ids is not None:
+            for anId in case_ids:
+                update_status(anId, status, note)
+        else:
+            predicate = CaseController.parse_filter(filter)
+            if predicate is None:
+                raise ValidationError(f"cannot understand query {filter}")
+            case_iterator = self.store.matching_case_iterator(predicate)
+            for case in case_iterator:
+                update_status(case._id, status, note)
+
     def create_case_if_valid(self, maybe_case: dict):
         """Attempts to create a case from an input dictionary and validate it against
         the application rules. Raises ValidationError or PreconditionUnsatisfiedError on invalid input."""

diff --git a/data-serving/reusable-data-service/data_service/main.py b/data-serving/reusable-data-service/data_service/main.py
@@ -1,6 +1,7 @@
 from datetime import date
 from flask import Flask, jsonify, request
-from . import CaseController, MongoStore
+from data_service.controller.case_controller import CaseController
+from data_service.stores.mongo_store import MongoStore
 from data_service.util.errors import (
     PreconditionUnsatisfiedError,
     UnsupportedTypeError,
@@ -88,6 +89,21 @@ def download_cases():
         return jsonify({"message": e.args[0]}), e.http_code
 
 
+@app.route("/api/cases/batchStatusChange", methods=["POST"])
+def batch_status_change():
+    try:
+        req = request.get_json()
+        case_controller.batch_status_change(
+            status=req.get("status"),
+            note=req.get("note"),
+            case_ids=req.get("caseIds"),
+            filter=req.get("query"),
+        )
+        return "", 204
+    except WebApplicationError as e:
+        return jsonify({"message": e.args[0]}), e.http_code
+
+
 def set_up_controllers():
     global case_controller
     store_options = {"mongodb": MongoStore.setup}

diff --git a/data-serving/reusable-data-service/data_service/model/case.py b/data-serving/reusable-data-service/data_service/model/case.py
@@ -5,16 +5,17 @@
 
 from typing import Any, List
 
+from data_service.model.case_exclusion_metadata import CaseExclusionMetadata
 from data_service.model.case_reference import CaseReference
+from data_service.model.document import Document
 from data_service.util.errors import (
     PreconditionUnsatisfiedError,
     ValidationError,
 )
-from data_service.util.json_encoder import JSONEncoder
 
 
 @dataclasses.dataclass()
-class DayZeroCase:
+class DayZeroCase(Document):
     """This class implements the "day-zero" data schema for Global.health.
     At the beginning of an outbreak, we want to collect at least this much
     information about an individual case for the line list.
@@ -33,6 +34,7 @@ class DayZeroCase:
     _id: str = dataclasses.field(init=False, default=None)
     confirmationDate: datetime.date = dataclasses.field(init=False)
     caseReference: CaseReference = dataclasses.field(init=False, default=None)
+    caseExclusion: CaseExclusionMetadata = dataclasses.field(init=False, default=None)
 
     @classmethod
     def from_json(cls, obj: str) -> type:
@@ -45,27 +47,19 @@ def from_dict(cls, dictionary: dict[str, Any]) -> type:
         case = cls()
         for key in dictionary:
             if key in cls.date_fields():
-                # handle a few different ways dates get represented in dictionaries
-                maybe_date = dictionary[key]
-                if isinstance(maybe_date, datetime.datetime):
-                    value = maybe_date.date()
-                elif isinstance(maybe_date, datetime.date):
-                    value = maybe_date
-                elif isinstance(maybe_date, str):
-                    value = datetime.datetime.strptime(
-                        maybe_date, "%Y-%m-%dT%H:%M:%S.%fZ"
-                    ).date()
-                elif isinstance(maybe_date, dict) and "$date" in maybe_date:
-                    value = datetime.datetime.strptime(
-                        maybe_date["$date"], "%Y-%m-%dT%H:%M:%SZ"
-                    ).date()
-                else:
-                    raise ValueError(f"Cannot interpret date {maybe_date}")
+                value = cls.interpret_date(dictionary[key])
             elif key == "caseReference":
                 caseRef = dictionary[key]
                 value = (
                     CaseReference.from_dict(caseRef) if caseRef is not None else None
                 )
+            elif key == "caseExclusion":
+                exclusion = dictionary[key]
+                value = (
+                    CaseExclusionMetadata.from_dict(exclusion)
+                    if exclusion is not None
+                    else None
+                )
             elif key == "_id":
                 the_id = dictionary[key]
                 if isinstance(the_id, dict):
@@ -91,83 +85,6 @@ def validate(self):
             raise ValidationError("Case Reference must have a value")
         self.caseReference.validate()
 
-    def to_dict(self):
-        """Return myself as a dictionary."""
-        return dataclasses.asdict(self)
-
-    def to_json(self):
-        """Return myself as JSON"""
-        return JSONEncoder().encode(self.to_dict())
-
-    @classmethod
-    def date_fields(cls) -> list[str]:
-        """Record where dates are kept because they sometimes need special treatment."""
-        return [f.name for f in dataclasses.fields(cls) if f.type == datetime.date]
-
-    @classmethod
-    def field_names(cls) -> List[str]:
-        """The list of names of fields in this class and member dataclasses."""
-        fields = []
-        for f in dataclasses.fields(cls):
-            if dataclasses.is_dataclass(f.type):
-                fields += [f"{f.name}.{g.name}" for g in dataclasses.fields(f.type)]
-            else:
-                fields.append(f.name)
-        return fields
-
-    @classmethod
-    def delimiter_separated_header(cls, sep: str) -> str:
-        """Create a line naming all of the fields in this class and member dataclasses."""
-        return sep.join(cls.field_names()) + "\n"
-
-    @classmethod
-    def tsv_header(cls) -> str:
-        """Generate the header row for a TSV file containing members of this class."""
-        return cls.delimiter_separated_header("\t")
-
-    @classmethod
-    def csv_header(cls) -> str:
-        """Generate the header row for a CSV file containing members of this class."""
-        return cls.delimiter_separated_header(",")
-
-    @classmethod
-    def json_header(cls) -> str:
-        """The start of a JSON array."""
-        return "["
-
-    @classmethod
-    def json_footer(cls) -> str:
-        """The end of a JSON array."""
-        return "]"
-
-    @classmethod
-    def json_separator(cls) -> str:
-        """The string between values in a JSON array."""
-        return ","
-
-    def field_values(self) -> List[str]:
-        """The list of values of fields on this object and member dataclasses."""
-        fields = []
-        for f in dataclasses.fields(self):
-            value = getattr(self, f.name)
-            if dataclasses.is_dataclass(f.type):
-                fields.append(value.to_csv())
-            else:
-                fields.append(str(value) if value is not None else "")
-        return fields
-
-    def delimiter_separated_values(self, sep: str) -> str:
-        """Create a line listing all of the fields in me and my member dataclasses."""
-        return sep.join(self.field_values()) + "\n"
-
-    def to_tsv(self) -> str:
-        """Generate a row in a CSV file representing myself."""
-        return self.delimiter_separated_values("\t")
-
-    def to_csv(self) -> str:
-        """Generate a row in a CSV file representing myself."""
-        return self.delimiter_separated_values(",")
-
 
 # Actually we want to capture extra fields which can be specified dynamically:
 # so Case is the class that you should use.

diff --git a/data-serving/reusable-data-service/data_service/model/case_exclusion_metadata.py b/data-serving/reusable-data-service/data_service/model/case_exclusion_metadata.py
@@ -0,0 +1,34 @@
+import dataclasses
+import datetime
+
+from typing import Any
+
+from data_service.model.document import Document
+
+
+@dataclasses.dataclass
+class CaseExclusionMetadata(Document):
+    """If a case is excluded, record when and why."""
+
+    _: dataclasses.KW_ONLY
+    note: str = dataclasses.field(init=False, default=None)
+    date: datetime.date = dataclasses.field(
+        init=False, default=None
+    )  # Populate at initialisation time, not class load time
+
+    def __post_init__(self):
+        self.date = datetime.datetime.now().date()
+
+    @classmethod
+    def exclude_from_download(cls):
+        return True
+
+    @classmethod
+    def from_dict(cls, dictionary: dict[str, Any]) -> type:
+        """Create a CaseExclusionMetadata from a dictionary representation."""
+        exclusion = CaseExclusionMetadata()
+        exclusion.note = dictionary.get("note")
+        exclusion.date = cls.interpret_date(dictionary.get("date"))
+        if exclusion.date is None:
+            raise ValueError(f"date missing in CaseExclusion document {dict}")
+        return exclusion
diff --git a/data-serving/reusable-data-service/data_service/model/case_reference.py b/data-serving/reusable-data-service/data_service/model/case_reference.py
@@ -1,20 +1,30 @@
 import bson
 import dataclasses
 
+from data_service.model.document import Document
+
 
 @dataclasses.dataclass
-class CaseReference:
+class CaseReference(Document):
     """Represents information about the source of a given case."""
 
     _: dataclasses.KW_ONLY
     sourceId: bson.ObjectId = dataclasses.field(init=False, default=None)
+    status: str = dataclasses.field(init=False, default="UNVERIFIED")
 
     def validate(self):
         """Check whether I am consistent. Raise ValueError if not."""
         if not hasattr(self, "sourceId"):
             raise ValueError("Source ID is mandatory")
         elif self.sourceId is None:
             raise ValueError("Source ID must have a value")
+        # The status has to be one of the values listed by valid_statuses().
+        if self.status not in self.valid_statuses():
+            raise ValueError(f"Status {self.status} is not acceptable")
+
+    @staticmethod
+    def valid_statuses() -> list[str]:
+        """A case reference must have one of these statuses.
+        Returns a new list on each call, so callers may modify it freely."""
+        return ["EXCLUDED", "UNVERIFIED", "VERIFIED"]
 
     @staticmethod
     def from_dict(d: dict[str, str]):
@@ -28,14 +38,5 @@ def from_dict(d: dict[str, str]):
                 ref.sourceId = bson.ObjectId(theId["$oid"])
             else:
                 raise ValueError(f"Cannot interpret {theId} as an ObjectId")
+        ref.status = d["status"] if "status" in d else "UNVERIFIED"
         return ref
-
-    def to_csv(self) -> str:
-        """Generate a row in a CSV file representing myself."""
-        fields = []
-        for f in dataclasses.fields(self):
-            if dataclasses.is_dataclass(f.type):
-                fields.append(getattr(self, f.name).to_csv())
-            else:
-                fields.append(str(getattr(self, f.name)))
-        return ",".join(fields)