From 3ed58e906a0fd90fa340e4983571007fdc3988ca Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt Date: Mon, 16 Sep 2024 15:01:18 +0200 Subject: [PATCH 01/19] add utils for field and type analysis --- CHANGELOG.md | 4 ++ mex/common/models/base/model.py | 7 +- mex/common/utils.py | 115 +++++++++++++++++++++++++++----- tests/test_utils.py | 108 +++++++++++++++++++++++++++--- 4 files changed, 204 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67c676e4..e7ca8fa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `contains_only_types` to check if fields are annotated as desired +- `group_fields_by_class_name` utility to simplify filtered model/field lookups +- new parameters to `get_inner_types` to customize what to unpack + ### Changes ### Deprecated diff --git a/mex/common/models/base/model.py b/mex/common/models/base/model.py index a3fda56c..e5ac4802 100644 --- a/mex/common/models/base/model.py +++ b/mex/common/models/base/model.py @@ -2,8 +2,7 @@ import json from collections.abc import MutableMapping from functools import cache -from types import UnionType -from typing import Any, Union +from typing import Any from pydantic import BaseModel as PydanticBaseModel from pydantic import ( @@ -100,9 +99,7 @@ def _get_list_field_names(cls) -> list[str]: """Build a cached list of fields that look like lists.""" field_names = [] for field_name, field_info in cls.get_all_fields().items(): - field_types = get_inner_types( - field_info.annotation, unpack=(Union, UnionType) - ) + field_types = get_inner_types(field_info.annotation, unpack_list=False) if any( isinstance(field_type, type) and issubclass(field_type, list) for field_type in field_types diff --git a/mex/common/utils.py b/mex/common/utils.py index f527bda5..6d407842 100644 --- a/mex/common/utils.py +++ b/mex/common/utils.py @@ -1,19 +1,27 @@ import re -from collections.abc import 
Container, Generator, Iterable, Iterator +from collections.abc import Callable, Container, Generator, Iterable, Iterator, Mapping from functools import cache from itertools import zip_longest from random import random from time import sleep -from types import UnionType +from types import NoneType, UnionType from typing import ( + TYPE_CHECKING, Annotated, Any, + Literal, TypeVar, Union, get_args, get_origin, ) +from pydantic.fields import FieldInfo + +if TYPE_CHECKING: # pragma: no cover + from mex.common.models import GenericFieldInfo + from mex.common.models.base.model import BaseModel + T = TypeVar("T") @@ -36,23 +44,100 @@ def any_contains_any(bases: Iterable[Container[T] | None], tokens: Iterable[T]) return False +def contains_only_types(field: "GenericFieldInfo", *types: type) -> bool: + """Return whether a `field` is annotated as one of the given `types`. + + Unions, lists and type annotations are checked for their inner types and only the + non-`NoneType` types are considered for the type-check. 
+ + Args: + field: A `GenericFieldInfo` instance + types: Types to look for in the field's annotation + + Returns: + Whether the field contains any of the given types + """ + if inner_types := list(get_inner_types(field.annotation, include_none=False)): + return all(inner_type in types for inner_type in inner_types) + return False + + def get_inner_types( - annotation: Any, unpack: Iterable[Any] = (Union, UnionType, list) + annotation: Any, + include_none: bool = True, + unpack_list: bool = True, + unpack_literal: bool = True, ) -> Generator[type, None, None]: - """Yield all inner types from annotations and the types in `unpack`.""" - origin = get_origin(annotation) - if origin == Annotated: - yield from get_inner_types(get_args(annotation)[0], unpack) - elif origin in unpack: - for arg in get_args(annotation): - yield from get_inner_types(arg, unpack) - elif origin is not None: - yield origin - elif annotation is None: - yield type(None) - else: + """Recursively yield all inner types from a given type annotation. 
+ + Args: + annotation: The type annotation to process + include_none: Whether to include NoneTypes in output + unpack_list: Whether to unpack list types + unpack_literal: Whether to unpack Literal types + + Returns: + All inner types found within the annotation + """ + # Check whether to unpack lists in addition to annotations and unions + types_to_unpack = [Annotated, Union, UnionType] + ([list] if unpack_list else []) + + # Get the unsubscripted version of the given type annotation + origin_type = get_origin(annotation) + + # If the origin should be unpacked + if origin_type in types_to_unpack: + for inner_type in get_args(annotation): + # Recursively process each inner type, skipping pydantic's FieldInfo + if not isinstance(inner_type, FieldInfo): + yield from get_inner_types( + inner_type, include_none, unpack_list, unpack_literal + ) + + # Handle Literal types based on the unpack_literal flag + elif origin_type is Literal: + if unpack_literal: + yield origin_type # Return Literal if unpacking is allowed + else: + yield annotation # Return the full annotation if not + + # Yield the origin type if present + elif origin_type is not None: + yield origin_type + + # Yield the annotation if it isn't none + elif annotation not in (None, NoneType): yield annotation + # Optionally yield none + elif include_none: + yield NoneType + + +def group_fields_by_class_name( + model_classes_by_name: Mapping[str, type["BaseModel"]], + predicate: Callable[["GenericFieldInfo"], bool], +) -> dict[str, list[str]]: + """Group the field names by model class and filter them by the given predicate. 
+ + Args: + model_classes_by_name: Map from class names to model classes + predicate: Function to filter the fields of the classes by + + Returns: + Dictionary mapping class names to a list of field names filtered by `predicate` + """ + return { + name: sorted( + { + field_name + for field_name, field_info in cls.get_all_fields().items() + if predicate(field_info) + } + ) + for name, cls in model_classes_by_name.items() + } + @cache def normalize(string: str) -> str: diff --git a/tests/test_utils.py b/tests/test_utils.py index 438b1cdb..58e4a68f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,16 +1,27 @@ import json import time from collections.abc import Iterable -from typing import Annotated, Any +from types import NoneType +from typing import Annotated, Any, Literal import pytest +from pydantic.fields import FieldInfo +from mex.common.models import BaseModel +from mex.common.types import ( + MERGED_IDENTIFIER_CLASSES, + Identifier, + MergedPersonIdentifier, +) from mex.common.utils import ( any_contains_any, contains_any, + contains_only_types, get_inner_types, + group_fields_by_class_name, grouper, jitter_sleep, + normalize, ) @@ -43,18 +54,95 @@ def test_any_contains_any(base: Any, tokens: Iterable[Any], expected: bool) -> N @pytest.mark.parametrize( - ("annotation", "expected_types"), + ("annotation", "types", "expected"), + ( + (None, [str], False), + (str, [str], True), + (str, [Identifier], False), + (Identifier, [str], False), + (list[str | int | list[str]], [str, float], False), + (list[str | int | list[str]], [int, str], True), + (MergedPersonIdentifier | None, MERGED_IDENTIFIER_CLASSES, True), + ), + ids=[ + "static None", + "simple str", + "str vs identifier", + "identifier vs str", + "complex miss", + "complex hit", + "optional identifier", + ], +) +def test_contains_only_types( + annotation: Any, types: list[type], expected: bool +) -> None: + class DummyModel(BaseModel): + attribute: annotation + + assert 
contains_only_types(DummyModel.model_fields["attribute"], *types) == expected + + +@pytest.mark.parametrize( + ("annotation", "flags", "expected_types"), ( - (str, [str]), - (None, [type(None)]), - (str | None, [str, type(None)]), - (list[str] | None, [str, type(None)]), - (list[str | int | list[str]], [str, int, str]), - (Annotated[str | int, "This is a string or integer"], [str, int]), + (str, {}, [str]), + (None, {}, [NoneType]), + (None, {"include_none": False}, []), + (str | None, {}, [str, NoneType]), + (str | None, {"include_none": False}, [str]), + (list[str] | None, {}, [str, NoneType]), + (list[str | None], {}, [str, NoneType]), + (list[int], {"unpack_list": False}, [list]), + (list[str | int | list[str]], {}, [str, int, str]), + (Annotated[str | int, FieldInfo(description="str or int")], {}, [str, int]), + (Literal["okay"] | None, {}, [Literal, NoneType]), + ( + Literal["okay"] | None, + {"unpack_literal": False}, + [Literal["okay"], NoneType], + ), ), + ids=[ + "string", + "None allowing None", + "None skipping None", + "optional string allowing None", + "optional string skipping None", + "optional list of strings", + "list of optional strings", + "not unpacking list", + "list nested in list", + "annotated string or int", + "unpacking literal", + "not unpacking literal", + ], +) +def test_get_inner_types( + annotation: Any, flags: dict[str, bool], expected_types: list[type] +) -> None: + assert list(get_inner_types(annotation, **flags)) == expected_types + + +def test_group_fields_by_class_name() -> None: + class DummyModel(BaseModel): + number: int + text: str + + class PseudoModel(BaseModel): + title: str + + lookup = {"Dummy": DummyModel, "Pseudo": PseudoModel} + expected = {"Dummy": ["text"], "Pseudo": ["title"]} + assert group_fields_by_class_name(lookup, lambda f: f.annotation is str) == expected + + +@pytest.mark.parametrize( + ("string", "expected"), + (("", ""), ("__XYZ__", "xyz"), ("/foo/BAR$42", "foo bar 42")), ) -def 
test_get_inner_types(annotation: Any, expected_types: list[type]) -> None: - assert list(get_inner_types(annotation)) == expected_types +def test_normalize(string: str, expected: str) -> None: + assert normalize(string) == expected def test_grouper() -> None: From 98f9d1912ef73b8bff56d7a442aef2144dfe4c70 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt Date: Fri, 20 Sep 2024 16:42:32 +0200 Subject: [PATCH 02/19] update types and schemas --- mex/common/backend_api/connector.py | 11 +-- mex/common/models/access_platform.py | 38 +++---- mex/common/models/activity.py | 38 ++++--- mex/common/models/distribution.py | 34 +------ mex/common/models/primary_source.py | 12 ++- mex/common/models/resource.py | 104 +++++--------------- mex/common/models/variable.py | 19 +--- mex/common/types/__init__.py | 2 + mex/common/types/email.py | 12 ++- mex/common/types/identifier.py | 25 ++--- mex/common/types/path.py | 46 +++------ mex/common/types/temporal_entity.py | 59 +++++------ mex/common/types/vocabulary.py | 43 ++++++-- tests/backend_api/test_connector.py | 4 +- tests/models/test_model_schemas.py | 9 +- tests/test_settings.py | 15 +-- tests/types/test_data/dummy-vocabulary.json | 8 +- tests/types/test_identifier.py | 26 ++--- tests/types/test_temporal_entity.py | 9 -- tests/types/test_vocabulary.py | 14 ++- 20 files changed, 210 insertions(+), 318 deletions(-) diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py index 4db67cfe..3a4588fe 100644 --- a/mex/common/backend_api/connector.py +++ b/mex/common/backend_api/connector.py @@ -141,7 +141,6 @@ def fetch_merged_items( Returns: One page of merged items and the total count that was matched """ - # XXX this endpoint will only return faux merged items for now (MX-1382) response = self.request( method="GET", endpoint="merged-item", @@ -156,12 +155,12 @@ def fetch_merged_items( def get_merged_item( self, - stable_target_id: str, + identifier: str, ) -> AnyMergedModel: - """Return one merged item 
for the given `stableTargetId`. + """Return one merged item for the given `identifier`. Args: - stable_target_id: The merged item's identifier + identifier: The merged item's identifier Raises: MExError: If no merged item was found @@ -174,7 +173,7 @@ def get_merged_item( method="GET", endpoint="merged-item", params={ - "stableTargetId": stable_target_id, + "identifier": identifier, "limit": "1", }, ) @@ -201,7 +200,6 @@ def preview_merged_item( Returns: A single merged item """ - # XXX experimental method until the backend has a preview endpoint (MX-1406) response = self.request( method="GET", endpoint=f"preview-item/{stable_target_id}", @@ -224,7 +222,6 @@ def get_rule_set( Returns: A set of three rules """ - # XXX experimental method until the backend has a rule-set endpoint (MX-1416) response = self.request( method="GET", endpoint=f"rule-set/{stable_target_id}", diff --git a/mex/common/models/access_platform.py b/mex/common/models/access_platform.py index e068952c..38b0daf8 100644 --- a/mex/common/models/access_platform.py +++ b/mex/common/models/access_platform.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field, computed_field +from pydantic import AfterValidator, Field, computed_field from mex.common.models.base.extracted_data import ExtractedData from mex.common.models.base.merged_item import MergedItem @@ -16,6 +16,7 @@ from mex.common.types import ( APIType, ExtractedAccessPlatformIdentifier, + Identifier, Link, MergedAccessPlatformIdentifier, MergedContactPointIdentifier, @@ -36,9 +37,12 @@ class _Stem(BaseModel): class _OptionalLists(_Stem): alternativeTitle: list[Text] = [] contact: list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ] = [] description: list[Text] = [] landingPage: list[Link] = [] @@ -48,39 +52,23 @@ 
class _OptionalLists(_Stem): class _OptionalValues(_Stem): endpointDescription: Link | None = None - endpointType: ( - Annotated[APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])] - | None - ) = None + endpointType: APIType | None = None endpointURL: Link | None = None class _RequiredValues(_Stem): - technicalAccessibility: Annotated[ - TechnicalAccessibility, - Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]), - ] + technicalAccessibility: TechnicalAccessibility class _SparseValues(_Stem): - technicalAccessibility: Annotated[ - TechnicalAccessibility | None, - Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]), - ] = None + technicalAccessibility: TechnicalAccessibility | None = None class _VariadicValues(_Stem): endpointDescription: list[Link] - endpointType: list[ - Annotated[APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])] - ] = [] + endpointType: list[APIType] = [] endpointURL: list[Link] = [] - technicalAccessibility: list[ - Annotated[ - TechnicalAccessibility, - Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]), - ] - ] = [] + technicalAccessibility: list[TechnicalAccessibility] = [] class BaseAccessPlatform(_OptionalLists, _OptionalValues, _RequiredValues): diff --git a/mex/common/models/activity.py b/mex/common/models/activity.py index 8ed2de24..03e1bd25 100644 --- a/mex/common/models/activity.py +++ b/mex/common/models/activity.py @@ -5,7 +5,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field, computed_field +from pydantic import AfterValidator, Field, computed_field from mex.common.models.base.extracted_data import ExtractedData from mex.common.models.base.merged_item import MergedItem @@ -19,6 +19,7 @@ from mex.common.types import ( ActivityType, ExtractedActivityIdentifier, + Identifier, Link, MergedActivityIdentifier, MergedContactPointIdentifier, @@ -39,15 +40,16 @@ class _Stem(BaseModel): class _OptionalLists(_Stem): abstract: 
list[Text] = [] - activityType: list[ - Annotated[ - ActivityType, Field(examples=["https://mex.rki.de/item/activity-type-1"]) - ] - ] = [] + activityType: list[ActivityType] = [] alternativeTitle: list[Text] = [] documentation: list[Link] = [] end: list[YearMonthDay | YearMonth] = [] - externalAssociate: list[MergedOrganizationIdentifier | MergedPersonIdentifier] = [] + externalAssociate: list[ + Annotated[ + MergedOrganizationIdentifier | MergedPersonIdentifier, + AfterValidator(Identifier), + ] + ] = [] funderOrCommissioner: list[MergedOrganizationIdentifier] = [] fundingProgram: list[str] = [] involvedPerson: list[MergedPersonIdentifier] = [] @@ -57,18 +59,19 @@ class _OptionalLists(_Stem): shortName: list[Text] = [] start: list[YearMonthDay | YearMonth] = [] succeeds: list[MergedActivityIdentifier] = [] - theme: list[ - Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])] - ] = [] + theme: list[Theme] = [] website: list[Link] = [] class _RequiredLists(_Stem): contact: Annotated[ list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier, + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ], Field(min_length=1), ] @@ -80,9 +83,12 @@ class _RequiredLists(_Stem): class _SparseLists(_Stem): contact: list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier, + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ] = [] responsibleUnit: list[MergedOrganizationalUnitIdentifier] = [] title: list[Text] = [] diff --git a/mex/common/models/distribution.py b/mex/common/models/distribution.py index 295374e8..16bac711 100644 --- a/mex/common/models/distribution.py +++ b/mex/common/models/distribution.py @@ -59,26 +59,13 @@ class _OptionalValues(_Stem): accessService: 
MergedAccessPlatformIdentifier | None = None accessURL: Link | None = None downloadURL: Link | None = None - license: ( - Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] | None - ) = None - mediaType: ( - Annotated[ - MIMEType, - Field( - examples=["https://mex.rki.de/item/mime-type-1"], - ), - ] - | None - ) = None + license: License | None = None + mediaType: MIMEType | None = None modified: YearMonthDayTime | YearMonthDay | YearMonth | None = None class _RequiredValues(_Stem): - accessRestriction: Annotated[ - AccessRestriction, - Field(examples=["https://mex.rki.de/item/access-restriction-1"]), - ] + accessRestriction: AccessRestriction issued: YearMonthDayTime | YearMonthDay | YearMonth title: Annotated[ str, @@ -90,13 +77,7 @@ class _RequiredValues(_Stem): class _SparseValues(_Stem): - accessRestriction: ( - Annotated[ - AccessRestriction, - Field(examples=["https://mex.rki.de/item/access-restriction-1"]), - ] - | None - ) = None + accessRestriction: AccessRestriction | None = None issued: YearMonthDayTime | YearMonthDay | YearMonth | None = None title: ( Annotated[ @@ -111,12 +92,7 @@ class _SparseValues(_Stem): class _VariadicValues(_Stem): - accessRestriction: list[ - Annotated[ - AccessRestriction, - Field(examples=["https://mex.rki.de/item/access-restriction-1"]), - ] - ] = [] + accessRestriction: list[AccessRestriction] = [] issued: list[YearMonthDayTime | YearMonthDay | YearMonth] = [] title: list[ Annotated[ diff --git a/mex/common/models/primary_source.py b/mex/common/models/primary_source.py index f96f307c..0cee5692 100644 --- a/mex/common/models/primary_source.py +++ b/mex/common/models/primary_source.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field, computed_field +from pydantic import AfterValidator, Field, computed_field from mex.common.models.base.extracted_data import ExtractedData from mex.common.models.base.merged_item import MergedItem @@ -15,6 +15,7 @@ ) from 
mex.common.types import ( ExtractedPrimarySourceIdentifier, + Identifier, Link, MergedContactPointIdentifier, MergedOrganizationalUnitIdentifier, @@ -33,9 +34,12 @@ class _Stem(BaseModel): class _OptionalLists(_Stem): alternativeTitle: list[Text] = [] contact: list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ] = [] description: list[Text] = [] documentation: list[Link] = [] diff --git a/mex/common/models/resource.py b/mex/common/models/resource.py index 43df13a6..d7826368 100644 --- a/mex/common/models/resource.py +++ b/mex/common/models/resource.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field, computed_field +from pydantic import AfterValidator, Field, computed_field from mex.common.models.base.extracted_data import ExtractedData from mex.common.models.base.merged_item import MergedItem @@ -19,6 +19,7 @@ DataProcessingState, ExtractedResourceIdentifier, Frequency, + Identifier, Language, License, Link, @@ -47,14 +48,7 @@ class _Stem(BaseModel): class _OptionalLists(_Stem): accessPlatform: list[MergedAccessPlatformIdentifier] = [] alternativeTitle: list[Text] = [] - anonymizationPseudonymization: list[ - Annotated[ - AnonymizationPseudonymization, - Field( - examples=["https://mex.rki.de/item/anonymization-pseudonymization-1"] - ), - ] - ] = [] + anonymizationPseudonymization: list[AnonymizationPseudonymization] = [] contributingUnit: list[MergedOrganizationalUnitIdentifier] = [] contributor: list[MergedPersonIdentifier] = [] creator: list[MergedPersonIdentifier] = [] @@ -66,9 +60,7 @@ class _OptionalLists(_Stem): instrumentToolOrApparatus: list[Text] = [] isPartOf: list[MergedResourceIdentifier] = [] keyword: list[Text] = [] - language: list[ - Annotated[Language, Field(examples=["https://mex.rki.de/item/language-1"])] - ] 
= [] + language: list[Language] = [] loincId: list[str] = [] meshId: list[ Annotated[ @@ -85,40 +77,26 @@ class _OptionalLists(_Stem): publication: list[Link] = [] publisher: list[MergedOrganizationIdentifier] = [] qualityInformation: list[Text] = [] - resourceTypeGeneral: list[ - Annotated[ - ResourceTypeGeneral, - Field( - examples=["https://mex.rki.de/item/resource-type-general-1"], - ), - ] - ] = [] + resourceTypeGeneral: list[ResourceTypeGeneral] = [] resourceTypeSpecific: list[Text] = [] rights: list[Text] = [] spatial: list[Text] = [] - stateOfDataProcessing: list[ - Annotated[ - DataProcessingState, - Field( - examples=["https://mex.rki.de/item/data-processing-state-1"], - ), - ] - ] = [] + stateOfDataProcessing: list[DataProcessingState] = [] class _RequiredLists(_Stem): contact: Annotated[ list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ], Field(min_length=1), ] - theme: Annotated[ - list[Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])]], - Field(min_length=1), - ] + theme: Annotated[list[Theme], Field(min_length=1)] title: Annotated[list[Text], Field(min_length=1)] unitInCharge: Annotated[ list[MergedOrganizationalUnitIdentifier], Field(min_length=1) @@ -127,26 +105,22 @@ class _RequiredLists(_Stem): class _SparseLists(_Stem): contact: list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier - ] = [] - theme: list[ - Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])] + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ] = [] + theme: list[Theme] = [] title: list[Text] = [] unitInCharge: list[MergedOrganizationalUnitIdentifier] = [] class _OptionalValues(_Stem): - accrualPeriodicity: 
( - Annotated[Frequency, Field(examples=["https://mex.rki.de/item/frequency-1"])] - | None - ) = None + accrualPeriodicity: Frequency | None = None created: YearMonthDayTime | YearMonthDay | YearMonth | None = None - license: ( - Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] | None - ) = None + license: License | None = None modified: YearMonthDayTime | YearMonthDay | YearMonth | None = None sizeOfDataBasis: str | None = None temporal: ( @@ -170,42 +144,18 @@ class _OptionalValues(_Stem): class _RequiredValues(_Stem): - accessRestriction: Annotated[ - AccessRestriction, - Field( - examples=["https://mex.rki.de/item/access-restriction-1"], - ), - ] + accessRestriction: AccessRestriction class _SparseValues(_Stem): - accessRestriction: ( - Annotated[ - AccessRestriction, - Field( - examples=["https://mex.rki.de/item/access-restriction-1"], - ), - ] - | None - ) = None + accessRestriction: AccessRestriction | None = None class _VariadicValues(_Stem): - accessRestriction: list[ - Annotated[ - AccessRestriction, - Field( - examples=["https://mex.rki.de/item/access-restriction-1"], - ), - ] - ] = [] - accrualPeriodicity: list[ - Annotated[Frequency, Field(examples=["https://mex.rki.de/item/frequency-1"])] - ] = [] + accessRestriction: list[AccessRestriction] = [] + accrualPeriodicity: list[Frequency] = [] created: list[YearMonthDayTime | YearMonthDay | YearMonth] = [] - license: list[ - Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] - ] = [] + license: list[License] = [] modified: list[YearMonthDayTime | YearMonthDay | YearMonth] = [] sizeOfDataBasis: list[str] = [] temporal: list[ diff --git a/mex/common/models/variable.py b/mex/common/models/variable.py index 9e511edb..bd35ffb6 100644 --- a/mex/common/models/variable.py +++ b/mex/common/models/variable.py @@ -86,15 +86,7 @@ class _OptionalValues(_Stem): ] | None ) = None - dataType: ( - Annotated[ - DataType, - Field( - 
examples=["https://mex.rki.de/item/data-type-1"], - ), - ] - | None - ) = None + dataType: DataType | None = None class _VariadicValues(_Stem): @@ -106,14 +98,7 @@ class _VariadicValues(_Stem): ), ] ] = [] - dataType: list[ - Annotated[ - DataType, - Field( - examples=["https://mex.rki.de/item/data-type-1"], - ), - ] - ] = [] + dataType: list[DataType] = [] class BaseVariable(_OptionalLists, _RequiredLists, _OptionalValues): diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py index 8df713b9..84488183 100644 --- a/mex/common/types/__init__.py +++ b/mex/common/types/__init__.py @@ -46,6 +46,7 @@ ) from mex.common.types.text import Text, TextLanguage from mex.common.types.vocabulary import ( + VOCABULARY_PATTERN, AccessRestriction, ActivityType, AnonymizationPseudonymization, @@ -131,6 +132,7 @@ "TextLanguage", "Theme", "UTC", + "VOCABULARY_PATTERN", "VocabularyEnum", "VocabularyLoader", "WorkPath", diff --git a/mex/common/types/email.py b/mex/common/types/email.py index 89942581..3a98df1b 100644 --- a/mex/common/types/email.py +++ b/mex/common/types/email.py @@ -14,7 +14,13 @@ def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: """Modify the core schema to add the email regex.""" - return core_schema.str_schema(pattern=EMAIL_PATTERN) + return core_schema.chain_schema( + [ + core_schema.str_schema(pattern=EMAIL_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ], + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), + ) @classmethod def __get_pydantic_json_schema__( @@ -26,3 +32,7 @@ def __get_pydantic_json_schema__( json_schema_["format"] = "email" json_schema_["examples"] = ["info@rki.de"] return json_schema_ + + def __repr__(self) -> str: + """Overwrite the default representation.""" + return f'{self.__class__.__name__}("{self}")' diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py index 2b6eb9b2..2fdccdf3 100644 --- 
a/mex/common/types/identifier.py +++ b/mex/common/types/identifier.py @@ -1,4 +1,3 @@ -import re import string from typing import Any, Self from uuid import UUID, uuid4 @@ -8,7 +7,6 @@ MEX_ID_ALPHABET = string.ascii_letters + string.digits MEX_ID_PATTERN = r"^[a-zA-Z0-9]{14,22}$" -UUID_PATTERN = r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" class Identifier(str): @@ -29,25 +27,17 @@ def generate(cls, seed: int | None = None) -> Self: output += MEX_ID_ALPHABET[digit] return cls(output[::-1]) - @classmethod - def validate(cls, value: Any) -> Self: - """Validate a string, UUID or Identifier.""" - if isinstance(value, str | UUID | Identifier): - value = str(value) - if re.match(MEX_ID_PATTERN, value): - return cls(value) - if re.match(UUID_PATTERN, value): - return cls.generate(seed=UUID(value).int) - raise ValueError(f"Invalid identifier format: {value}") - raise ValueError(f"Cannot parse {type(value)} as {cls.__name__}") - @classmethod def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: """Modify the core schema to add the ID regex.""" - return core_schema.no_info_before_validator_function( - cls.validate, core_schema.str_schema(pattern=MEX_ID_PATTERN) + return core_schema.chain_schema( + [ + core_schema.str_schema(pattern=MEX_ID_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ], + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), ) @classmethod @@ -58,11 +48,12 @@ def __get_pydantic_json_schema__( json_schema_ = handler(core_schema_) json_schema_ = handler.resolve_ref_schema(json_schema_) json_schema_["title"] = cls.__name__ + json_schema_["pattern"] = MEX_ID_PATTERN return json_schema_ def __repr__(self) -> str: """Overwrite the default representation.""" - return f"{self.__class__.__name__}({super().__str__().__repr__()})" + return f'{self.__class__.__name__}("{self}")' # We have technically-identical subclasses of identifier types (one per 
entity-type). diff --git a/mex/common/types/path.py b/mex/common/types/path.py index 7cc78925..6a1539d1 100644 --- a/mex/common/types/path.py +++ b/mex/common/types/path.py @@ -1,6 +1,6 @@ from os import PathLike from pathlib import Path -from typing import Any, Self, Union +from typing import Any, Union from pydantic import GetCoreSchemaHandler from pydantic_core import core_schema @@ -19,6 +19,19 @@ def __init__(self, path: Union[str, Path, "PathWrapper"]) -> None: path = path._path self._path = path + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + """Modify the core schema to add validation and serialization rules.""" + return core_schema.chain_schema( + [ + core_schema.is_instance_schema(str | Path | PathWrapper), + core_schema.no_info_plain_validator_function(cls), + ], + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), + ) + def __fspath__(self) -> str: """Return the file system path representation.""" return self._path.__fspath__() @@ -49,37 +62,6 @@ def is_relative(self) -> bool: """True if the underlying path is relative.""" return not self._path.is_absolute() - @classmethod - def __get_pydantic_core_schema__( - cls, source_type: Any, handler: GetCoreSchemaHandler - ) -> core_schema.CoreSchema: - """Set schema to str schema.""" - from_str_schema = core_schema.chain_schema( - [ - core_schema.str_schema(), - core_schema.no_info_plain_validator_function( - cls.validate, - ), - ] - ) - from_anything_schema = core_schema.chain_schema( - [ - core_schema.no_info_plain_validator_function(cls.validate), - core_schema.is_instance_schema(PathWrapper), - ] - ) - return core_schema.json_or_python_schema( - json_schema=from_str_schema, - python_schema=from_anything_schema, - ) - - @classmethod - def validate(cls, value: Any) -> Self: - """Convert a string value to a Text instance.""" - if isinstance(value, str | Path | PathWrapper): - return cls(value) - 
raise ValueError(f"Cannot parse {type(value)} as {cls.__name__}") - class AssetsPath(PathWrapper): """Custom path for settings that can be absolute or relative to `assets_dir`.""" diff --git a/mex/common/types/temporal_entity.py b/mex/common/types/temporal_entity.py index 48f8c8df..a0af7085 100644 --- a/mex/common/types/temporal_entity.py +++ b/mex/common/types/temporal_entity.py @@ -193,27 +193,20 @@ def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: """Modify the core schema to add validation and serialization rules.""" - from_str_schema = core_schema.chain_schema( - [ - core_schema.str_schema(pattern=cls.STR_SCHEMA_PATTERN), - core_schema.no_info_plain_validator_function( - cls.validate, - ), - ] - ) - from_anything_schema = core_schema.chain_schema( - [ - core_schema.no_info_plain_validator_function(cls.validate), - core_schema.is_instance_schema(cls), - ] - ) - serialization_schema = core_schema.plain_serializer_function_ser_schema( - lambda instance: str(instance) - ) return core_schema.json_or_python_schema( - json_schema=from_str_schema, - python_schema=from_anything_schema, - serialization=serialization_schema, + json_schema=core_schema.chain_schema( + [ + core_schema.str_schema(pattern=cls.STR_SCHEMA_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ] + ), + python_schema=core_schema.chain_schema( + [ + core_schema.is_instance_schema(cls | date | str | TemporalEntity), + core_schema.no_info_plain_validator_function(cls), + ] + ), + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), ) @classmethod @@ -221,17 +214,10 @@ def __get_pydantic_json_schema__( cls, core_schema_: core_schema.CoreSchema, handler: GetJsonSchemaHandler ) -> json_schema.JsonSchemaValue: """Modify the json schema to add a title, examples and an optional format.""" - json_schema = handler(core_schema_) - json_schema["title"] = cls.__name__ - json_schema.update(cls.JSON_SCHEMA_CONFIG) - 
return json_schema - - @classmethod - def validate(cls, value: Any) -> "TemporalEntity": - """Parse any value and try to convert it into a temporal entity.""" - if isinstance(value, cls | date | str | TemporalEntity): - return cls(value) - raise TypeError(f"Cannot parse {type(value)} as {cls.__name__}") + json_schema_ = handler(core_schema_) + json_schema_["title"] = cls.__name__ + json_schema_.update(cls.JSON_SCHEMA_CONFIG) + return json_schema_ @staticmethod def _parse_integers( @@ -286,20 +272,21 @@ def _parse_date( def __eq__(self, other: object) -> bool: """Return whether the given other value is the same as this one.""" try: - other = self.validate(other) + other_temporal = TemporalEntity(other) # type: ignore[call-overload] except TypeError: return False return bool( - self.date_time == other.date_time and self.precision == other.precision + self.date_time == other_temporal.date_time + and self.precision == other_temporal.precision ) def __gt__(self, other: Any) -> bool: """Return whether the given other value is the greater than this one.""" try: - other = self.validate(other) + other_temporal = TemporalEntity(other) # type: ignore[call-overload] except TypeError: raise NotImplementedError from None - return bool(self.date_time > other.date_time) + return bool(self.date_time > other_temporal.date_time) def __str__(self) -> str: """Render temporal entity with format fitting for its precision.""" @@ -308,7 +295,7 @@ def __str__(self) -> str: ) def __repr__(self) -> str: - """Render a presentation showing this is not just a datetime.""" + """Overwrite the default representation.""" return f'{self.__class__.__name__}("{self}")' diff --git a/mex/common/types/vocabulary.py b/mex/common/types/vocabulary.py index f17b06f5..ac2b439b 100644 --- a/mex/common/types/vocabulary.py +++ b/mex/common/types/vocabulary.py @@ -5,7 +5,14 @@ from importlib.resources import files from typing import TYPE_CHECKING, ClassVar, Self, Union -from pydantic import AnyUrl, BaseModel +from 
pydantic import ( + AnyUrl, + BaseModel, + GetCoreSchemaHandler, + GetJsonSchemaHandler, + json_schema, +) +from pydantic_core import core_schema from mex.common.utils import normalize @@ -15,6 +22,7 @@ from mex.common.types import Text MODEL_VOCABULARIES = files("mex.model.vocabularies") +VOCABULARY_PATTERN = r"https://mex.rki.de/item/[a-z0-9-]+" class BilingualText(BaseModel): @@ -71,10 +79,6 @@ class VocabularyEnum(Enum, metaclass=VocabularyLoader): __vocabulary__: ClassVar[str] __concepts__: ClassVar[list[Concept]] - def __repr__(self) -> str: - """Overwrite representation because dynamic enum names are unknown to mypy.""" - return f'{self.__class__.__name__}["{self.name}"]' - @classmethod def find(cls, search_term: Union[str, "Text"]) -> Self | None: """Get the enum instance that matches a label of the underlying concepts. @@ -99,12 +103,39 @@ def find(cls, search_term: Union[str, "Text"]) -> Self | None: continue if language is None: searchable_labels.extend([normalize(label.de), normalize(label.en)]) - elif language_label := label.dict().get(language.value): + elif language_label := label.model_dump().get(language.value): searchable_labels.append(normalize(language_label)) if search_term in searchable_labels: return cls(str(concept.identifier)) return None + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: object, handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + """Modify the core schema to add the vocabulary regex.""" + return core_schema.chain_schema( + [ + core_schema.str_schema(pattern=VOCABULARY_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ], + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), + ) + + @classmethod + def __get_pydantic_json_schema__( + cls, core_schema_: core_schema.CoreSchema, handler: GetJsonSchemaHandler + ) -> json_schema.JsonSchemaValue: + """Modify the json schema to add the scheme and an example.""" + json_schema_ = handler(core_schema_) + 
json_schema_["examples"] = [f"https://mex.rki.de/item/{cls.__vocabulary__}-1"] + json_schema_["useScheme"] = f"https://mex.rki.de/item/{cls.__vocabulary__}" + return json_schema_ + + def __repr__(self) -> str: + """Overwrite representation because dynamic enum names are unknown to mypy.""" + return f'{self.__class__.__name__}["{self.name}"]' + class AccessRestriction(VocabularyEnum): """The access restriction type.""" diff --git a/tests/backend_api/test_connector.py b/tests/backend_api/test_connector.py index a2de406d..083c0f38 100644 --- a/tests/backend_api/test_connector.py +++ b/tests/backend_api/test_connector.py @@ -133,7 +133,7 @@ def test_get_merged_item_mocked( "GET", "http://localhost:8080/v0/merged-item", { - "stableTargetId": "NGwfzG8ROsrvIiQIVDVy", + "identifier": "NGwfzG8ROsrvIiQIVDVy", "limit": "1", }, headers={ @@ -156,7 +156,7 @@ def test_get_merged_item_error_mocked(mocked_backend: MagicMock) -> None: "GET", "http://localhost:8080/v0/merged-item", { - "stableTargetId": "NGwfzG8ROsrvIiQIVDVy", + "identifier": "NGwfzG8ROsrvIiQIVDVy", "limit": "1", }, headers={ diff --git a/tests/models/test_model_schemas.py b/tests/models/test_model_schemas.py index 5e31b283..cdf639bb 100644 --- a/tests/models/test_model_schemas.py +++ b/tests/models/test_model_schemas.py @@ -10,7 +10,7 @@ from mex.common.models import EXTRACTED_MODEL_CLASSES, BaseModel from mex.common.transform import dromedary_to_kebab -from mex.common.types.identifier import MEX_ID_PATTERN +from mex.common.types import MEX_ID_PATTERN, VOCABULARY_PATTERN MEX_MODEL_ENTITIES = files("mex.model.entities") @@ -137,8 +137,6 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None: # pop annotations that we don't compare directly but use for other comparisons title = obj.pop("title", "") # only in model (autogenerated by pydantic) - use_scheme = obj.pop("useScheme", "") # only in spec (needed to select vocabulary) - vocabulary = use_scheme.removeprefix("https://mex.rki.de/item/") # 
vocabulary name # align reference paths # (the paths to referenced vocabularies and types differ between the models @@ -156,9 +154,10 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None: ) # align concept/enum annotations - # (spec uses `useScheme` to specify vocabularies and models use enums) if obj.get("$ref") == "/schema/entities/concept#/identifier": - obj["$ref"] = f"/schema/fields/{vocabulary}" + obj["pattern"] = VOCABULARY_PATTERN + obj["type"] = "string" + obj.pop("$ref") # make sure all refs have paths in kebab-case # (the models use the class names, whereas the spec uses kebab-case URLs) diff --git a/tests/test_settings.py b/tests/test_settings.py index 64680fdc..2a4d375e 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -88,13 +88,8 @@ class DummySettings(BaseSettings): sub_model=SubModel(sub_model_path=relative), ) - settings_dict = settings.model_dump() - assert settings_dict["non_path"] == "blablabla" - assert settings_dict["abs_work_path"] == absolute - assert settings_dict["rel_work_path"] == WorkPath(settings.work_dir / relative) - assert settings_dict["assets_path"] == AssetsPath( - absolute / "assets_dir" / relative - ) - assert settings_dict["sub_model"]["sub_model_path"] == WorkPath( - settings.work_dir / relative - ) + assert settings.non_path == "blablabla" + assert settings.abs_work_path == absolute + assert settings.rel_work_path == WorkPath(settings.work_dir / relative) + assert settings.assets_path == AssetsPath(absolute / "assets_dir" / relative) + assert settings.sub_model.sub_model_path == WorkPath(settings.work_dir / relative) diff --git a/tests/types/test_data/dummy-vocabulary.json b/tests/types/test_data/dummy-vocabulary.json index 4852a481..fd503d2d 100644 --- a/tests/types/test_data/dummy-vocabulary.json +++ b/tests/types/test_data/dummy-vocabulary.json @@ -8,8 +8,8 @@ "de": "desc-de-one", "en": "desc-en-one" }, - "identifier": "https://dummy/concept-one", - "inScheme": 
"https://dummy/concept", + "identifier": "https://mex.rki.de/item/dummy-concept-1", + "inScheme": "https://mex.rki.de/item/dummy-concept", "prefLabel": { "de": "pref-de-one", "en": "pref-en-one" @@ -17,8 +17,8 @@ }, { "definition": null, - "identifier": "https://dummy/concept-two", - "inScheme": "https://dummy/concept", + "identifier": "https://mex.rki.de/item/dummy-concept-2", + "inScheme": "https://mex.rki.de/item/dummy-concept", "prefLabel": { "de": "pref-de-two", "en": "pref-en-two" diff --git a/tests/types/test_identifier.py b/tests/types/test_identifier.py index 1f350b4e..07d3d7b4 100644 --- a/tests/types/test_identifier.py +++ b/tests/types/test_identifier.py @@ -7,30 +7,20 @@ from mex.common.types import Identifier -class DummyID(Identifier): +class DummyIdentifier(Identifier): pass class DummyModel(BaseModel): id: Identifier - dummy: DummyID | None = None + dummy: DummyIdentifier | None = None def test_identifier_validates() -> None: model_with_obj = DummyModel.model_validate({"id": Identifier("bFQoRhcVH5DIfZ")}) model_with_raw = DummyModel.model_validate({"id": "bFQoRhcVH5DIfZ"}) - model_with_raw_uuid = DummyModel.model_validate( - {"id": "00000000-0000-4000-8000-000000000539"} - ) - model_with_uuid_obj = DummyModel.model_validate({"id": UUID(int=1337, version=4)}) - - assert ( - model_with_obj.id - == model_with_raw.id - == model_with_raw_uuid.id - == model_with_uuid_obj.id - == Identifier.generate(seed=1337) - ) + + assert model_with_obj.id == model_with_raw.id == Identifier.generate(seed=1337) with pytest.raises(ValidationError): DummyModel.model_validate({"id": "baaiaaaboi!!!"}) @@ -47,7 +37,11 @@ def test_identifier_modifies_schema() -> None: } assert DummyModel.model_json_schema()["properties"]["dummy"] == { "anyOf": [ - {"pattern": "^[a-zA-Z0-9]{14,22}$", "title": "DummyID", "type": "string"}, + { + "pattern": "^[a-zA-Z0-9]{14,22}$", + "title": "DummyIdentifier", + "type": "string", + }, {"type": "null"}, ], "default": None, @@ -56,7 +50,7 @@ def 
test_identifier_modifies_schema() -> None: def test_identifier_repr() -> None: - assert repr(Identifier("baaiaaaaaaaboi")) == "Identifier('baaiaaaaaaaboi')" + assert repr(Identifier("baaiaaaaaaaboi")) == 'Identifier("baaiaaaaaaaboi")' def test_identifier_generate(monkeypatch: MonkeyPatch) -> None: diff --git a/tests/types/test_temporal_entity.py b/tests/types/test_temporal_entity.py index 3f3f18be..4c6d0694 100644 --- a/tests/types/test_temporal_entity.py +++ b/tests/types/test_temporal_entity.py @@ -103,15 +103,6 @@ def test_temporal_entity_value_errors( cls(*args, **kwargs) -@pytest.mark.parametrize( - ("value", "message"), - [(object(), "Cannot parse <class 'object'> as TemporalEntity")], -) -def test_temporal_entity_validation_errors(value: Any, message: str) -> None: - with pytest.raises(TypeError, match=message): - TemporalEntity.validate(value) - - @pytest.mark.parametrize( ("cls", "args", "kwargs", "expected"), [ diff --git a/tests/types/test_vocabulary.py b/tests/types/test_vocabulary.py index 52805e0d..617dbab4 100644 --- a/tests/types/test_vocabulary.py +++ b/tests/types/test_vocabulary.py @@ -35,8 +35,8 @@ class DummyEnum(VocabularyEnum): # check enum values are loaded correctly assert [c.value for c in DummyEnum] == [ - "https://dummy/concept-one", - "https://dummy/concept-two", + "https://mex.rki.de/item/dummy-concept-1", + "https://mex.rki.de/item/dummy-concept-2", ] # check enum instance representation @@ -51,10 +51,14 @@ class DummyModel(BaseModel): # check wrong value raises error with pytest.raises(ValidationError): - DummyModel.model_validate({"dummy": "https://dummy/not-a-valid-concept"}) + DummyModel.model_validate( + {"dummy": "https://mex.rki.de/item/not-a-valid-concept"} + ) # check parsing from string works - model = DummyModel.model_validate({"dummy": "https://dummy/concept-two"}) + model = DummyModel.model_validate( + {"dummy": "https://mex.rki.de/item/dummy-concept-2"} + ) assert model.dummy == DummyEnum["PREF_EN_TWO"] @@ -68,4 +72,4 @@ class
DummyEnum(VocabularyEnum): found_enum = DummyEnum.find("pref-de-one") assert found_enum is not None - assert found_enum.value == "https://dummy/concept-one" + assert found_enum.value == "https://mex.rki.de/item/dummy-concept-1" From 79872227198699923039d02451944f26892bbf57 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt Date: Tue, 24 Sep 2024 11:46:45 +0200 Subject: [PATCH 03/19] WIP --- CHANGELOG.md | 5 +++ mex/common/models/consent.py | 5 +++ mex/common/types/__init__.py | 7 ++-- mex/common/types/email.py | 1 + mex/common/types/identifier.py | 14 ++++---- mex/common/types/temporal_entity.py | 6 ++-- tests/types/test_email.py | 43 ++++++++++++++++++++++-- tests/types/test_identifier.py | 51 ++++++++++++++++------------- 8 files changed, 93 insertions(+), 39 deletions(-) create mode 100644 mex/common/models/consent.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f63bfd98..d8f3e249 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,10 +15,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changes +- BREAKING: use `identifier` instead of `stableTargetId` to get merged item from backend +- ensure identifier unions are typed to generic `Identifier` instead of the first match + ### Deprecated ### Removed +- drop manual examples from enum fields, because they are autogenerated now + ### Fixed ### Security diff --git a/mex/common/models/consent.py b/mex/common/models/consent.py new file mode 100644 index 00000000..77cdd379 --- /dev/null +++ b/mex/common/models/consent.py @@ -0,0 +1,5 @@ +# XXX this is a forward-compatibility hint for feature/model-update-v3 + +# when this gets merged with model v3 +# - remove the `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields +# - add `Annotated[..., AfterValidator(Identifier)]` to all identifier union fields diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py index 84488183..adea9c22 100644 --- a/mex/common/types/__init__.py +++ 
b/mex/common/types/__init__.py @@ -1,8 +1,8 @@ from typing import Final, Literal, get_args -from mex.common.types.email import Email +from mex.common.types.email import Email, EMAIL_PATTERN from mex.common.types.identifier import ( - MEX_ID_PATTERN, + IDENTIFIER_PATTERN, ExtractedAccessPlatformIdentifier, ExtractedActivityIdentifier, ExtractedContactPointIdentifier, @@ -78,6 +78,7 @@ "CET", "DataProcessingState", "DataType", + "EMAIL_PATTERN", "Email", "EXTRACTED_IDENTIFIER_CLASSES_BY_NAME", "EXTRACTED_IDENTIFIER_CLASSES", @@ -94,6 +95,7 @@ "ExtractedVariableGroupIdentifier", "ExtractedVariableIdentifier", "Frequency", + "IDENTIFIER_PATTERN", "Identifier", "IdentityProvider", "Language", @@ -115,7 +117,6 @@ "MergedResourceIdentifier", "MergedVariableGroupIdentifier", "MergedVariableIdentifier", - "MEX_ID_PATTERN", "MIMEType", "NESTED_MODEL_CLASSES_BY_NAME", "NESTED_MODEL_CLASSES", diff --git a/mex/common/types/email.py b/mex/common/types/email.py index 3a98df1b..94340db4 100644 --- a/mex/common/types/email.py +++ b/mex/common/types/email.py @@ -1,3 +1,4 @@ +import re from typing import Any from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, json_schema diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py index 2fdccdf3..f888a245 100644 --- a/mex/common/types/identifier.py +++ b/mex/common/types/identifier.py @@ -1,12 +1,12 @@ import string from typing import Any, Self from uuid import UUID, uuid4 - +import re from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, json_schema from pydantic_core import core_schema -MEX_ID_ALPHABET = string.ascii_letters + string.digits -MEX_ID_PATTERN = r"^[a-zA-Z0-9]{14,22}$" +_ALPHABET = string.ascii_letters + string.digits +IDENTIFIER_PATTERN = r"^[a-zA-Z0-9]{14,22}$" class Identifier(str): @@ -17,14 +17,14 @@ def generate(cls, seed: int | None = None) -> Self: """Generate a new identifier from a seed or random UUID version 4.""" # Inspired by https://pypi.org/project/shortuuid output 
= "" - alpha_len = len(MEX_ID_ALPHABET) + alpha_len = len(_ALPHABET) if seed is None: number = uuid4().int else: number = UUID(int=seed, version=4).int while number: number, digit = divmod(number, alpha_len) - output += MEX_ID_ALPHABET[digit] + output += _ALPHABET[digit] return cls(output[::-1]) @classmethod @@ -34,7 +34,7 @@ def __get_pydantic_core_schema__( """Modify the core schema to add the ID regex.""" return core_schema.chain_schema( [ - core_schema.str_schema(pattern=MEX_ID_PATTERN), + core_schema.str_schema(pattern=IDENTIFIER_PATTERN), core_schema.no_info_plain_validator_function(cls), ], serialization=core_schema.to_string_ser_schema(when_used="unless-none"), @@ -48,7 +48,7 @@ def __get_pydantic_json_schema__( json_schema_ = handler(core_schema_) json_schema_ = handler.resolve_ref_schema(json_schema_) json_schema_["title"] = cls.__name__ - json_schema_["pattern"] = MEX_ID_PATTERN + json_schema_["pattern"] = IDENTIFIER_PATTERN return json_schema_ def __repr__(self) -> str: diff --git a/mex/common/types/temporal_entity.py b/mex/common/types/temporal_entity.py index a0af7085..3f591db0 100644 --- a/mex/common/types/temporal_entity.py +++ b/mex/common/types/temporal_entity.py @@ -269,10 +269,10 @@ def _parse_date( """Parse a date and assume the precision is days.""" return datetime(value.year, value.month, value.day), TemporalEntityPrecision.DAY - def __eq__(self, other: object) -> bool: + def __eq__(self, other: Any) -> bool: """Return whether the given other value is the same as this one.""" try: - other_temporal = TemporalEntity(other) # type: ignore[call-overload] + other_temporal = TemporalEntity(other) except TypeError: return False return bool( @@ -283,7 +283,7 @@ def __eq__(self, other: object) -> bool: def __gt__(self, other: Any) -> bool: """Return whether the given other value is the greater than this one.""" try: - other_temporal = TemporalEntity(other) # type: ignore[call-overload] + other_temporal = TemporalEntity(other) except TypeError: raise 
NotImplementedError from None return bool(self.date_time > other_temporal.date_time) diff --git a/tests/types/test_email.py b/tests/types/test_email.py index a18eb7a4..c971da3b 100644 --- a/tests/types/test_email.py +++ b/tests/types/test_email.py @@ -1,16 +1,53 @@ import pytest from pydantic import BaseModel, ValidationError -from mex.common.types import Email +from mex.common.types import Email, EMAIL_PATTERN class DummyModel(BaseModel): email: Email -def test_email() -> None: +def test_email_validation() -> None: model = DummyModel.model_validate({"email": "wasd@def.ghi"}) - assert model.email == "wasd@def.ghi" + assert model.email == Email("wasd@def.ghi") + + model = DummyModel.model_validate({"email": Email("wasd@def.ghi")}) + assert model.email == Email("wasd@def.ghi") + + model = DummyModel(email=Email("wasd@def.ghi")) + assert model.email == Email("wasd@def.ghi") with pytest.raises(ValidationError): DummyModel.model_validate({"email": "foobar"}) + + with pytest.raises(ValidationError): + DummyModel.model_validate({"email": object()}) + + +def test_email_serialization() -> None: + model = DummyModel.model_validate({"email": "wasd@def.ghi"}) + raw = model.model_dump() + + assert raw == {"email": "wasd@def.ghi"} + + +def test_email_schema() -> None: + assert DummyModel.model_json_schema() == { + "properties": { + "email": { + "examples": ["info@rki.de"], + "format": "email", + "pattern": EMAIL_PATTERN, + "title": "Email", + "type": "string", + } + }, + "required": ["email"], + "title": "DummyModel", + "type": "object", + } + + +def test_email_repr() -> None: + assert repr(Email("wasd@def.ghi")) == 'Email("wasd@def.ghi")' diff --git a/tests/types/test_identifier.py b/tests/types/test_identifier.py index 07d3d7b4..ac9e0a0c 100644 --- a/tests/types/test_identifier.py +++ b/tests/types/test_identifier.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, ValidationError from pytest import MonkeyPatch -from mex.common.types import Identifier +from mex.common.types 
import Identifier, IDENTIFIER_PATTERN class DummyIdentifier(Identifier): @@ -12,40 +12,45 @@ class DummyIdentifier(Identifier): class DummyModel(BaseModel): - id: Identifier - dummy: DummyIdentifier | None = None + id: DummyIdentifier -def test_identifier_validates() -> None: - model_with_obj = DummyModel.model_validate({"id": Identifier("bFQoRhcVH5DIfZ")}) - model_with_raw = DummyModel.model_validate({"id": "bFQoRhcVH5DIfZ"}) +def test_identifier_validation() -> None: + model = DummyModel.model_validate({"id": "bFQoRhcVH5DIfZ"}) + assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ") - assert model_with_obj.id == model_with_raw.id == Identifier.generate(seed=1337) + model = DummyModel.model_validate({"id": DummyIdentifier("bFQoRhcVH5DIfZ")}) + assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ") + + model = DummyModel(id=DummyIdentifier("bFQoRhcVH5DIfZ")) + assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ") with pytest.raises(ValidationError): DummyModel.model_validate({"id": "baaiaaaboi!!!"}) with pytest.raises(ValidationError): - DummyModel.model_validate({"id": 42}) + DummyModel.model_validate({"id": object()}) -def test_identifier_modifies_schema() -> None: - assert DummyModel.model_json_schema()["properties"]["id"] == { - "title": "Identifier", - "type": "string", - "pattern": r"^[a-zA-Z0-9]{14,22}$", - } - assert DummyModel.model_json_schema()["properties"]["dummy"] == { - "anyOf": [ - { - "pattern": "^[a-zA-Z0-9]{14,22}$", +def test_identifier_serialization() -> None: + model = DummyModel(id=DummyIdentifier("bFQoRhcVH5DIfZ")) + raw = model.model_dump() + + assert raw == {"id": "bFQoRhcVH5DIfZ"} + + +def test_identifier_schema() -> None: + assert DummyModel.model_json_schema() == { + "properties": { + "id": { + "pattern": IDENTIFIER_PATTERN, "title": "DummyIdentifier", "type": "string", - }, - {"type": "null"}, - ], - "default": None, - "title": "Dummy", + } + }, + "required": ["id"], + "title": "DummyModel", + "type": "object", } From 
218437ec3f80213920387385541db65e387fd903 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt Date: Tue, 24 Sep 2024 11:47:01 +0200 Subject: [PATCH 04/19] WIP --- mex/common/types/__init__.py | 2 +- mex/common/types/email.py | 1 - mex/common/types/identifier.py | 2 +- tests/types/test_email.py | 2 +- tests/types/test_identifier.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py index adea9c22..7e0827ea 100644 --- a/mex/common/types/__init__.py +++ b/mex/common/types/__init__.py @@ -1,6 +1,6 @@ from typing import Final, Literal, get_args -from mex.common.types.email import Email, EMAIL_PATTERN +from mex.common.types.email import EMAIL_PATTERN, Email from mex.common.types.identifier import ( IDENTIFIER_PATTERN, ExtractedAccessPlatformIdentifier, diff --git a/mex/common/types/email.py b/mex/common/types/email.py index 94340db4..3a98df1b 100644 --- a/mex/common/types/email.py +++ b/mex/common/types/email.py @@ -1,4 +1,3 @@ -import re from typing import Any from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, json_schema diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py index f888a245..c2537dee 100644 --- a/mex/common/types/identifier.py +++ b/mex/common/types/identifier.py @@ -1,7 +1,7 @@ import string from typing import Any, Self from uuid import UUID, uuid4 -import re + from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, json_schema from pydantic_core import core_schema diff --git a/tests/types/test_email.py b/tests/types/test_email.py index c971da3b..56485bbd 100644 --- a/tests/types/test_email.py +++ b/tests/types/test_email.py @@ -1,7 +1,7 @@ import pytest from pydantic import BaseModel, ValidationError -from mex.common.types import Email, EMAIL_PATTERN +from mex.common.types import EMAIL_PATTERN, Email class DummyModel(BaseModel): diff --git a/tests/types/test_identifier.py b/tests/types/test_identifier.py index ac9e0a0c..7b6dcbe0 
100644 --- a/tests/types/test_identifier.py +++ b/tests/types/test_identifier.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, ValidationError from pytest import MonkeyPatch -from mex.common.types import Identifier, IDENTIFIER_PATTERN +from mex.common.types import IDENTIFIER_PATTERN, Identifier class DummyIdentifier(Identifier): From af7392315912132145da6fc0943f7f50106bd24b Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt Date: Tue, 24 Sep 2024 14:30:15 +0200 Subject: [PATCH 05/19] fix tests --- mex/common/types/__init__.py | 1 + mex/common/types/link.py | 53 +++++++++----------------------- mex/common/types/text.py | 4 --- tests/types/test_link.py | 58 +++++++++++++----------------------- tests/types/test_text.py | 25 ++++++++-------- 5 files changed, 47 insertions(+), 94 deletions(-) diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py index 7e0827ea..6465a4e9 100644 --- a/mex/common/types/__init__.py +++ b/mex/common/types/__init__.py @@ -132,6 +132,7 @@ "Text", "TextLanguage", "Theme", + "URL_PATTERN", "UTC", "VOCABULARY_PATTERN", "VocabularyEnum", diff --git a/mex/common/types/link.py b/mex/common/types/link.py index a29ff7cf..9bca6a47 100644 --- a/mex/common/types/link.py +++ b/mex/common/types/link.py @@ -1,25 +1,9 @@ -import re from enum import StrEnum from typing import Annotated, Any from pydantic import BaseModel, Field, model_validator -# https://daringfireball.net/projects/markdown/syntax#backslash -MARKDOWN_SPECIAL_CHARS = r"\`*_{}[]()#+-.!" - - -def markdown_escape(string: str) -> str: - """Escape all special characters for markdown usage.""" - for char in MARKDOWN_SPECIAL_CHARS: - string = string.replace(char, f"\\{char}") - return string - - -def markdown_unescape(string: str) -> str: - """Unescape all special characters from a markdown string.""" - for char in MARKDOWN_SPECIAL_CHARS: - string = string.replace(f"\\{char}", char) - return string +URL_PATTERN = r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" 
class LinkLanguage(StrEnum): @@ -32,10 +16,10 @@ class LinkLanguage(StrEnum): class Link(BaseModel): """Type class for Link objects. - Links can be parsed from nested JSON objects or from markdown strings. + Links can be parsed from nested JSON objects or from raw strings. Example: - Link(url="https://foo", title="Title") == Link.model_validate("[Title](https://foo)") + Link(url="http://foo.bar") == Link.model_validate("http://foo.bar") """ language: LinkLanguage | None = None @@ -43,7 +27,7 @@ class Link(BaseModel): url: Annotated[ str, Field( - pattern=r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", + pattern=URL_PATTERN, min_length=1, examples=["https://hello-world.org", "file://S:/OE/MF4/Projekte/MEx"], json_schema_extra={"format": "uri"}, @@ -52,23 +36,14 @@ class Link(BaseModel): @model_validator(mode="before") @classmethod - def convert_markdown_to_link(cls, values: Any) -> dict[str, Any]: + def validate_strings(cls, value: Any) -> dict[str, Any]: """Convert string input to dictionary.""" - if isinstance(values, dict): - return values - if isinstance(values, str): - if match := re.match(r"\[(?P<title>.*)\]\((?P<url>.*)\)", values): - return { - key: markdown_unescape(value) - for key, value in match.groupdict().items() - } - return {"url": values} - raise ValueError(f"Allowed input types are dict and str, got {type(values)}") - - def __str__(self) -> str: - """Render the link as markdown if a title is set, otherwise as plain url.""" - if title := self.title: - title = markdown_escape(title) - url = markdown_escape(self.url) - return f"[{title}]({url})" - return self.url + if isinstance(value, str): + return {"url": value} + if isinstance(value, dict): + return value + raise ValueError(f"Allowed input types are dict and str, got {type(value)}") + + def __hash__(self) -> int: + """Return the hash of this link.""" + return hash((self.url, self.title, self.language)) diff --git a/mex/common/types/text.py b/mex/common/types/text.py index
2f406fb8..14f55f8f 100644 --- a/mex/common/types/text.py +++ b/mex/common/types/text.py @@ -54,10 +54,6 @@ def validate_strings(cls, value: Any) -> dict[str, Any]: return value raise ValueError(f"Allowed input types are dict and str, got {type(value)}") - def __str__(self) -> str: - """Return the text value.""" - return self.value - def __hash__(self) -> int: """Return the hash of Text.""" return hash((self.value, self.language)) diff --git a/tests/types/test_link.py b/tests/types/test_link.py index 8e96d5c5..000c2866 100644 --- a/tests/types/test_link.py +++ b/tests/types/test_link.py @@ -1,56 +1,38 @@ -from pydantic import BaseModel +import pytest +from pydantic import BaseModel, ValidationError from mex.common.types import Link, LinkLanguage -def test_parsing_from_string() -> None: - class DummyModel(BaseModel): - link: Link +class DummyModel(BaseModel): + link: Link - # plain link - model = DummyModel.model_validate({"link": "https://example.com"}) - assert model.model_dump(exclude_none=True) == { - "link": {"url": "https://example.com"} - } - # link with title - model = DummyModel.model_validate({"link": "[Example](https://example.com)"}) - assert model.model_dump(exclude_none=True) == { - "link": {"url": "https://example.com", "title": "Example"} - } +def test_link_validation() -> None: + with pytest.raises(ValidationError, match="Allowed input types are dict and str"): + _ = DummyModel.model_validate({"link": 1}) - # link with funky characters - model = DummyModel.model_validate( - {"link": r"[\[TEST\] Example](https://example.com/test?q=\(\.\*\))"} - ) - assert model.model_dump(exclude_none=True) == { - "link": {"url": "https://example.com/test?q=(.*)", "title": "[TEST] Example"} + model = DummyModel.model_validate({"link": "https://example.com"}) + assert model.model_dump() == { + "link": { + "language": None, + "title": None, + "url": "https://example.com", + } } - # nested model model = DummyModel.model_validate( {"link": {"url": "https://example.com", 
"title": "Example", "language": "en"}} ) - assert model.model_dump(exclude_none=True) == { + assert model.model_dump() == { "link": { - "url": "https://example.com", - "title": "Example", "language": LinkLanguage.EN, + "title": "Example", + "url": "https://example.com", } } -def test_rendering_as_string() -> None: - # plain link - link = Link.model_validate({"url": "https://example.com"}) - assert str(link) == "https://example.com" - - # link with title - link = Link.model_validate({"url": "https://example.com", "title": "Example"}) - assert str(link) == r"[Example](https://example\.com)" - - # link with funky characters - link = Link.model_validate( - {"url": "https://example.com/test?q=(.*)", "title": "[TEST] Example"} - ) - assert str(link) == r"[\[TEST\] Example](https://example\.com/test?q=\(\.\*\))" +def test_link_hash() -> None: + link = Link(url="https://foo.bar", title="Hallo Welt.", language=LinkLanguage.DE) + assert hash(link) == hash(("https://foo.bar", "Hallo Welt.", LinkLanguage.DE)) diff --git a/tests/types/test_text.py b/tests/types/test_text.py index 494814f1..f9f686d6 100644 --- a/tests/types/test_text.py +++ b/tests/types/test_text.py @@ -33,23 +33,27 @@ def test_text_language_detect() -> None: assert none_text.language is None -def test_parsing_from_string() -> None: - class DummyModel(BaseModel): - text: Text +class DummyModel(BaseModel): + text: Text + + +def test_text_validation() -> None: + with pytest.raises(ValidationError, match="Allowed input types are dict and str"): + _ = DummyModel.model_validate({"text": 1}) model = DummyModel.model_validate({"text": "we are parsing a string here"}) assert model.model_dump() == { "text": {"value": "we are parsing a string here", "language": TextLanguage.EN} } - with pytest.raises(ValidationError): - _ = DummyModel.model_validate({"text": 1}) - model = DummyModel.model_validate( - {"text": {"value": "and here, we parsing an object"}} + {"text": {"value": "and here, we are parsing an object"}} ) assert 
model.model_dump() == { - "text": {"value": "and here, we parsing an object", "language": TextLanguage.EN} + "text": { + "value": "and here, we are parsing an object", + "language": TextLanguage.EN, + } } model = DummyModel.model_validate( @@ -68,11 +72,6 @@ class DummyModel(BaseModel): } -def test_text_str() -> None: - text = Text(value="Hello world.") - assert str(text) == "Hello world." - - def test_text_hash() -> None: text = Text(value="Hallo Welt.", language=TextLanguage.DE) assert hash(text) == hash(("Hallo Welt.", TextLanguage.DE)) From 6220b11fdcc0a40e5ea6ef7cad9ef96fda6c07f3 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Tue, 24 Sep 2024 16:49:00 +0200 Subject: [PATCH 06/19] add tests --- tests/types/test_temporal_entity.py | 29 +++++++++++++++++++++++++---- tests/types/test_vocabulary.py | 24 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/tests/types/test_temporal_entity.py b/tests/types/test_temporal_entity.py index 4c6d0694..d8e77c87 100644 --- a/tests/types/test_temporal_entity.py +++ b/tests/types/test_temporal_entity.py @@ -262,10 +262,31 @@ def test_temporal_entity_repr() -> None: ) -def test_temporal_entity_serialization() -> None: - class Person(BaseModel): - birthday: YearMonthDay +class DummyModel(BaseModel): + birthday: YearMonthDay + + +def test_email_schema() -> None: + assert DummyModel.model_json_schema() == { + "properties": { + "birthday": { + "examples": ["2014-08-24"], + "format": "date", + "pattern": "^\\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "title": "YearMonthDay", + "type": "string", + } + }, + "required": ["birthday"], + "title": "DummyModel", + "type": "object", + } + - person = Person.model_validate({"birthday": "24th July 1999"}) +DummyModel.model_json_schema() + + +def test_temporal_entity_serialization() -> None: + person = DummyModel.model_validate({"birthday": "24th July 1999"}) assert person.model_dump_json() == '{"birthday":"1999-07-24"}' 
diff --git a/tests/types/test_vocabulary.py b/tests/types/test_vocabulary.py index 617dbab4..1e2d0d40 100644 --- a/tests/types/test_vocabulary.py +++ b/tests/types/test_vocabulary.py @@ -62,6 +62,30 @@ class DummyModel(BaseModel): assert model.dummy == DummyEnum["PREF_EN_TWO"] +@pytest.mark.usefixtures("use_dummy_vocabulary") +def test_vocabulary_enum_schema() -> None: + class DummyEnum(VocabularyEnum): + __vocabulary__ = "dummy-vocabulary" + + class DummyModel(BaseModel): + dummy: DummyEnum + + assert DummyModel.model_json_schema() == { + "properties": { + "dummy": { + "examples": ["https://mex.rki.de/item/dummy-vocabulary-1"], + "pattern": "https://mex.rki.de/item/[a-z0-9-]+", + "title": "Dummy", + "type": "string", + "useScheme": "https://mex.rki.de/item/dummy-vocabulary", + } + }, + "required": ["dummy"], + "title": "DummyModel", + "type": "object", + } + + @pytest.mark.usefixtures("use_dummy_vocabulary") def test_vocabulary_enum_find() -> None: class DummyEnum(VocabularyEnum): From 72faee16316d320344bffae28371e781c469d191 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Tue, 24 Sep 2024 17:09:01 +0200 Subject: [PATCH 07/19] update cl --- CHANGELOG.md | 13 +++++++++++++ mex/common/models/consent.py | 8 +++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d8f3e249..d03e10da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,17 +12,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `contains_only_types` to check if fields are annotated as desired - `group_fields_by_class_name` utility to simplify filtered model/field lookups - new parameters to `get_inner_types` to customize what to unpack +- add pattern constants for vocabs, emails, urls and ids to types module +- add regex pattern to json schema of identifier fields +- automatically add examples and useScheme to json schema of enum fields ### Changes - BREAKING: use `identifier` instead of 
`stableTargetId` to get merged item from backend - ensure identifier unions are typed to generic `Identifier` instead of the first match +- simplify pydantic schema configuration for all types +- consistently parse emails, identifiers and temporals in models to their type, not str +- consistently serialize emails, ids and temporals in models to str, not their type +- make instances of Link type hashable ### Deprecated ### Removed - drop manual examples from enum fields, because they are autogenerated now +- BREAKING: remove `MEX_ID_PATTERN` from types, in favor of `IDENTIFIER_PATTERN` +- BREAKING: make public `MEX_ID_ALPHABET` constant from identifier module private +- BREAKING: remove `__str__` methods from Text and Link classes +- BREAKING: drop support for parsing UUIDs as Identifiers, this was unused +- BREAKING: drop support for parsing Links from markdown syntax, this was unused +- BREAKING: remove pydantic1-style `validate` methods from all type models ### Fixed diff --git a/mex/common/models/consent.py b/mex/common/models/consent.py index 77cdd379..3e62ee0b 100644 --- a/mex/common/models/consent.py +++ b/mex/common/models/consent.py @@ -1,5 +1,3 @@ -# XXX this is a forward-compatibility hint for feature/model-update-v3 - -# when this gets merged with model v3 -# - remove the `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields -# - add `Annotated[..., AfterValidator(Identifier)]` to all identifier union fields +# XXX this is a forward-compatibility hint for feature/model-update-v3: +# when this gets merged with model v3, remove the +# `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields From b8b7937305a9a9ac9bbfadfb28d8c93771384eaa Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Tue, 24 Sep 2024 17:15:28 +0200 Subject: [PATCH 08/19] fix import --- tests/models/test_model_schemas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/test_model_schemas.py 
b/tests/models/test_model_schemas.py index cdf639bb..c828485c 100644 --- a/tests/models/test_model_schemas.py +++ b/tests/models/test_model_schemas.py @@ -10,7 +10,7 @@ from mex.common.models import EXTRACTED_MODEL_CLASSES, BaseModel from mex.common.transform import dromedary_to_kebab -from mex.common.types import MEX_ID_PATTERN, VOCABULARY_PATTERN +from mex.common.types import IDENTIFIER_PATTERN, VOCABULARY_PATTERN MEX_MODEL_ENTITIES = files("mex.model.entities") @@ -141,7 +141,7 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None: # align reference paths # (the paths to referenced vocabularies and types differ between the models # and the specification, so we need to make sure they match before comparing) - if obj.get("pattern") == MEX_ID_PATTERN: + if obj.get("pattern") == IDENTIFIER_PATTERN: obj.pop("pattern") obj.pop("type") if field in ("identifier", "stableTargetId"): From 78b762cd8dc4ece3ca018498461ed972aae1f10b Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Mon, 30 Sep 2024 15:41:57 +0200 Subject: [PATCH 09/19] fix serializer --- mex/common/types/vocabulary.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mex/common/types/vocabulary.py b/mex/common/types/vocabulary.py index ac2b439b..98a8f706 100644 --- a/mex/common/types/vocabulary.py +++ b/mex/common/types/vocabulary.py @@ -116,10 +116,16 @@ def __get_pydantic_core_schema__( """Modify the core schema to add the vocabulary regex.""" return core_schema.chain_schema( [ + core_schema.no_info_plain_validator_function( + lambda v: v.value if isinstance(v, cls) else v + ), core_schema.str_schema(pattern=VOCABULARY_PATTERN), core_schema.no_info_plain_validator_function(cls), ], - serialization=core_schema.to_string_ser_schema(when_used="unless-none"), + serialization=core_schema.plain_serializer_function_ser_schema( + lambda s: s.value, + return_schema=core_schema.str_schema(pattern=VOCABULARY_PATTERN), + ), ) @classmethod From 
4799be6c1973880591fb937605433f3d1fdbd0fe Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Mon, 30 Sep 2024 15:42:41 +0200 Subject: [PATCH 10/19] update doc --- mex/common/backend_api/connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py index 3a4588fe..434e97ca 100644 --- a/mex/common/backend_api/connector.py +++ b/mex/common/backend_api/connector.py @@ -56,7 +56,7 @@ def post_models( Returns: Identifiers of posted extracted models """ - # XXX deprecated method, please use `post_extracted_models` instead + # XXX deprecated method, please use `post_extracted_items` instead return cast( list[AnyExtractedIdentifier], self.post_extracted_items(extracted_items).identifiers, From 227a3025a43db1b908f8bab76f6fbb0a0d5a25ff Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Mon, 30 Sep 2024 16:17:14 +0200 Subject: [PATCH 11/19] clean up vocabs --- mex/common/types/vocabulary.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/mex/common/types/vocabulary.py b/mex/common/types/vocabulary.py index 98a8f706..1426dcad 100644 --- a/mex/common/types/vocabulary.py +++ b/mex/common/types/vocabulary.py @@ -114,17 +114,22 @@ def __get_pydantic_core_schema__( cls, source_type: object, handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: """Modify the core schema to add the vocabulary regex.""" - return core_schema.chain_schema( - [ - core_schema.no_info_plain_validator_function( - lambda v: v.value if isinstance(v, cls) else v - ), - core_schema.str_schema(pattern=VOCABULARY_PATTERN), - core_schema.no_info_plain_validator_function(cls), - ], + return core_schema.json_or_python_schema( + json_schema=core_schema.union_schema( + [ + core_schema.str_schema(pattern=VOCABULARY_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ], + ), + python_schema=core_schema.chain_schema( + 
[ + core_schema.is_instance_schema(cls | str), + core_schema.no_info_plain_validator_function(cls), + ] + ), serialization=core_schema.plain_serializer_function_ser_schema( lambda s: s.value, - return_schema=core_schema.str_schema(pattern=VOCABULARY_PATTERN), + when_used="unless-none", ), ) From 612c7cc4471eefd9b7ed1d89b7ff49f52d3a46d0 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Mon, 30 Sep 2024 16:28:42 +0200 Subject: [PATCH 12/19] fix url pattern --- mex/common/types/link.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mex/common/types/link.py b/mex/common/types/link.py index 9bca6a47..7ed49d54 100644 --- a/mex/common/types/link.py +++ b/mex/common/types/link.py @@ -3,7 +3,7 @@ from pydantic import BaseModel, Field, model_validator -URL_PATTERN = r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" +URL_PATTERN = r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?" class LinkLanguage(StrEnum): From f734ce4e05314a99984a13ef30b8dd9c629dce37 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <897972+cutoffthetop@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:13:21 +0200 Subject: [PATCH 13/19] Update tests/types/test_temporal_entity.py Co-authored-by: rababerladuseladim <rababerladuseladim@users.noreply.github.com> Signed-off-by: Nicolas Drebenstedt <897972+cutoffthetop@users.noreply.github.com> --- tests/types/test_temporal_entity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/types/test_temporal_entity.py b/tests/types/test_temporal_entity.py index d8e77c87..53d7dd42 100644 --- a/tests/types/test_temporal_entity.py +++ b/tests/types/test_temporal_entity.py @@ -266,7 +266,7 @@ class DummyModel(BaseModel): birthday: YearMonthDay -def test_email_schema() -> None: +def test_temporal_entity_schema() -> None: assert DummyModel.model_json_schema() == { "properties": { "birthday": { From dec495a0cbf48c84ca03b3a6629ec1b0b0f2769e Mon Sep 17 00:00:00 2001 From: Nicolas 
Drebenstedt <drebenstedtn@rki.de> Date: Thu, 10 Oct 2024 16:00:48 +0200 Subject: [PATCH 14/19] CL --- CHANGELOG.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60a724a1..9778c60b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,14 +9,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- add pattern constants for vocabs, emails, urls and ids to types module +- add regex pattern to json schema of identifier fields +- automatically add examples and useScheme to json schema of enum fields + ### Changes - BREAKING: use `identifier` instead of `stableTargetId` to get merged item from backend - ensure identifier unions are typed to generic `Identifier` instead of the first match -- simplify pydantic schema configuration for all types + to signal that we don't actually know which of the union types is correct +- unify pydantic schema configuration for all types - consistently parse emails, identifiers and temporals in models to their type, not str - consistently serialize emails, ids and temporals in models to str, not their type -- make instances of Link type hashable +- make instances of Link type hashable, to harmonize them with Text models ### Deprecated From 794f7ab3ef0f5e8586c37b2bce2d34dca9255c0f Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Fri, 11 Oct 2024 09:29:54 +0200 Subject: [PATCH 15/19] remove post_models --- CHANGELOG.md | 1 + mex/common/backend_api/connector.py | 23 ----------------------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9778c60b..73871d42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - BREAKING: drop support for parsing UUIDs as Identifiers, this was unused - BREAKING: drop support for parsing Links from markdown syntax, this was unused - BREAKING: 
remove pydantic1-style `validate` methods from all type models +- BREAKING: `BackendApiConnector.post_models` in favor of `post_extracted_items` ### Fixed diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py index 434e97ca..94731488 100644 --- a/mex/common/backend_api/connector.py +++ b/mex/common/backend_api/connector.py @@ -1,4 +1,3 @@ -from typing import cast from urllib.parse import urljoin from requests.exceptions import HTTPError @@ -19,7 +18,6 @@ AnyRuleSetResponse, ) from mex.common.settings import BaseSettings -from mex.common.types import AnyExtractedIdentifier class BackendApiConnector(HTTPConnector): @@ -41,27 +39,6 @@ def _set_url(self) -> None: settings = BaseSettings.get() self.url = urljoin(str(settings.backend_api_url), self.API_VERSION) - def post_models( - self, - extracted_items: list[AnyExtractedModel], - ) -> list[AnyExtractedIdentifier]: - """Post extracted models to the backend in bulk. - - Args: - extracted_items: Extracted models to post - - Raises: - HTTPError: If post was not accepted, crashes or times out - - Returns: - Identifiers of posted extracted models - """ - # XXX deprecated method, please use `post_extracted_items` instead - return cast( - list[AnyExtractedIdentifier], - self.post_extracted_items(extracted_items).identifiers, - ) - def post_extracted_items( self, extracted_items: list[AnyExtractedModel], From 4fd9566064826ef2e646adc42ba3cf53787f7909 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Fri, 11 Oct 2024 14:23:42 +0200 Subject: [PATCH 16/19] more linters --- mex/common/backend_api/connector.py | 5 +- mex/common/cli.py | 4 +- mex/common/connector/http.py | 14 +-- mex/common/context.py | 3 +- mex/common/extract.py | 8 +- mex/common/identity/registry.py | 8 +- mex/common/ldap/connector.py | 23 +++-- mex/common/ldap/extract.py | 3 +- mex/common/ldap/transform.py | 11 ++- mex/common/models/base/model.py | 15 +-- mex/common/models/consent.py | 5 +- 
mex/common/organigram/transform.py | 2 +- mex/common/settings.py | 2 +- mex/common/sinks/ndjson.py | 2 +- mex/common/transform.py | 12 ++- mex/common/types/email.py | 2 + mex/common/types/identifier.py | 2 + mex/common/types/link.py | 3 +- mex/common/types/path.py | 5 +- mex/common/types/temporal_entity.py | 33 ++++--- mex/common/types/text.py | 3 +- mex/common/utils.py | 5 +- mex/common/wikidata/connector.py | 22 ++--- mex/common/wikidata/extract.py | 19 ++-- pdm.lock | 140 ++++++++++++++-------------- pyproject.toml | 100 +++++++++++--------- tests/conftest.py | 4 +- tests/connector/test_http.py | 2 +- tests/identity/test_memory.py | 36 +++---- tests/ldap/conftest.py | 70 +++++++------- tests/ldap/test_connector.py | 4 +- tests/ldap/test_extract.py | 16 ++-- tests/ldap/test_transform.py | 2 +- tests/models/test_base.py | 6 +- tests/organigram/conftest.py | 8 +- tests/test_cli.py | 9 +- tests/test_settings.py | 2 +- tests/test_utils.py | 10 +- tests/testing/test_joker.py | 2 +- tests/types/test_temporal_entity.py | 14 +-- tests/types/test_vocabulary.py | 2 +- tests/wikidata/conftest.py | 4 +- tests/wikidata/test_connector.py | 4 +- tests/wikidata/test_convenience.py | 2 +- tests/wikidata/test_extract.py | 10 +- 45 files changed, 343 insertions(+), 315 deletions(-) diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py index 94731488..2ee350c1 100644 --- a/mex/common/backend_api/connector.py +++ b/mex/common/backend_api/connector.py @@ -145,7 +145,7 @@ def get_merged_item( Returns: A single merged item """ - # XXX stop-gap until the backend has a proper get merged item endpoint (MX-1669) + # TODO(ND): stop-gap until backend has proper get merged item endpoint (MX-1669) response = self.request( method="GET", endpoint="merged-item", @@ -158,7 +158,8 @@ def get_merged_item( try: return response_model.items[0] except IndexError: - raise HTTPError("merged item was not found") from None + msg = "merged item was not found" + raise 
HTTPError(msg) from None def preview_merged_item( self, diff --git a/mex/common/cli.py b/mex/common/cli.py index d9424f18..d17a529e 100644 --- a/mex/common/cli.py +++ b/mex/common/cli.py @@ -137,13 +137,13 @@ def _callback( func() except (Abort, BdbQuit, Exit, KeyboardInterrupt): # pragma: no cover context.exit(130) - except Exception as error: + except Exception: # an error occurred, let's print the traceback logger.error(click.style(format_exc(), fg="red")) if settings.debug: # pragma: no cover # if we are in debug mode, jump into interactive debugging. pdb.post_mortem(sys.exc_info()[2]) - raise error + raise # if not in debug mode, exit with code 1. logger.error("exit") context.exit(1) diff --git a/mex/common/connector/http.py b/mex/common/connector/http.py index e6e1ef12..716de1de 100644 --- a/mex/common/connector/http.py +++ b/mex/common/connector/http.py @@ -5,7 +5,7 @@ import backoff import requests -from requests import HTTPError, RequestException, Response +from requests import HTTPError, RequestException, Response, codes from mex.common.connector import BaseConnector from mex.common.settings import BaseSettings @@ -30,7 +30,7 @@ def _set_session(self) -> None: """Create and set request session.""" settings = BaseSettings.get() self.session = requests.Session() - self.session.verify = settings.verify_session # type: ignore + self.session.verify = settings.verify_session # type: ignore[assignment] def _set_authentication(self) -> None: """Authenticate to the host.""" @@ -94,23 +94,25 @@ def request( response=response, ) from error - if response.status_code == 204: + if response.status_code == codes.no_content: return {} return cast(dict[str, Any], response.json()) @backoff.on_predicate( backoff.fibo, - lambda response: cast(Response, response).status_code >= 500, + lambda response: cast(Response, response).status_code + >= codes.internal_server_error, max_tries=4, ) @backoff.on_predicate( backoff.fibo, - lambda response: cast(Response, response).status_code 
== 429, + lambda response: cast(Response, response).status_code + == codes.too_many_requests, max_tries=10, ) @backoff.on_predicate( backoff.fibo, - lambda response: cast(Response, response).status_code == 403, + lambda response: cast(Response, response).status_code == codes.forbidden, max_tries=10, ) @backoff.on_exception(backoff.fibo, RequestException, max_tries=6) diff --git a/mex/common/context.py b/mex/common/context.py index ba6707cf..96857f38 100644 --- a/mex/common/context.py +++ b/mex/common/context.py @@ -51,11 +51,12 @@ def load(self, cls: type[_SingletonT]) -> _SingletonT: self._singleton = cls() return self._singleton if not issubclass(type(self._singleton), cls): - raise RuntimeError( + msg = ( f"requested class ({cls}) is not a parent class of loaded class " f"({type(self._singleton)}). " f"Did you initialize {cls} upon startup?" ) + raise RuntimeError(msg) # noqa: TRY004 return self._singleton def push(self, instance: _SingletonT) -> None: diff --git a/mex/common/extract.py b/mex/common/extract.py index 688806ad..cb9755c2 100644 --- a/mex/common/extract.py +++ b/mex/common/extract.py @@ -59,10 +59,12 @@ def parse_csv( ) as reader: for chunk in reader: for index, row in chunk.iterrows(): - row.replace(to_replace=np.nan, value=None, inplace=True) - row.replace(regex=r"^\s*$", value=None, inplace=True) try: - model = into.model_validate(row.to_dict()) + model = into.model_validate( + row.replace(to_replace=np.nan, value=None) + .replace(regex=r"^\s*$", value=None) + .to_dict() + ) logger.info( "parse_csv - %s %s - OK", into.__name__, diff --git a/mex/common/identity/registry.py b/mex/common/identity/registry.py index b133ea6e..0525aa9f 100644 --- a/mex/common/identity/registry.py +++ b/mex/common/identity/registry.py @@ -21,7 +21,8 @@ def register_provider(key: Hashable, provider_cls: type[BaseProvider]) -> None: RuntimeError: When the `key` is already registered """ if key in _PROVIDER_REGISTRY: - raise RuntimeError(f"Already registered identity 
provider: {key}") + msg = f"Already registered identity provider: {key}" + raise RuntimeError(msg) _PROVIDER_REGISTRY[key] = provider_cls @@ -41,9 +42,8 @@ def get_provider() -> BaseProvider: if settings.identity_provider in _PROVIDER_REGISTRY: provider_cls = _PROVIDER_REGISTRY[settings.identity_provider] return provider_cls.get() - raise RuntimeError( - f"Identity provider not implemented: {settings.identity_provider}" - ) + msg = f"Identity provider not implemented: {settings.identity_provider}" + raise RuntimeError(msg) # register the default providers shipped with mex-common diff --git a/mex/common/ldap/connector.py b/mex/common/ldap/connector.py index debaf93b..6f93e716 100644 --- a/mex/common/ldap/connector.py +++ b/mex/common/ldap/connector.py @@ -43,7 +43,8 @@ def __init__(self) -> None: ) self._connection = connection.__enter__() if not self._is_service_available(): - raise MExError(f"LDAP service not available at url: {host}:{port}") + msg = f"LDAP service not available at url: {host}:{port}" + raise MExError(msg) def _is_service_available(self) -> bool: try: @@ -197,15 +198,17 @@ def get_functional_account( ) ) if not functional_accounts: - raise EmptySearchResultError( + msg = ( "Cannot find AD functional account for filters " f"'objectGUID: {objectGUID}, {filters}'" ) + raise EmptySearchResultError(msg) if len(functional_accounts) > 1: - raise FoundMoreThanOneError( + msg = ( "Found multiple AD functional accounts for filters " f"'objectGUID: {objectGUID}, {filters}'" ) + raise FoundMoreThanOneError(msg) return functional_accounts[0] def get_person( @@ -235,15 +238,17 @@ def get_person( ) ) if not persons: - raise EmptySearchResultError( + msg = ( f"Cannot find AD person for filters 'objectGUID: {objectGUID}, " f"employeeID: {employeeID}, {filters}'" ) + raise EmptySearchResultError(msg) if len(persons) > 1: - raise FoundMoreThanOneError( + msg = ( f"Found multiple AD persons for filters 'objectGUID: {objectGUID}, " f"employeeID: {employeeID}, 
{filters}'" ) + raise FoundMoreThanOneError(msg) return persons[0] def get_unit(self, **filters: str) -> LDAPUnit: @@ -260,9 +265,9 @@ def get_unit(self, **filters: str) -> LDAPUnit: """ units = list(self.get_units(**filters)) if not units: - raise EmptySearchResultError(f"Cannot find AD unit for filters '{filters}'") + msg = f"Cannot find AD unit for filters '{filters}'" + raise EmptySearchResultError(msg) if len(units) > 1: - raise FoundMoreThanOneError( - f"Found multiple AD units for filters '{filters}'" - ) + msg = f"Found multiple AD units for filters '{filters}'" + raise FoundMoreThanOneError(msg) return units[0] diff --git a/mex/common/ldap/extract.py b/mex/common/ldap/extract.py index 89854dd0..7fa371e2 100644 --- a/mex/common/ldap/extract.py +++ b/mex/common/ldap/extract.py @@ -28,7 +28,8 @@ def _get_merged_ids_by_attribute( MergedPersonIdentifiers """ if attribute not in LDAPPerson.model_fields: - raise RuntimeError(f"Not a valid LDAPPerson field: {attribute}") + msg = f"Not a valid LDAPPerson field: {attribute}" + raise RuntimeError(msg) merged_ids_by_attribute = defaultdict(list) provider = get_provider() for person in persons: diff --git a/mex/common/ldap/transform.py b/mex/common/ldap/transform.py index 106c361d..29364450 100644 --- a/mex/common/ldap/transform.py +++ b/mex/common/ldap/transform.py @@ -100,15 +100,16 @@ def transform_ldap_person_to_mex_person( if d and (unit := units_by_identifier_in_primary_source.get(d.lower())) ] if not member_of: - raise MExError( + msg = ( "No unit or department found for LDAP department " f"'{ldap_person.department}' or departmentNumber " f"'{ldap_person.departmentNumber}'" ) + raise MExError(msg) return ExtractedPerson( identifierInPrimarySource=str(ldap_person.objectGUID), hadPrimarySource=primary_source.stableTargetId, - affiliation=[], # TODO resolve organization for person.company/RKI + affiliation=[], # TODO(HS): resolve organization for person.company/RKI email=ldap_person.mail, 
familyName=[ldap_person.sn], fullName=[ldap_person.displayName] if ldap_person.displayName else [], @@ -184,11 +185,11 @@ def analyse_person_string(string: str) -> list[PersonName]: return [name for strings in split for name in analyse_person_string(strings)] # split on comma if there is more than one - if len(split := re.split(r",", string)) > 2: + if len(split := re.split(r",", string)) > 2: # noqa: PLR2004 return [name for strings in split for name in analyse_person_string(strings)] # split on single commas only if there are more than three words - if len(split := re.split(r",", string)) == 2 and string.strip().count(" ") > 2: + if len(split := re.split(r",", string)) == 2 and string.strip().count(" ") > 2: # noqa: PLR2004 return [name for strings in split for name in analyse_person_string(strings)] # split into surname and given name @@ -209,7 +210,7 @@ def analyse_person_string(string: str) -> list[PersonName]: return [PersonName(surname=split[0], full_name=full_name)] # return surname and given name - if len(split) == 2: + if len(split) == 2: # noqa: PLR2004 return [PersonName(surname=split[1], given_name=split[0], full_name=full_name)] # found no one diff --git a/mex/common/models/base/model.py b/mex/common/models/base/model.py index e5ac4802..7092d140 100644 --- a/mex/common/models/base/model.py +++ b/mex/common/models/base/model.py @@ -143,7 +143,8 @@ def _convert_list_to_non_list(cls, field_name: str, value: list[Any]) -> Any: # if we have just one entry, we can safely unpack it return value[0] # we cannot unambiguously unpack more than one value - raise ValueError(f"got multiple values for {field_name}") + msg = f"got multiple values for {field_name}" + raise ValueError(msg) @classmethod def _fix_value_listyness_for_field(cls, field_name: str, value: Any) -> Any: @@ -186,10 +187,11 @@ def verify_computed_field_consistency( if not isinstance(data, MutableMapping): # data is not a dictionary: we can't "pop" values from that, # so we can't safely do a 
before/after comparison - raise AssertionError( + msg = ( "Input should be a valid dictionary, validating other types is not " "supported for models with computed fields." ) + raise AssertionError(msg) # noqa: TRY004 custom_values = { field: value for field in cls.model_computed_fields @@ -198,7 +200,8 @@ def verify_computed_field_consistency( result = handler(data) computed_values = result.model_dump(include=set(custom_values)) if computed_values != custom_values: - raise ValueError("Cannot set computed fields to custom values!") + msg = "Cannot set computed fields to custom values!" + raise ValueError(msg) return result @model_validator(mode="wrap") @@ -223,9 +226,9 @@ def fix_listyness(cls, data: Any, handler: ValidatorFunctionWrapHandler) -> Any: Returns: data with fixed list shapes """ - # XXX This needs to be a "wrap" validator that is defined *after* the computed - # field model validator, so it runs *before* the computed field validator. - # Sigh, see https://github.com/pydantic/pydantic/discussions/7434 + # TODO(ND): This needs to be a "wrap" validator that is defined *after* the + # computed field model validator, so it runs *before* the computed field + # validator. 
Sigh, see https://github.com/pydantic/pydantic/discussions/7434 if isinstance(data, MutableMapping): for name, value in data.items(): field_name = cls._get_alias_lookup().get(name, name) diff --git a/mex/common/models/consent.py b/mex/common/models/consent.py index 3e62ee0b..f7c6e233 100644 --- a/mex/common/models/consent.py +++ b/mex/common/models/consent.py @@ -1,3 +1,2 @@ -# XXX this is a forward-compatibility hint for feature/model-update-v3: -# when this gets merged with model v3, remove the -# `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields +# TODO(ND): when this gets merged with feature/model-update-v3, remove the +# `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields diff --git a/mex/common/organigram/transform.py b/mex/common/organigram/transform.py index e3957740..829bb347 100644 --- a/mex/common/organigram/transform.py +++ b/mex/common/organigram/transform.py @@ -42,7 +42,7 @@ def transform_organigram_units_to_organizational_units( for extracted_unit in extracted_unit_by_id_in_primary_source.values(): identifier_in_primary_source = extracted_unit.identifierInPrimarySource - if ( + if ( # noqa: SIM102 parent_identifier_in_primary_source := parent_id_in_primary_source_by_id_in_primary_source.get( identifier_in_primary_source diff --git a/mex/common/settings.py b/mex/common/settings.py index fbdd7db4..e5fd3b6d 100644 --- a/mex/common/settings.py +++ b/mex/common/settings.py @@ -208,7 +208,7 @@ def get_env_name(cls, name: str) -> str: case_sensitive=cls.model_config.get("case_sensitive", False), env_prefix=cls.model_config.get("env_prefix", ""), ) - env_info = env_settings._extract_field_info(field, name) + env_info = env_settings._extract_field_info(field, name) # noqa: SLF001 return env_info[0][1].upper() @model_validator(mode="after") diff --git a/mex/common/sinks/ndjson.py b/mex/common/sinks/ndjson.py index 565101e7..619b5b63 100644 --- a/mex/common/sinks/ndjson.py +++ b/mex/common/sinks/ndjson.py @@ -35,7 
+35,7 @@ def write_ndjson( handle = file_handles[class_name] except KeyError: file_name = Path(settings.work_dir, f"{class_name}.ndjson") - writer = open(file_name, "a+", encoding="utf-8") + writer = open(file_name, "a+", encoding="utf-8") # noqa: SIM115 file_handles[class_name] = handle = stack.enter_context(writer) logger.info( "write_ndjson - writing %s to file %s", diff --git a/mex/common/transform.py b/mex/common/transform.py index 521421a6..e394894e 100644 --- a/mex/common/transform.py +++ b/mex/common/transform.py @@ -16,7 +16,7 @@ class MExEncoder(json.JSONEncoder): """Custom JSON encoder that can handle pydantic models, enums and UUIDs.""" - def default(self, obj: Any) -> Any: + def default(self, obj: Any) -> Any: # noqa: PLR0911 """Implement custom serialization rules.""" if isinstance(obj, PydanticModel): return obj.model_dump() @@ -106,7 +106,9 @@ def to_key_and_values(dct: dict[str, Any]) -> Iterable[tuple[str, list[Any]]]: """Return an iterable of dictionary items where the values are always lists.""" for key, value in dct.items(): if value is None: - value = [] - elif not isinstance(value, list): - value = [value] - yield key, value + list_of_values = [] + elif isinstance(value, list): + list_of_values = value + else: + list_of_values = [value] + yield key, list_of_values diff --git a/mex/common/types/email.py b/mex/common/types/email.py index 3a98df1b..32128d7c 100644 --- a/mex/common/types/email.py +++ b/mex/common/types/email.py @@ -9,6 +9,8 @@ class Email(str): """Email address of a person, organization or other entity.""" + __slots__ = () + @classmethod def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py index c2537dee..483793c3 100644 --- a/mex/common/types/identifier.py +++ b/mex/common/types/identifier.py @@ -12,6 +12,8 @@ class Identifier(str): """Common identifier class based on UUID version 4.""" + __slots__ = () + @classmethod 
def generate(cls, seed: int | None = None) -> Self: """Generate a new identifier from a seed or random UUID version 4.""" diff --git a/mex/common/types/link.py b/mex/common/types/link.py index 7ed49d54..0fefddeb 100644 --- a/mex/common/types/link.py +++ b/mex/common/types/link.py @@ -42,7 +42,8 @@ def validate_strings(cls, value: Any) -> dict[str, Any]: return {"url": value} if isinstance(value, dict): return value - raise ValueError(f"Allowed input types are dict and str, got {type(value)}") + msg = f"Allowed input types are dict and str, got {type(value)}" + raise ValueError(msg) def __hash__(self) -> int: """Return the hash of this link.""" diff --git a/mex/common/types/path.py b/mex/common/types/path.py index 6a1539d1..1dd93363 100644 --- a/mex/common/types/path.py +++ b/mex/common/types/path.py @@ -16,7 +16,7 @@ def __init__(self, path: Union[str, Path, "PathWrapper"]) -> None: if isinstance(path, str): path = Path(path) elif isinstance(path, PathWrapper): - path = path._path + path = path._path # noqa: SLF001 self._path = path @classmethod @@ -52,7 +52,8 @@ def __eq__(self, other: object) -> bool: """Return true for two PathWrappers with equal paths.""" if isinstance(other, PathWrapper): return self._path.__eq__(other._path) - raise TypeError(f"Can't compare {type(other)} with {type(self)}") + msg = f"Can't compare {type(other)} with {type(self)}" + raise TypeError(msg) def is_absolute(self) -> bool: """True if the underlying path is absolute.""" diff --git a/mex/common/types/temporal_entity.py b/mex/common/types/temporal_entity.py index 3f591db0..f116d9cf 100644 --- a/mex/common/types/temporal_entity.py +++ b/mex/common/types/temporal_entity.py @@ -59,6 +59,7 @@ class TemporalEntityPrecision(Enum): YEAR_MONTH_DAY_TIME_REGEX = r"^[1-9]\d{3}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$" YEAR_MONTH_DAY_REGEX = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$" YEAR_MONTH_REGEX = r"^(?:\d{4}|(?:\d{4}-(?:0[1-9]|1[0-2])))$" +MAX_DATETIME_ARGUMENTS = 7 @total_ordering @@ -70,9 
+71,7 @@ class TemporalEntity: precision: TemporalEntityPrecision date_time: datetime STR_SCHEMA_PATTERN = TEMPORAL_ENTITY_REGEX - ALLOWED_PRECISION_LEVELS = [ - key for key in TemporalEntityPrecision.__members__.values() - ] + ALLOWED_PRECISION_LEVELS = list(TemporalEntityPrecision.__members__.values()) JSON_SCHEMA_CONFIG: dict[str, str | list[str]] = { "examples": [ "2011", @@ -98,7 +97,7 @@ def __init__( tzinfo: tzinfo | None = None, ) -> None: ... # pragma: no cover - def __init__( + def __init__( # noqa: PLR0912 self, *args: Union[int, str, date, datetime, "TemporalEntity"], precision: TemporalEntityPrecision | None = None, @@ -123,18 +122,19 @@ def __init__( TemporalEntity(2009, 9, 30, 23, 59, 5, tzinfo=timezone("CET")) TemporalEntity(TemporalEntity(2000)) """ - if len(args) > 7: - raise TypeError( - f"Temporal entity takes at most 7 arguments ({len(args)} given)" + if len(args) > MAX_DATETIME_ARGUMENTS: + msg = ( + f"Temporal entity takes at most {MAX_DATETIME_ARGUMENTS} arguments " + f"({len(args)} given)" ) + raise TypeError(msg) if len(args) == 1 and isinstance( args[0], str | date | datetime | TemporalEntity ): if tzinfo: - raise TypeError( - "Temporal entity does not accept tzinfo in parsing mode" - ) + msg = "Temporal entity does not accept tzinfo in parsing mode" + raise TypeError(msg) if isinstance(args[0], TemporalEntity): date_time, parsed_precision = self._parse_temporal_entity(args[0]) elif isinstance(args[0], datetime): @@ -147,10 +147,11 @@ def __init__( args = cast(tuple[int, ...], args) date_time, parsed_precision = self._parse_integers(*args, tzinfo=tzinfo) else: - raise TypeError( + msg = ( "Temporal entity takes a single str, date, datetime or " "TemporalEntity argument or up to 7 integers" ) + raise TypeError(msg) if precision: self._validate_precision(precision) @@ -227,7 +228,7 @@ def _parse_integers( if tzinfo is None: tzinfo = CET padded = tuple(a or d for a, d in zip_longest(args, (1970, 1, 1, 0, 0, 0, 0))) - date_time = 
datetime(*padded, tzinfo=tzinfo) # type: ignore + date_time = datetime(*padded, tzinfo=tzinfo) # type: ignore[arg-type,misc] precision = TEMPORAL_ENTITY_PRECISIONS_BY_ARG_LENGTH[len(args)] return date_time, precision @@ -267,12 +268,14 @@ def _parse_date( value: date, ) -> tuple[datetime, TemporalEntityPrecision]: """Parse a date and assume the precision is days.""" - return datetime(value.year, value.month, value.day), TemporalEntityPrecision.DAY + return datetime( + value.year, value.month, value.day, tzinfo=CET + ), TemporalEntityPrecision.DAY - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: """Return whether the given other value is the same as this one.""" try: - other_temporal = TemporalEntity(other) + other_temporal = TemporalEntity(other) # type: ignore[call-overload] except TypeError: return False return bool( diff --git a/mex/common/types/text.py b/mex/common/types/text.py index 14f55f8f..ad4965b9 100644 --- a/mex/common/types/text.py +++ b/mex/common/types/text.py @@ -52,7 +52,8 @@ def validate_strings(cls, value: Any) -> dict[str, Any]: return {"value": value} if isinstance(value, dict): return value - raise ValueError(f"Allowed input types are dict and str, got {type(value)}") + msg = f"Allowed input types are dict and str, got {type(value)}" + raise ValueError(msg) def __hash__(self) -> int: """Return the hash of Text.""" diff --git a/mex/common/utils.py b/mex/common/utils.py index 2638077f..c507d54d 100644 --- a/mex/common/utils.py +++ b/mex/common/utils.py @@ -25,10 +25,7 @@ def contains_any(base: Container[T], tokens: Iterable[T]) -> bool: """Check if a given base contains any of the given tokens.""" - for token in tokens: - if token in base: - return True - return False + return any(token in base for token in tokens) def any_contains_any(bases: Iterable[Container[T] | None], tokens: Iterable[T]) -> bool: diff --git a/mex/common/wikidata/connector.py b/mex/common/wikidata/connector.py index a055b744..42f7cce6 
100644 --- a/mex/common/wikidata/connector.py +++ b/mex/common/wikidata/connector.py @@ -1,8 +1,11 @@ from functools import cache +from typing import cast from mex.common.connector.http import HTTPConnector from mex.common.settings import BaseSettings +_PROPS = "info|aliases|labels|descriptions|datatype|claims|sitelinks|sitelinks/urls" + class WikidataQueryServiceConnector(HTTPConnector): """Connector class to handle requesting the Wikidata Query Service.""" @@ -34,10 +37,8 @@ def get_data_by_query(self, query: str) -> list[dict[str, dict[str, str]]]: "User-Agent": f"{settings.mex_web_user_agent}", "Api-User-Agent": f"{settings.mex_web_user_agent}", } - results = self.request("GET", params=params, headers=headers) - - return results["results"]["bindings"] # type: ignore + return cast(list[dict[str, dict[str, str]]], results["results"]["bindings"]) class WikidataAPIConnector(HTTPConnector): @@ -69,18 +70,7 @@ def get_wikidata_item_details_by_id(self, item_id: str) -> dict[str, str]: "action": "wbgetentities", "format": "json", "ids": item_id, - "props": "|".join( - [ - "info", - "aliases", - "labels", - "descriptions", - "datatype", - "claims", - "sitelinks", - "sitelinks/urls", - ] - ), + "props": _PROPS, "formatversion": "2", } headers = { @@ -88,4 +78,4 @@ def get_wikidata_item_details_by_id(self, item_id: str) -> dict[str, str]: "Api-User-Agent": f"{settings.mex_web_user_agent}", } results = self.request("GET", params=params, headers=headers) - return results["entities"][item_id] # type: ignore + return cast(dict[str, str], results["entities"][item_id]) diff --git a/mex/common/wikidata/extract.py b/mex/common/wikidata/extract.py index 1aa1e5ff..e60a560f 100644 --- a/mex/common/wikidata/extract.py +++ b/mex/common/wikidata/extract.py @@ -50,8 +50,9 @@ def search_organization_by_label( try: wd_item_id = results[0]["item"]["value"].split("/")[-1] - except KeyError as exc: - raise MExError(f"KeyError: Error processing results for {item_label}") from exc + except 
KeyError as error: + msg = f"KeyError: Error processing results for {item_label}" + raise MExError(msg) from error return _get_organization_details(wd_item_id) @@ -134,14 +135,12 @@ def search_organizations_by_label( for item in results: try: wd_item_id = item["item"]["value"].split("/")[-1] - except KeyError as exc: - raise MExError( - f"KeyError: Error processing results for {item_label}" - ) from exc - except IndexError as exc: - raise MExError( - f"IndexError: Error processing results for {item_label}" - ) from exc + except KeyError as error: + msg = f"KeyError: Error processing results for {item_label}" + raise MExError(msg) from error + except IndexError as error: + msg = f"IndexError: Error processing results for {item_label}" + raise MExError(msg) from error yield _get_organization_details(wd_item_id) diff --git a/pdm.lock b/pdm.lock index 1001c9a4..bc355cea 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:e0d62ee3964c442167662793fd88ea6c3d6d6658b17073fd197b4786a1977d6d" +content_hash = "sha256:3f83cd6a4b6125f07ee36848e6b5455ffdb777df35af862880a829be46163e4b" [[metadata.targets]] requires_python = "==3.11.*" @@ -93,29 +93,29 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.3.2" +version = "3.4.0" requires_python = ">=3.7.0" summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
groups = ["default", "dev"] marker = "python_version == \"3.11\"" files = [ - {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", 
hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, - {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-win32.whl", hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27"}, + {file = "charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079"}, + {file = "charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e"}, ] [[package]] @@ -148,49 +148,49 @@ files = [ [[package]] name = "coverage" -version = "7.6.1" -requires_python = ">=3.8" +version = "7.6.2" +requires_python = ">=3.9" summary = "Code coverage measurement for Python" groups = ["dev"] marker = "python_version == \"3.11\"" files = [ - {file = "coverage-7.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:7dea0889685db8550f839fa202744652e87c60015029ce3f60e006f8c4462c93"}, - {file = "coverage-7.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed37bd3c3b063412f7620464a9ac1314d33100329f39799255fb8d3027da50d3"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85f5e9a5f8b73e2350097c3756ef7e785f55bd71205defa0bfdaf96c31616ff"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bc572be474cafb617672c43fe989d6e48d3c83af02ce8de73fff1c6bb3c198d"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f4aa8219db826ce6be7099d559f8ec311549bfc4046f7f9fe9b5cea5c581c56"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fc5a77d0c516700ebad189b587de289a20a78324bc54baee03dd486f0855d234"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b48f312cca9621272ae49008c7f613337c53fadca647d6384cc129d2996d1133"}, - {file = "coverage-7.6.1-cp311-cp311-win32.whl", hash = "sha256:1125ca0e5fd475cbbba3bb67ae20bd2c23a98fac4e32412883f9bcbaa81c314c"}, - {file = "coverage-7.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:8ae539519c4c040c5ffd0632784e21b2f03fc1340752af711f33e5be83a9d6c6"}, - {file = "coverage-7.6.1.tar.gz", hash = "sha256:953510dfb7b12ab69d20135a0662397f077c59b1e6379a768e97c59d852ee51d"}, + {file = "coverage-7.6.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bb21bac7783c1bf6f4bbe68b1e0ff0d20e7e7732cfb7995bc8d96e23aa90fc7b"}, + {file = "coverage-7.6.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7b2e437fbd8fae5bc7716b9c7ff97aecc95f0b4d56e4ca08b3c8d8adcaadb84"}, + {file = 
"coverage-7.6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:536f77f2bf5797983652d1d55f1a7272a29afcc89e3ae51caa99b2db4e89d658"}, + {file = "coverage-7.6.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f361296ca7054f0936b02525646b2731b32c8074ba6defab524b79b2b7eeac72"}, + {file = "coverage-7.6.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7926d8d034e06b479797c199747dd774d5e86179f2ce44294423327a88d66ca7"}, + {file = "coverage-7.6.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0bbae11c138585c89fb4e991faefb174a80112e1a7557d507aaa07675c62e66b"}, + {file = "coverage-7.6.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fcad7d5d2bbfeae1026b395036a8aa5abf67e8038ae7e6a25c7d0f88b10a8e6a"}, + {file = "coverage-7.6.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f01e53575f27097d75d42de33b1b289c74b16891ce576d767ad8c48d17aeb5e0"}, + {file = "coverage-7.6.2-cp311-cp311-win32.whl", hash = "sha256:7781f4f70c9b0b39e1b129b10c7d43a4e0c91f90c60435e6da8288efc2b73438"}, + {file = "coverage-7.6.2-cp311-cp311-win_amd64.whl", hash = "sha256:9bcd51eeca35a80e76dc5794a9dd7cb04b97f0e8af620d54711793bfc1fbba4b"}, + {file = "coverage-7.6.2.tar.gz", hash = "sha256:a5f81e68aa62bc0cfca04f7b19eaa8f9c826b53fc82ab9e2121976dc74f131f3"}, ] [[package]] name = "coverage" -version = "7.6.1" +version = "7.6.2" extras = ["toml"] -requires_python = ">=3.8" +requires_python = ">=3.9" summary = "Code coverage measurement for Python" groups = ["dev"] marker = "python_version == \"3.11\"" dependencies = [ - "coverage==7.6.1", + "coverage==7.6.2", "tomli; python_full_version <= \"3.11.0a6\"", ] files = [ - {file = "coverage-7.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7dea0889685db8550f839fa202744652e87c60015029ce3f60e006f8c4462c93"}, - {file = "coverage-7.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:ed37bd3c3b063412f7620464a9ac1314d33100329f39799255fb8d3027da50d3"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85f5e9a5f8b73e2350097c3756ef7e785f55bd71205defa0bfdaf96c31616ff"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bc572be474cafb617672c43fe989d6e48d3c83af02ce8de73fff1c6bb3c198d"}, - {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f4aa8219db826ce6be7099d559f8ec311549bfc4046f7f9fe9b5cea5c581c56"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fc5a77d0c516700ebad189b587de289a20a78324bc54baee03dd486f0855d234"}, - {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b48f312cca9621272ae49008c7f613337c53fadca647d6384cc129d2996d1133"}, - {file = "coverage-7.6.1-cp311-cp311-win32.whl", hash = "sha256:1125ca0e5fd475cbbba3bb67ae20bd2c23a98fac4e32412883f9bcbaa81c314c"}, - {file = "coverage-7.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:8ae539519c4c040c5ffd0632784e21b2f03fc1340752af711f33e5be83a9d6c6"}, - {file = "coverage-7.6.1.tar.gz", hash = "sha256:953510dfb7b12ab69d20135a0662397f077c59b1e6379a768e97c59d852ee51d"}, + {file = "coverage-7.6.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bb21bac7783c1bf6f4bbe68b1e0ff0d20e7e7732cfb7995bc8d96e23aa90fc7b"}, + {file = "coverage-7.6.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7b2e437fbd8fae5bc7716b9c7ff97aecc95f0b4d56e4ca08b3c8d8adcaadb84"}, + {file = "coverage-7.6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:536f77f2bf5797983652d1d55f1a7272a29afcc89e3ae51caa99b2db4e89d658"}, + {file = 
"coverage-7.6.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f361296ca7054f0936b02525646b2731b32c8074ba6defab524b79b2b7eeac72"}, + {file = "coverage-7.6.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7926d8d034e06b479797c199747dd774d5e86179f2ce44294423327a88d66ca7"}, + {file = "coverage-7.6.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0bbae11c138585c89fb4e991faefb174a80112e1a7557d507aaa07675c62e66b"}, + {file = "coverage-7.6.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fcad7d5d2bbfeae1026b395036a8aa5abf67e8038ae7e6a25c7d0f88b10a8e6a"}, + {file = "coverage-7.6.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f01e53575f27097d75d42de33b1b289c74b16891ce576d767ad8c48d17aeb5e0"}, + {file = "coverage-7.6.2-cp311-cp311-win32.whl", hash = "sha256:7781f4f70c9b0b39e1b129b10c7d43a4e0c91f90c60435e6da8288efc2b73438"}, + {file = "coverage-7.6.2-cp311-cp311-win_amd64.whl", hash = "sha256:9bcd51eeca35a80e76dc5794a9dd7cb04b97f0e8af620d54711793bfc1fbba4b"}, + {file = "coverage-7.6.2.tar.gz", hash = "sha256:a5f81e68aa62bc0cfca04f7b19eaa8f9c826b53fc82ab9e2121976dc74f131f3"}, ] [[package]] @@ -393,23 +393,23 @@ files = [ [[package]] name = "markupsafe" -version = "2.1.5" -requires_python = ">=3.7" +version = "3.0.1" +requires_python = ">=3.9" summary = "Safely add untrusted strings to HTML/XML markup." 
groups = ["dev"] marker = "python_version == \"3.11\"" files = [ - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, - {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-win32.whl", hash = "sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635"}, + {file = "MarkupSafe-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf"}, + {file = "markupsafe-3.0.1.tar.gz", hash = "sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344"}, ] [[package]] @@ -533,7 +533,7 @@ files = [ [[package]] name = "pandas-stubs" -version = "2.2.2.240909" +version = "2.2.3.241009" requires_python = ">=3.10" summary = "Type annotations for pandas" groups = ["dev"] @@ -543,8 +543,8 @@ dependencies = [ "types-pytz>=2022.1.1", ] files = [ - {file = "pandas_stubs-2.2.2.240909-py3-none-any.whl", hash = "sha256:e230f5fa4065f9417804f4d65cd98f86c002efcc07933e8abcd48c3fad9c30a2"}, - {file = "pandas_stubs-2.2.2.240909.tar.gz", hash = 
"sha256:3c0951a2c3e45e3475aed9d80b7147ae82f176b9e42e9fb321cfdebf3d411b3d"}, + {file = "pandas_stubs-2.2.3.241009-py3-none-any.whl", hash = "sha256:3a6f8f142105a42550be677ba741ba532621f4e0acad2155c0e7b2450f114cfa"}, + {file = "pandas_stubs-2.2.3.241009.tar.gz", hash = "sha256:d4ab618253f0acf78a5d0d2bfd6dffdd92d91a56a69bdc8144e5a5c6d25be3b5"}, ] [[package]] @@ -903,7 +903,7 @@ files = [ [[package]] name = "sphinx" -version = "8.0.2" +version = "8.1.0" requires_python = ">=3.10" summary = "Python documentation generator" groups = ["dev"] @@ -919,17 +919,17 @@ dependencies = [ "packaging>=23.0", "requests>=2.30.0", "snowballstemmer>=2.2", - "sphinxcontrib-applehelp", - "sphinxcontrib-devhelp", - "sphinxcontrib-htmlhelp>=2.0.0", - "sphinxcontrib-jsmath", - "sphinxcontrib-qthelp", + "sphinxcontrib-applehelp>=1.0.7", + "sphinxcontrib-devhelp>=1.0.6", + "sphinxcontrib-htmlhelp>=2.0.6", + "sphinxcontrib-jsmath>=1.0.1", + "sphinxcontrib-qthelp>=1.0.6", "sphinxcontrib-serializinghtml>=1.1.9", "tomli>=2; python_version < \"3.11\"", ] files = [ - {file = "sphinx-8.0.2-py3-none-any.whl", hash = "sha256:56173572ae6c1b9a38911786e206a110c9749116745873feae4f9ce88e59391d"}, - {file = "sphinx-8.0.2.tar.gz", hash = "sha256:0cce1ddcc4fd3532cf1dd283bc7d886758362c5c1de6598696579ce96d8ffa5b"}, + {file = "sphinx-8.1.0-py3-none-any.whl", hash = "sha256:3202bba95697b9fc4371a07d6d457239de9860244ce235283149f817c253fd2f"}, + {file = "sphinx-8.1.0.tar.gz", hash = "sha256:109454425dbf4c78ecfdd481e56f078376d077edbda29804dba05c5161c8de06"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index 289f9fba..0108f2e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,30 +6,30 @@ authors = [{ name = "MEx Team", email = "mex@rki.de" }] readme = { file = "README.md", content-type = "text/markdown" } license = { file = "LICENSE" } urls = { Repository = "https://github.com/robert-koch-institut/mex-common" } -requires-python = "<3.13,>=3.11" +requires-python = ">=3.11,<3.13" dependencies = [ 
"backoff>=2.2.1,<3", "click>=8.1.7,<9", "langdetect>=1.0.9,<2", "ldap3>=2.9.1,<3", "mex-model@git+https://github.com/robert-koch-institut/mex-model.git@2.5.0", - "numpy>=2.0.1,<3", - "pandas>=2.2.2,<3", + "numpy>=2.1.2,<3", + "pandas>=2.2.3,<3", "pyarrow>=17.0.0,<18", - "pydantic-settings>=2.4.0,<3", - "pydantic>=2.8.2,<3", + "pydantic-settings>=2.5.2,<3", + "pydantic>=2.9.2,<3", "pytz>=2024.1,<2024.2", "requests>=2.32.3,<3", ] optional-dependencies.dev = [ "ipdb>=0.13.13,<1", - "pandas-stubs>=2.2.2,<3", - "mypy>=1.11.0,<2", + "pandas-stubs>=2.2.3,<3", + "mypy>=1.11.2,<2", "pytest-cov>=5.0.0,<6", "pytest-random-order>=1.1.1,<2", "pytest-xdist>=3.6.1,<4", - "pytest>=8.3.1,<9", - "ruff>=0.5.4,<1", + "pytest>=8.3.3,<9", + "ruff>=0.6.5,<1", "sphinx>=8.0.2,<9", "types-ldap3>=2.9.13,<3", "types-pytz>=2024.1.0,<2025", @@ -97,49 +97,57 @@ docstring-code-format = true [tool.ruff.lint] ignore = [ - "D100", # Allow missing module docstring for brevity - "D104", # Allow missing package docstring for brevity - "D106", # Allow missing nested class docstring (eg pydantic Config) - "D203", # Disallow blank line before class docstring (inverse of D211) - "D213", # Disallow multi-line docstring starting at second line (inverse of D212) - "D406", # Allow section name ending with newline (google style compat) - "D407", # Allow missing dashed underline after section (google style compat) - "D413", # Allow missing blank line after last section (google style compat) - "N805", # Allow first argument of a method to be non-self (pydantic compat) - "N815", # Allow mixedCase variables in class scope (model compat) - "RUF012", # Allow mutable class attributes (pydantic compat) -] -select = [ - "A", # Flake8 builtin shaddow - "B", # BugBear bug and issue finder - "C90", # McCabe complexity checker - "D", # Python docstring style checker - "E", # Python code style errors - "ERA", # Commented-out code detector - "F", # Pyflakes passive python checker - "I", # Isort import utility - "N", # Pep8 
naming conventions - "PERF", # Lint performance anti-patterns - "RET", # Flake8 return statement checker - "RUF", # Ruff-specific rules - "S", # Bandit automated security testing - "T10", # Flake8 debug statement checker - "T20", # Flake8 print statement checker - "UP", # PyUpgrade syntax recommender - "W", # Python code style warnings + "AIR", # Disable airflow specific rules (we are not using airflow) + "ANN", # Disable all annotations checks (handled by mypy) + "COM", # Disable flake8-commas checks (let ruff format handle that) + "CPY", # Disable copyright notice checks (we have LICENSE files) + "D100", # Allow missing module docstring (for brevity and speed) + "D104", # Allow missing package docstring (for brevity and speed) + "D203", # Disallow blank line before class docstring (inverse of D211) + "D213", # Disallow multi-line docstring starting at second line (inverse of D212) + "D406", # Allow section name ending with newline (google style compat) + "D407", # Allow missing dashed underline after section (google style compat) + "D413", # Allow missing blank line after last section (google style compat) + "DJ", # Disable django specific checks (we are not using django) + "FBT", # Disable boolean type hint checks (for more flexibility) + "FIX", # Allow committing with open TODOs (don't punish committers) + "N805", # Allow first argument of a method to be non-self (pydantic compat) + "N815", # Allow mixedCase variables in class scope (model compat) + "PTH123", # Allow using builtin open method (simpler than pathlib) + "RUF012", # Allow mutable class attributes (pydantic compat) + "SIM108", # Allow explicit if-else instead of ternary (easier to read) + "TD003", # Allow TODOs without ticket link (don't punish TODO writers) + "TRY003", # Allow long exception message at the raise site (for pydantic) ] +select = ["ALL"] [tool.ruff.lint.per-file-ignores] +"docs/**" = [ + "INP001", # Docs do not need to be a package +] +"mex/common/testing/**" = [ + "ARG001", # Allow 
unused function arguments for pytest plugin +] "tests/**" = [ - "D101", # Allow missing docstring in public class for tests - "D102", # Allow missing docstring in public method for tests - "D103", # Allow missing docstring in public function for tests - "D107", # Allow missing docstring in `__init__` for tests - "E501", # Allow line too long in tests - "N807", # Allow mocking `__init__` for tests - "S101", # Allow use of `assert` in tests + "ARG005", # Allow unused lambda arguments for mocking + "D101", # Allow missing docstring in public class + "D102", # Allow missing docstring in public method + "D103", # Allow missing docstring in public function + "D107", # Allow missing docstring in `__init__` + "E501", # Allow longer lines with test data + "ISC", # Allow implicitly concatenated strings + "N807", # Allow mocking `__init__` + "PLR0915", # Allow functions with many statements + "PLR2004", # Allow comparing with static values + "PT004", # Allow public fixtures without returns + "PT013", # Allow more flexible pytest imports + "S101", # Allow use of `assert` in tests + "SLF", # Allow private member access ] +[tool.ruff.lint.flake8-import-conventions.extend-aliases] +"reflex" = "rx" + [tool.ruff.lint.isort] known-first-party = ["mex", "tests"] diff --git a/tests/conftest.py b/tests/conftest.py index 34573fe3..220db0af 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,7 +23,7 @@ pytest_plugins = ("mex.common.testing.plugin",) -@pytest.fixture +@pytest.fixture() def extracted_person() -> ExtractedPerson: """Return a dummy extracted person for testing purposes.""" return ExtractedPerson.model_construct( @@ -45,7 +45,7 @@ def extracted_person() -> ExtractedPerson: ) -@pytest.fixture +@pytest.fixture() def merged_person() -> MergedPerson: """Return a dummy merged person for testing purposes.""" return MergedPerson.model_construct( diff --git a/tests/connector/test_http.py b/tests/connector/test_http.py index 6abc94cd..dcdff325 100644 --- 
a/tests/connector/test_http.py +++ b/tests/connector/test_http.py @@ -17,7 +17,7 @@ def _check_availability(self) -> None: self.request("GET", "_system/check") -@pytest.fixture +@pytest.fixture() def mocked_dummy_session(monkeypatch: MonkeyPatch) -> MagicMock: """Mock the DummyHTTPConnector with a MagicMock session and return that.""" mocked_session = MagicMock(spec=requests.Session, name="dummy_session") diff --git a/tests/identity/test_memory.py b/tests/identity/test_memory.py index dad73272..41e10a4e 100644 --- a/tests/identity/test_memory.py +++ b/tests/identity/test_memory.py @@ -26,32 +26,32 @@ def test_assign() -> None: new_identity = provider.assign(had_primary_source, identifier_in_primary_source) - assert new_identity.model_dump() == dict( - hadPrimarySource=had_primary_source, - identifierInPrimarySource=identifier_in_primary_source, - stableTargetId=Joker(), - identifier=Joker(), - ) + assert new_identity.model_dump() == { + "hadPrimarySource": had_primary_source, + "identifierInPrimarySource": identifier_in_primary_source, + "stableTargetId": Joker(), + "identifier": Joker(), + } found_identity = provider.assign(had_primary_source, identifier_in_primary_source) - assert found_identity.model_dump() == dict( - hadPrimarySource=had_primary_source, - identifierInPrimarySource=identifier_in_primary_source, - stableTargetId=new_identity.stableTargetId, - identifier=new_identity.identifier, - ) + assert found_identity.model_dump() == { + "hadPrimarySource": had_primary_source, + "identifierInPrimarySource": identifier_in_primary_source, + "stableTargetId": new_identity.stableTargetId, + "identifier": new_identity.identifier, + } provider.close() provider = MemoryIdentityProvider.get() fresh_identity = provider.assign(had_primary_source, identifier_in_primary_source) - assert fresh_identity.model_dump() == dict( - hadPrimarySource=had_primary_source, - identifierInPrimarySource=identifier_in_primary_source, - stableTargetId=new_identity.stableTargetId, - 
identifier=new_identity.identifier, - ) + assert fresh_identity.model_dump() == { + "hadPrimarySource": had_primary_source, + "identifierInPrimarySource": identifier_in_primary_source, + "stableTargetId": new_identity.stableTargetId, + "identifier": new_identity.identifier, + } def test_fetch_empty() -> None: diff --git a/tests/ldap/conftest.py b/tests/ldap/conftest.py index d137e03c..b6cb2ca8 100644 --- a/tests/ldap/conftest.py +++ b/tests/ldap/conftest.py @@ -12,46 +12,46 @@ LDAPMocker = Callable[[PagedSearchResults], None] -SAMPLE_PERSON_ATTRS = dict( - company=["RKI"], - department=["XY"], - departmentNumber=["XY2"], - displayName=["Sample, Sam"], - employeeID=["1024"], - givenName=["Sam"], - mail=["SampleS@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000000}"], - ou=["XY"], - sAMAccountName=["SampleS"], - sn=["Sample"], -) +SAMPLE_PERSON_ATTRS = { + "company": ["RKI"], + "department": ["XY"], + "departmentNumber": ["XY2"], + "displayName": ["Sample, Sam"], + "employeeID": ["1024"], + "givenName": ["Sam"], + "mail": ["SampleS@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000000}"], + "ou": ["XY"], + "sAMAccountName": ["SampleS"], + "sn": ["Sample"], +} -XY_DEPARTMENT_ATTRS = dict( - mail=["XY@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000042}"], - sAMAccountName=["XY"], -) +XY_DEPARTMENT_ATTRS = { + "mail": ["XY@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000042}"], + "sAMAccountName": ["XY"], +} -XY2_DEPARTMENT_ATTRS = dict( - mail=["XY2@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000043}"], - sAMAccountName=["XY2"], -) +XY2_DEPARTMENT_ATTRS = { + "mail": ["XY2@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000043}"], + "sAMAccountName": ["XY2"], +} -XY_FUNC_ACCOUNT_ATTRS = dict( - mail=["XY@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000044}"], - sAMAccountName=["XY"], -) +XY_FUNC_ACCOUNT_ATTRS = { + "mail": ["XY@mail.tld"], + "objectGUID": 
["{00000000-0000-4000-8000-000000000044}"], + "sAMAccountName": ["XY"], +} -XY2_FUNC_ACCOUNT_ATTRS = dict( - mail=["XY2@mail.tld"], - objectGUID=["{00000000-0000-4000-8000-000000000045}"], - sAMAccountName=["XY2"], -) +XY2_FUNC_ACCOUNT_ATTRS = { + "mail": ["XY2@mail.tld"], + "objectGUID": ["{00000000-0000-4000-8000-000000000045}"], + "sAMAccountName": ["XY2"], +} -@pytest.fixture +@pytest.fixture() def ldap_mocker(monkeypatch: MonkeyPatch) -> LDAPMocker: """Patch the LDAP connector to return `SAMPLE_PERSON_ATTRS` from its connection.""" @@ -60,7 +60,7 @@ def __init__(self: LDAPConnector) -> None: self._connection = MagicMock(spec=Connection, extend=Mock()) self._connection.extend.standard.paged_search = MagicMock( side_effect=[ - [dict(attributes=e) for e in entries] for entries in results + [{"attributes": e} for e in entries] for entries in results ] ) diff --git a/tests/ldap/test_connector.py b/tests/ldap/test_connector.py index 610a4dc2..2d3bd5b4 100644 --- a/tests/ldap/test_connector.py +++ b/tests/ldap/test_connector.py @@ -48,7 +48,7 @@ def test_get_persons_mocked(ldap_mocker: LDAPMocker) -> None: "nonexistent_person", ], ) -@pytest.mark.integration +@pytest.mark.integration() def test_get_persons_ldap(kwargs: dict[str, str], pattern: str) -> None: connector = LDAPConnector.get() persons = list(connector.get_persons(**kwargs)) @@ -68,7 +68,7 @@ def test_get_persons_ldap(kwargs: dict[str, str], pattern: str) -> None: "nonexistent_functional_account", ], ) -@pytest.mark.integration +@pytest.mark.integration() def test_get_functional_accounts_ldap(kwargs: dict[str, str], pattern: str) -> None: connector = LDAPConnector.get() functional_accounts = list(connector.get_functional_accounts(**kwargs)) diff --git a/tests/ldap/test_extract.py b/tests/ldap/test_extract.py index a3aa30a8..f52fc431 100644 --- a/tests/ldap/test_extract.py +++ b/tests/ldap/test_extract.py @@ -17,14 +17,14 @@ from tests.ldap.conftest import SAMPLE_PERSON_ATTRS, LDAPMocker -@pytest.fixture 
+@pytest.fixture() def ldap_primary_source( extracted_primary_sources: dict[str, ExtractedPrimarySource], ) -> ExtractedPrimarySource: return extracted_primary_sources["ldap"] -@pytest.fixture +@pytest.fixture() def ldap_person_with_identity( ldap_primary_source: ExtractedPrimarySource, ) -> LDAPPerson: @@ -40,14 +40,14 @@ def ldap_person_with_identity( return person -@pytest.fixture +@pytest.fixture() def ldap_person_with_identity_with_query( ldap_person_with_identity: LDAPPerson, ) -> LDAPPersonWithQuery: return LDAPPersonWithQuery(person=ldap_person_with_identity, query="foo") -@pytest.fixture +@pytest.fixture() def ldap_person_without_identity() -> LDAPPerson: return LDAPPerson( objectGUID=UUID(int=2, version=4), @@ -58,14 +58,14 @@ def ldap_person_without_identity() -> LDAPPerson: ) -@pytest.fixture +@pytest.fixture() def ldap_person_without_identity_with_query( ldap_person_without_identity: LDAPPerson, ) -> LDAPPersonWithQuery: return LDAPPersonWithQuery(person=ldap_person_without_identity, query="foo") -@pytest.fixture +@pytest.fixture() def ldap_persons( ldap_person_with_identity: LDAPPerson, ldap_person_without_identity: LDAPPerson ) -> list[LDAPPerson]: @@ -75,7 +75,7 @@ def ldap_persons( ] -@pytest.fixture +@pytest.fixture() def ldap_persons_with_query( ldap_person_with_identity_with_query: LDAPPersonWithQuery, ldap_person_without_identity_with_query: LDAPPersonWithQuery, @@ -86,7 +86,7 @@ def ldap_persons_with_query( ] -@pytest.fixture +@pytest.fixture() def merged_id_of_person_with_identity( ldap_person_with_identity: LDAPPerson, ldap_primary_source: ExtractedPrimarySource, diff --git a/tests/ldap/test_transform.py b/tests/ldap/test_transform.py index 3bea0c05..c8eace14 100644 --- a/tests/ldap/test_transform.py +++ b/tests/ldap/test_transform.py @@ -15,7 +15,7 @@ from mex.common.testing import Joker -@pytest.fixture +@pytest.fixture() def extracted_unit( extracted_primary_sources: dict[str, ExtractedPrimarySource], ) -> ExtractedOrganizationalUnit: 
diff --git a/tests/models/test_base.py b/tests/models/test_base.py index f008e28e..76b8994b 100644 --- a/tests/models/test_base.py +++ b/tests/models/test_base.py @@ -106,8 +106,8 @@ def test_base_model_listyness_fix( ) -> None: try: model = ComplexDummyModel.model_validate(data) - except Exception as error: - assert str(expected) in str(error) + except Exception as error: # noqa: BLE001 + assert str(expected) in str(error) # noqa: PT017 else: actual = model.model_dump() for key, value in expected.items(): @@ -125,7 +125,7 @@ class Shelter(Pet): with pytest.raises( ValidationError, match="Input should be a valid dictionary or instance of Pet" ): - Shelter(inhabitants="foo") # type: ignore + Shelter(inhabitants="foo") # type: ignore[call-arg] class Computer(BaseModel): diff --git a/tests/organigram/conftest.py b/tests/organigram/conftest.py index 9edc5b32..e16bc71a 100644 --- a/tests/organigram/conftest.py +++ b/tests/organigram/conftest.py @@ -8,7 +8,7 @@ from mex.common.types import Link, LinkLanguage, Text -@pytest.fixture +@pytest.fixture() def child_unit() -> OrganigramUnit: """Return a child unit corresponding to the test_data.""" return OrganigramUnit( @@ -28,7 +28,7 @@ def child_unit() -> OrganigramUnit: ) -@pytest.fixture +@pytest.fixture() def extracted_child_unit( child_unit: OrganigramUnit, extracted_primary_sources: dict[str, ExtractedPrimarySource], @@ -41,7 +41,7 @@ def extracted_child_unit( ) -@pytest.fixture +@pytest.fixture() def parent_unit() -> OrganigramUnit: """Return a parent unit corresponding to the test_data.""" return OrganigramUnit( @@ -62,7 +62,7 @@ def parent_unit() -> OrganigramUnit: ) -@pytest.fixture +@pytest.fixture() def extracted_parent_unit( parent_unit: OrganigramUnit, extracted_primary_sources: dict[str, ExtractedPrimarySource], diff --git a/tests/test_cli.py b/tests/test_cli.py index 1385a417..7ade9a37 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,6 +16,8 @@ class MyStr(str): """Dummy string subclass for 
_field_to_option test.""" + __slots__ = () + class MyEnum(Enum): """Dummy enum class for _field_to_option test.""" @@ -110,7 +112,10 @@ class MyEnum(Enum): create_model( "OptionalFlagSettings", __base__=BaseSettings, - optional_flag=(bool, Field(False, description="This flag is optional")), + optional_flag=( + bool, + Field(default=False, description="This flag is optional"), + ), ), { "name": "optional_flag", @@ -169,7 +174,7 @@ class MyEnum(Enum): __base__=BaseSettings, union_field=( bool | str, - Field(True, description="String or boolean"), + Field(default=True, description="String or boolean"), ), ), { diff --git a/tests/test_settings.py b/tests/test_settings.py index 2a4d375e..7c5c0dcf 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -52,7 +52,7 @@ def test_settings_getting_caches_singleton() -> None: assert settings_fetched_again is settings -@pytest.mark.integration +@pytest.mark.integration() def test_parse_env_file() -> None: settings = BaseSettings.get() # "work_dir" and "assets_dir" are always set, assert that more than these two are diff --git a/tests/test_utils.py b/tests/test_utils.py index aa3b047d..d379aea7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -55,7 +55,7 @@ def test_any_contains_any(base: Any, tokens: Iterable[Any], expected: bool) -> N @pytest.mark.parametrize( ("annotation", "types", "expected"), - ( + [ (None, [str], False), (str, [str], True), (str, [Identifier], False), @@ -63,7 +63,7 @@ def test_any_contains_any(base: Any, tokens: Iterable[Any], expected: bool) -> N (list[str | int | list[str]], [str, float], False), (list[str | int | list[str]], [int, str], True), (MergedPersonIdentifier | None, MERGED_IDENTIFIER_CLASSES, True), - ), + ], ids=[ "static None", "simple str", @@ -85,7 +85,7 @@ class DummyModel(BaseModel): @pytest.mark.parametrize( ("annotation", "flags", "expected_types"), - ( + [ (str, {}, [str]), (None, {}, [NoneType]), (None, {"include_none": False}, []), @@ -103,7 +103,7 @@ class 
DummyModel(BaseModel): {"unpack_literal": False}, [Literal["okay"], NoneType], ), - ), + ], ids=[ "string", "None allowing None", @@ -141,7 +141,7 @@ class PseudoModel(BaseModel): @pytest.mark.parametrize( ("string", "expected"), - (("", ""), ("__XYZ__", "xyz"), ("/foo/BAR$42", "foo bar 42")), + [("", ""), ("__XYZ__", "xyz"), ("/foo/BAR$42", "foo bar 42")], ) def test_normalize(string: str, expected: str) -> None: assert normalize(string) == expected diff --git a/tests/testing/test_joker.py b/tests/testing/test_joker.py index 715fc40e..3af85287 100644 --- a/tests/testing/test_joker.py +++ b/tests/testing/test_joker.py @@ -2,7 +2,7 @@ def test_joker_eq() -> None: - assert Joker() == None # noqa + assert Joker() == None # noqa: E711 assert Joker() == 1 assert {"foo": Joker()} == {"foo": ["bar", Joker()]} diff --git a/tests/types/test_temporal_entity.py b/tests/types/test_temporal_entity.py index 53d7dd42..3fc47110 100644 --- a/tests/types/test_temporal_entity.py +++ b/tests/types/test_temporal_entity.py @@ -20,7 +20,7 @@ ("args", "kwargs", "message"), [ ( - (datetime.now(),), + (datetime.now(tz=UTC),), {"tzinfo": UTC}, "Temporal entity does not accept tzinfo in parsing mode", ), @@ -155,7 +155,7 @@ def test_temporal_entity_value_errors( ), ( TemporalEntity, - (datetime(2020, 3, 22, 14, 30, 58),), + (datetime(2020, 3, 22, 14, 30, 58, tzinfo=UTC),), {}, 'TemporalEntity("2020-03-22T13:30:58Z")', ), @@ -183,13 +183,13 @@ def test_temporal_entity_value_errors( ), ( TemporalEntity, - (datetime(2004, 11, 19, 00, 00),), + (datetime(2004, 11, 19, 00, 00, tzinfo=UTC),), {"precision": TemporalEntityPrecision.DAY}, 'TemporalEntity("2004-11-19")', ), ( YearMonth, - (datetime(2004, 11, 19, 00, 00),), + (datetime(2004, 11, 19, 00, 00, tzinfo=UTC),), {"precision": TemporalEntityPrecision.YEAR}, 'YearMonth("2004")', ), @@ -227,7 +227,7 @@ def test_temporal_entity_eq() -> None: assert TemporalEntity(2004, 11) == TemporalEntity(2004, 11) assert TemporalEntity(2004, 11, 2) == 
"2004-11-02" assert TemporalEntity(2020, 3, 22, 14, 30, 58, 0) == datetime( - 2020, 3, 22, 14, 30, 58, 0 + 2020, 3, 22, 14, 30, 58, 0, tzinfo=UTC ) assert TemporalEntity(2005) != object() @@ -236,7 +236,9 @@ def test_temporal_entity_gt() -> None: assert TemporalEntity(2004) > TemporalEntity("2003") assert TemporalEntity(2004, 11) < "2013-10-02" assert TemporalEntity(2004, 11) <= TemporalEntity(2004, 12) - assert TemporalEntity(2020, 3, 22, 14, 30, 58) >= datetime(2020, 3, 22, 14, 29) + assert TemporalEntity(2020, 3, 22, 14, 30, 58) >= datetime( + 2020, 3, 22, 14, 29, tzinfo=UTC + ) with pytest.raises(NotImplementedError): assert TemporalEntity(2005) > object() diff --git a/tests/types/test_vocabulary.py b/tests/types/test_vocabulary.py index 1e2d0d40..7b48aedd 100644 --- a/tests/types/test_vocabulary.py +++ b/tests/types/test_vocabulary.py @@ -17,7 +17,7 @@ def test_split_to_caps(string: str, expected: str) -> None: assert split_to_caps(string) == expected -@pytest.fixture +@pytest.fixture() def use_dummy_vocabulary(monkeypatch: MonkeyPatch) -> None: dummy_vocabulary = VocabularyLoader.parse_file( str(TESTDATA_DIR / "dummy-vocabulary.json") diff --git a/tests/wikidata/conftest.py b/tests/wikidata/conftest.py index ab0fc580..c02ab436 100644 --- a/tests/wikidata/conftest.py +++ b/tests/wikidata/conftest.py @@ -13,7 +13,7 @@ TESTDATA_DIR = Path(__file__).parent / "test_data" -@pytest.fixture +@pytest.fixture() def mocked_session_wikidata_query_service(monkeypatch: MonkeyPatch) -> MagicMock: """Mock and return WikidataQueryServiceConnector with a MagicMock session.""" mocked_session = MagicMock(spec=requests.Session) @@ -25,7 +25,7 @@ def __init__(self: WikidataQueryServiceConnector) -> None: return mocked_session -@pytest.fixture +@pytest.fixture() def mocked_session_wikidata_api(monkeypatch: MonkeyPatch) -> MagicMock: """Mock and return WikidataAPIConnector with a MagicMock session.""" mocked_session = MagicMock(spec=requests.Session) diff --git 
a/tests/wikidata/test_connector.py b/tests/wikidata/test_connector.py index 48154114..b7e4aeb0 100644 --- a/tests/wikidata/test_connector.py +++ b/tests/wikidata/test_connector.py @@ -20,7 +20,7 @@ def test_initialization_mocked_server( assert connector._check_availability() is None -@pytest.mark.integration +@pytest.mark.integration() def test_get_data_by_query() -> None: """Test if items can be searched providing a label.""" expected = [ @@ -110,7 +110,7 @@ def test_get_data_by_query_mocked( assert response == expected -@pytest.mark.integration +@pytest.mark.integration() def test_get_wikidata_item_details_by_id() -> None: """Test if items details can be fetched by its ID.""" connector = WikidataAPIConnector.get() diff --git a/tests/wikidata/test_convenience.py b/tests/wikidata/test_convenience.py index 7d7e7660..4950cb4b 100644 --- a/tests/wikidata/test_convenience.py +++ b/tests/wikidata/test_convenience.py @@ -119,7 +119,7 @@ def test_get_merged_organization_id_by_query_with_extract_transform_and_load_moc load_function.assert_not_called() -@pytest.mark.integration +@pytest.mark.integration() def test_get_merged_organization_id_by_query_with_extract_transform_and_load( extracted_primary_sources: dict[str, ExtractedPrimarySource], ) -> None: diff --git a/tests/wikidata/test_extract.py b/tests/wikidata/test_extract.py index 20c98fcb..b379bdbc 100644 --- a/tests/wikidata/test_extract.py +++ b/tests/wikidata/test_extract.py @@ -19,7 +19,7 @@ from tests.wikidata.conftest import TESTDATA_DIR -@pytest.mark.integration +@pytest.mark.integration() def test_search_organization_by_label() -> None: expected = "Q679041" @@ -29,7 +29,7 @@ def test_search_organization_by_label() -> None: assert search_result.identifier == expected -@pytest.mark.integration +@pytest.mark.integration() def test_search_organizations_by_label() -> None: search_result = list( search_organizations_by_label( @@ -50,7 +50,7 @@ def test_search_organizations_by_label() -> None: assert 
search_result[0].labels.model_dump() == labels -@pytest.mark.integration +@pytest.mark.integration() def test_get_count_of_found_organizations_by_label() -> None: total_found_orgs = get_count_of_found_organizations_by_label( item_label='Robert Koch Institute"', @@ -60,7 +60,7 @@ def test_get_count_of_found_organizations_by_label() -> None: assert total_found_orgs == 3 -@pytest.mark.integration +@pytest.mark.integration() def test_search_organization_by_label_for_none() -> None: """Test if None is returned when multiple organizations are found.""" search_result = search_organization_by_label( @@ -425,7 +425,7 @@ def mocked_query_response() -> list[dict[str, dict[str, str]]]: assert search_result == 3 -@pytest.mark.integration +@pytest.mark.integration() def test_get_organization_details() -> None: expected = { "identifier": "Q679041", From 2a802f47ff4c750be41542855e2af650458ee838 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Fri, 11 Oct 2024 14:41:00 +0200 Subject: [PATCH 17/19] fix tests --- tests/types/test_temporal_entity.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/types/test_temporal_entity.py b/tests/types/test_temporal_entity.py index 3fc47110..978fc773 100644 --- a/tests/types/test_temporal_entity.py +++ b/tests/types/test_temporal_entity.py @@ -155,7 +155,7 @@ def test_temporal_entity_value_errors( ), ( TemporalEntity, - (datetime(2020, 3, 22, 14, 30, 58, tzinfo=UTC),), + (datetime(2020, 3, 22, 14, 30, 58, tzinfo=CET),), {}, 'TemporalEntity("2020-03-22T13:30:58Z")', ), @@ -177,19 +177,19 @@ def test_temporal_entity_value_errors( ), ( YearMonthDayTime, - (YearMonthDayTime(2004, 11, 21, 19, 59, tzinfo=timezone("UTC")),), + (YearMonthDayTime(2004, 11, 21, 19, 59, tzinfo=UTC),), {}, 'YearMonthDayTime("2004-11-21T19:59:00Z")', ), ( TemporalEntity, - (datetime(2004, 11, 19, 00, 00, tzinfo=UTC),), + (datetime(2004, 11, 19, 00, 00, tzinfo=CET),), {"precision": TemporalEntityPrecision.DAY}, 
'TemporalEntity("2004-11-19")', ), ( YearMonth, - (datetime(2004, 11, 19, 00, 00, tzinfo=UTC),), + (datetime(2004, 11, 19, 00, 00, tzinfo=CET),), {"precision": TemporalEntityPrecision.YEAR}, 'YearMonth("2004")', ), @@ -226,7 +226,7 @@ def test_temporal_entity_eq() -> None: assert TemporalEntity(2004) == TemporalEntity("2004") assert TemporalEntity(2004, 11) == TemporalEntity(2004, 11) assert TemporalEntity(2004, 11, 2) == "2004-11-02" - assert TemporalEntity(2020, 3, 22, 14, 30, 58, 0) == datetime( + assert TemporalEntity(2020, 3, 22, 14, 30, 58, 0, tzinfo=UTC) == datetime( 2020, 3, 22, 14, 30, 58, 0, tzinfo=UTC ) assert TemporalEntity(2005) != object() @@ -236,7 +236,7 @@ def test_temporal_entity_gt() -> None: assert TemporalEntity(2004) > TemporalEntity("2003") assert TemporalEntity(2004, 11) < "2013-10-02" assert TemporalEntity(2004, 11) <= TemporalEntity(2004, 12) - assert TemporalEntity(2020, 3, 22, 14, 30, 58) >= datetime( + assert TemporalEntity(2020, 3, 22, 14, 30, 58, tzinfo=UTC) >= datetime( 2020, 3, 22, 14, 29, tzinfo=UTC ) From b7b4fdf0d1337c85edbefd3054c76474ddf67541 Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Fri, 11 Oct 2024 14:42:25 +0200 Subject: [PATCH 18/19] lint --- tests/backend_api/conftest.py | 2 +- tests/conftest.py | 8 ++++---- tests/connector/test_http.py | 2 +- tests/ldap/conftest.py | 2 +- tests/ldap/test_connector.py | 4 ++-- tests/ldap/test_extract.py | 16 ++++++++-------- tests/ldap/test_transform.py | 2 +- tests/organigram/conftest.py | 8 ++++---- tests/test_settings.py | 2 +- tests/types/test_vocabulary.py | 2 +- tests/wikidata/conftest.py | 4 ++-- tests/wikidata/test_connector.py | 4 ++-- tests/wikidata/test_convenience.py | 2 +- tests/wikidata/test_extract.py | 10 +++++----- 14 files changed, 34 insertions(+), 34 deletions(-) diff --git a/tests/backend_api/conftest.py b/tests/backend_api/conftest.py index 6096e441..e2be02de 100644 --- a/tests/backend_api/conftest.py +++ 
b/tests/backend_api/conftest.py @@ -6,7 +6,7 @@ from mex.common.backend_api.connector import BackendApiConnector -@pytest.fixture() +@pytest.fixture def mocked_backend(monkeypatch: MonkeyPatch) -> MagicMock: """Return the mocked request dispatch method of backend connector.""" mocked_send_request = MagicMock( diff --git a/tests/conftest.py b/tests/conftest.py index 220db0af..511a5189 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,7 +23,7 @@ pytest_plugins = ("mex.common.testing.plugin",) -@pytest.fixture() +@pytest.fixture def extracted_person() -> ExtractedPerson: """Return a dummy extracted person for testing purposes.""" return ExtractedPerson.model_construct( @@ -45,7 +45,7 @@ def extracted_person() -> ExtractedPerson: ) -@pytest.fixture() +@pytest.fixture def merged_person() -> MergedPerson: """Return a dummy merged person for testing purposes.""" return MergedPerson.model_construct( @@ -64,7 +64,7 @@ def merged_person() -> MergedPerson: ) -@pytest.fixture() +@pytest.fixture def rule_set_request() -> PersonRuleSetRequest: """Return a dummy person rule set request for testing purposes.""" return PersonRuleSetRequest( @@ -74,7 +74,7 @@ def rule_set_request() -> PersonRuleSetRequest: ) -@pytest.fixture() +@pytest.fixture def rule_set_response() -> PersonRuleSetResponse: """Return a dummy person rule set response for testing purposes.""" return PersonRuleSetResponse( diff --git a/tests/connector/test_http.py b/tests/connector/test_http.py index dcdff325..6abc94cd 100644 --- a/tests/connector/test_http.py +++ b/tests/connector/test_http.py @@ -17,7 +17,7 @@ def _check_availability(self) -> None: self.request("GET", "_system/check") -@pytest.fixture() +@pytest.fixture def mocked_dummy_session(monkeypatch: MonkeyPatch) -> MagicMock: """Mock the DummyHTTPConnector with a MagicMock session and return that.""" mocked_session = MagicMock(spec=requests.Session, name="dummy_session") diff --git a/tests/ldap/conftest.py b/tests/ldap/conftest.py index 
b6cb2ca8..edea5fcb 100644 --- a/tests/ldap/conftest.py +++ b/tests/ldap/conftest.py @@ -51,7 +51,7 @@ } -@pytest.fixture() +@pytest.fixture def ldap_mocker(monkeypatch: MonkeyPatch) -> LDAPMocker: """Patch the LDAP connector to return `SAMPLE_PERSON_ATTRS` from its connection.""" diff --git a/tests/ldap/test_connector.py b/tests/ldap/test_connector.py index 2d3bd5b4..610a4dc2 100644 --- a/tests/ldap/test_connector.py +++ b/tests/ldap/test_connector.py @@ -48,7 +48,7 @@ def test_get_persons_mocked(ldap_mocker: LDAPMocker) -> None: "nonexistent_person", ], ) -@pytest.mark.integration() +@pytest.mark.integration def test_get_persons_ldap(kwargs: dict[str, str], pattern: str) -> None: connector = LDAPConnector.get() persons = list(connector.get_persons(**kwargs)) @@ -68,7 +68,7 @@ def test_get_persons_ldap(kwargs: dict[str, str], pattern: str) -> None: "nonexistent_functional_account", ], ) -@pytest.mark.integration() +@pytest.mark.integration def test_get_functional_accounts_ldap(kwargs: dict[str, str], pattern: str) -> None: connector = LDAPConnector.get() functional_accounts = list(connector.get_functional_accounts(**kwargs)) diff --git a/tests/ldap/test_extract.py b/tests/ldap/test_extract.py index f52fc431..a3aa30a8 100644 --- a/tests/ldap/test_extract.py +++ b/tests/ldap/test_extract.py @@ -17,14 +17,14 @@ from tests.ldap.conftest import SAMPLE_PERSON_ATTRS, LDAPMocker -@pytest.fixture() +@pytest.fixture def ldap_primary_source( extracted_primary_sources: dict[str, ExtractedPrimarySource], ) -> ExtractedPrimarySource: return extracted_primary_sources["ldap"] -@pytest.fixture() +@pytest.fixture def ldap_person_with_identity( ldap_primary_source: ExtractedPrimarySource, ) -> LDAPPerson: @@ -40,14 +40,14 @@ def ldap_person_with_identity( return person -@pytest.fixture() +@pytest.fixture def ldap_person_with_identity_with_query( ldap_person_with_identity: LDAPPerson, ) -> LDAPPersonWithQuery: return LDAPPersonWithQuery(person=ldap_person_with_identity, query="foo") 
-@pytest.fixture() +@pytest.fixture def ldap_person_without_identity() -> LDAPPerson: return LDAPPerson( objectGUID=UUID(int=2, version=4), @@ -58,14 +58,14 @@ def ldap_person_without_identity() -> LDAPPerson: ) -@pytest.fixture() +@pytest.fixture def ldap_person_without_identity_with_query( ldap_person_without_identity: LDAPPerson, ) -> LDAPPersonWithQuery: return LDAPPersonWithQuery(person=ldap_person_without_identity, query="foo") -@pytest.fixture() +@pytest.fixture def ldap_persons( ldap_person_with_identity: LDAPPerson, ldap_person_without_identity: LDAPPerson ) -> list[LDAPPerson]: @@ -75,7 +75,7 @@ def ldap_persons( ] -@pytest.fixture() +@pytest.fixture def ldap_persons_with_query( ldap_person_with_identity_with_query: LDAPPersonWithQuery, ldap_person_without_identity_with_query: LDAPPersonWithQuery, @@ -86,7 +86,7 @@ def ldap_persons_with_query( ] -@pytest.fixture() +@pytest.fixture def merged_id_of_person_with_identity( ldap_person_with_identity: LDAPPerson, ldap_primary_source: ExtractedPrimarySource, diff --git a/tests/ldap/test_transform.py b/tests/ldap/test_transform.py index c8eace14..3bea0c05 100644 --- a/tests/ldap/test_transform.py +++ b/tests/ldap/test_transform.py @@ -15,7 +15,7 @@ from mex.common.testing import Joker -@pytest.fixture() +@pytest.fixture def extracted_unit( extracted_primary_sources: dict[str, ExtractedPrimarySource], ) -> ExtractedOrganizationalUnit: diff --git a/tests/organigram/conftest.py b/tests/organigram/conftest.py index e16bc71a..9edc5b32 100644 --- a/tests/organigram/conftest.py +++ b/tests/organigram/conftest.py @@ -8,7 +8,7 @@ from mex.common.types import Link, LinkLanguage, Text -@pytest.fixture() +@pytest.fixture def child_unit() -> OrganigramUnit: """Return a child unit corresponding to the test_data.""" return OrganigramUnit( @@ -28,7 +28,7 @@ def child_unit() -> OrganigramUnit: ) -@pytest.fixture() +@pytest.fixture def extracted_child_unit( child_unit: OrganigramUnit, extracted_primary_sources: dict[str, 
ExtractedPrimarySource], @@ -41,7 +41,7 @@ def extracted_child_unit( ) -@pytest.fixture() +@pytest.fixture def parent_unit() -> OrganigramUnit: """Return a parent unit corresponding to the test_data.""" return OrganigramUnit( @@ -62,7 +62,7 @@ def parent_unit() -> OrganigramUnit: ) -@pytest.fixture() +@pytest.fixture def extracted_parent_unit( parent_unit: OrganigramUnit, extracted_primary_sources: dict[str, ExtractedPrimarySource], diff --git a/tests/test_settings.py b/tests/test_settings.py index 7c5c0dcf..2a4d375e 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -52,7 +52,7 @@ def test_settings_getting_caches_singleton() -> None: assert settings_fetched_again is settings -@pytest.mark.integration() +@pytest.mark.integration def test_parse_env_file() -> None: settings = BaseSettings.get() # "work_dir" and "assets_dir" are always set, assert that more than these two are diff --git a/tests/types/test_vocabulary.py b/tests/types/test_vocabulary.py index 7b48aedd..1e2d0d40 100644 --- a/tests/types/test_vocabulary.py +++ b/tests/types/test_vocabulary.py @@ -17,7 +17,7 @@ def test_split_to_caps(string: str, expected: str) -> None: assert split_to_caps(string) == expected -@pytest.fixture() +@pytest.fixture def use_dummy_vocabulary(monkeypatch: MonkeyPatch) -> None: dummy_vocabulary = VocabularyLoader.parse_file( str(TESTDATA_DIR / "dummy-vocabulary.json") diff --git a/tests/wikidata/conftest.py b/tests/wikidata/conftest.py index c02ab436..ab0fc580 100644 --- a/tests/wikidata/conftest.py +++ b/tests/wikidata/conftest.py @@ -13,7 +13,7 @@ TESTDATA_DIR = Path(__file__).parent / "test_data" -@pytest.fixture() +@pytest.fixture def mocked_session_wikidata_query_service(monkeypatch: MonkeyPatch) -> MagicMock: """Mock and return WikidataQueryServiceConnector with a MagicMock session.""" mocked_session = MagicMock(spec=requests.Session) @@ -25,7 +25,7 @@ def __init__(self: WikidataQueryServiceConnector) -> None: return mocked_session -@pytest.fixture() 
+@pytest.fixture def mocked_session_wikidata_api(monkeypatch: MonkeyPatch) -> MagicMock: """Mock and return WikidataAPIConnector with a MagicMock session.""" mocked_session = MagicMock(spec=requests.Session) diff --git a/tests/wikidata/test_connector.py b/tests/wikidata/test_connector.py index b7e4aeb0..48154114 100644 --- a/tests/wikidata/test_connector.py +++ b/tests/wikidata/test_connector.py @@ -20,7 +20,7 @@ def test_initialization_mocked_server( assert connector._check_availability() is None -@pytest.mark.integration() +@pytest.mark.integration def test_get_data_by_query() -> None: """Test if items can be searched providing a label.""" expected = [ @@ -110,7 +110,7 @@ def test_get_data_by_query_mocked( assert response == expected -@pytest.mark.integration() +@pytest.mark.integration def test_get_wikidata_item_details_by_id() -> None: """Test if items details can be fetched by its ID.""" connector = WikidataAPIConnector.get() diff --git a/tests/wikidata/test_convenience.py b/tests/wikidata/test_convenience.py index 4950cb4b..7d7e7660 100644 --- a/tests/wikidata/test_convenience.py +++ b/tests/wikidata/test_convenience.py @@ -119,7 +119,7 @@ def test_get_merged_organization_id_by_query_with_extract_transform_and_load_moc load_function.assert_not_called() -@pytest.mark.integration() +@pytest.mark.integration def test_get_merged_organization_id_by_query_with_extract_transform_and_load( extracted_primary_sources: dict[str, ExtractedPrimarySource], ) -> None: diff --git a/tests/wikidata/test_extract.py b/tests/wikidata/test_extract.py index b379bdbc..20c98fcb 100644 --- a/tests/wikidata/test_extract.py +++ b/tests/wikidata/test_extract.py @@ -19,7 +19,7 @@ from tests.wikidata.conftest import TESTDATA_DIR -@pytest.mark.integration() +@pytest.mark.integration def test_search_organization_by_label() -> None: expected = "Q679041" @@ -29,7 +29,7 @@ def test_search_organization_by_label() -> None: assert search_result.identifier == expected -@pytest.mark.integration() 
+@pytest.mark.integration def test_search_organizations_by_label() -> None: search_result = list( search_organizations_by_label( @@ -50,7 +50,7 @@ def test_search_organizations_by_label() -> None: assert search_result[0].labels.model_dump() == labels -@pytest.mark.integration() +@pytest.mark.integration def test_get_count_of_found_organizations_by_label() -> None: total_found_orgs = get_count_of_found_organizations_by_label( item_label='Robert Koch Institute"', @@ -60,7 +60,7 @@ def test_get_count_of_found_organizations_by_label() -> None: assert total_found_orgs == 3 -@pytest.mark.integration() +@pytest.mark.integration def test_search_organization_by_label_for_none() -> None: """Test if None is returned when multiple organizations are found.""" search_result = search_organization_by_label( @@ -425,7 +425,7 @@ def mocked_query_response() -> list[dict[str, dict[str, str]]]: assert search_result == 3 -@pytest.mark.integration() +@pytest.mark.integration def test_get_organization_details() -> None: expected = { "identifier": "Q679041", From af9b3c1996b3b524f17e739fe3a3e6fe8fa2ec4d Mon Sep 17 00:00:00 2001 From: Nicolas Drebenstedt <drebenstedtn@rki.de> Date: Fri, 11 Oct 2024 15:05:04 +0200 Subject: [PATCH 19/19] CL --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72973411..256f6f25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changes +- make ruff linter config opt-out, instead of opt-in + ### Deprecated ### Removed