feature/add more linters (#302)

# PR Context - trying out more ruff linters - hoping this will lead to less bugs, better maintainability, more consistency - if it works out ok, we'll move the config to mex-template too # Changes - make ruff linter config opt-out, instead of opt-in --------- Signed-off-by: Nicolas Drebenstedt <897972+cutoffthetop@users.noreply.github.com> Co-authored-by: rababerladuseladim <rababerladuseladim@users.noreply.github.com>
robert-koch-institut · Oct 14, 2024 · a31cc54 · a31cc54
1 parent c428ac4
commit a31cc54
Show file tree

Hide file tree

Showing 36 changed files with 246 additions and 216 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changes
 
+- make ruff linter config opt-out, instead of opt-in
+
 ### Deprecated
 
 ### Removed

diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py
@@ -145,7 +145,7 @@ def get_merged_item(
         Returns:
             A single merged item
         """
-        # XXX stop-gap until the backend has a proper get merged item endpoint (MX-1669)
+        # TODO(ND): stop-gap until backend has proper get merged item endpoint (MX-1669)
         response = self.request(
             method="GET",
             endpoint="merged-item",
@@ -158,7 +158,8 @@ def get_merged_item(
         try:
             return response_model.items[0]
         except IndexError:
-            raise HTTPError("merged item was not found") from None
+            msg = "merged item was not found"
+            raise HTTPError(msg) from None
 
     def preview_merged_item(
         self,

diff --git a/mex/common/cli.py b/mex/common/cli.py
@@ -137,13 +137,13 @@ def _callback(
         func()
     except (Abort, BdbQuit, Exit, KeyboardInterrupt):  # pragma: no cover
         context.exit(130)
-    except Exception as error:
+    except Exception:
         # an error occurred, let's print the traceback
         logger.error(click.style(format_exc(), fg="red"))
         if settings.debug:  # pragma: no cover
             # if we are in debug mode, jump into interactive debugging.
             pdb.post_mortem(sys.exc_info()[2])
-            raise error
+            raise
         # if not in debug mode, exit with code 1.
         logger.error("exit")
         context.exit(1)

diff --git a/mex/common/connector/http.py b/mex/common/connector/http.py
@@ -5,7 +5,7 @@
 
 import backoff
 import requests
-from requests import HTTPError, RequestException, Response
+from requests import HTTPError, RequestException, Response, codes
 
 from mex.common.connector import BaseConnector
 from mex.common.settings import BaseSettings
@@ -30,7 +30,7 @@ def _set_session(self) -> None:
         """Create and set request session."""
         settings = BaseSettings.get()
         self.session = requests.Session()
-        self.session.verify = settings.verify_session  # type: ignore
+        self.session.verify = settings.verify_session  # type: ignore[assignment]
 
     def _set_authentication(self) -> None:
         """Authenticate to the host."""
@@ -94,23 +94,25 @@ def request(
                 response=response,
             ) from error
 
-        if response.status_code == 204:
+        if response.status_code == codes.no_content:
             return {}
         return cast(dict[str, Any], response.json())
 
     @backoff.on_predicate(
         backoff.fibo,
-        lambda response: cast(Response, response).status_code >= 500,
+        lambda response: cast(Response, response).status_code
+        >= codes.internal_server_error,
         max_tries=4,
     )
     @backoff.on_predicate(
         backoff.fibo,
-        lambda response: cast(Response, response).status_code == 429,
+        lambda response: cast(Response, response).status_code
+        == codes.too_many_requests,
         max_tries=10,
     )
     @backoff.on_predicate(
         backoff.fibo,
-        lambda response: cast(Response, response).status_code == 403,
+        lambda response: cast(Response, response).status_code == codes.forbidden,
         max_tries=10,
     )
     @backoff.on_exception(backoff.fibo, RequestException, max_tries=6)

diff --git a/mex/common/context.py b/mex/common/context.py
@@ -51,11 +51,12 @@ def load(self, cls: type[_SingletonT]) -> _SingletonT:
             self._singleton = cls()
             return self._singleton
         if not issubclass(type(self._singleton), cls):
-            raise RuntimeError(
+            msg = (
                 f"requested class ({cls}) is not a parent class of loaded class "
                 f"({type(self._singleton)}). "
                 f"Did you initialize {cls} upon startup?"
             )
+            raise RuntimeError(msg)  # noqa: TRY004
         return self._singleton
 
     def push(self, instance: _SingletonT) -> None:

diff --git a/mex/common/extract.py b/mex/common/extract.py
@@ -59,10 +59,12 @@ def parse_csv(
     ) as reader:
         for chunk in reader:
             for index, row in chunk.iterrows():
-                row.replace(to_replace=np.nan, value=None, inplace=True)
-                row.replace(regex=r"^\s*$", value=None, inplace=True)
                 try:
-                    model = into.model_validate(row.to_dict())
+                    model = into.model_validate(
+                        row.replace(to_replace=np.nan, value=None)
+                        .replace(regex=r"^\s*$", value=None)
+                        .to_dict()
+                    )
                     logger.info(
                         "parse_csv - %s %s - OK",
                         into.__name__,

diff --git a/mex/common/identity/registry.py b/mex/common/identity/registry.py
@@ -21,7 +21,8 @@ def register_provider(key: Hashable, provider_cls: type[BaseProvider]) -> None:
         RuntimeError: When the `key` is already registered
     """
     if key in _PROVIDER_REGISTRY:
-        raise RuntimeError(f"Already registered identity provider: {key}")
+        msg = f"Already registered identity provider: {key}"
+        raise RuntimeError(msg)
     _PROVIDER_REGISTRY[key] = provider_cls
 
 
@@ -41,9 +42,8 @@ def get_provider() -> BaseProvider:
     if settings.identity_provider in _PROVIDER_REGISTRY:
         provider_cls = _PROVIDER_REGISTRY[settings.identity_provider]
         return provider_cls.get()
-    raise RuntimeError(
-        f"Identity provider not implemented: {settings.identity_provider}"
-    )
+    msg = f"Identity provider not implemented: {settings.identity_provider}"
+    raise RuntimeError(msg)
 
 
 # register the default providers shipped with mex-common

diff --git a/mex/common/ldap/connector.py b/mex/common/ldap/connector.py
@@ -43,7 +43,8 @@ def __init__(self) -> None:
         )
         self._connection = connection.__enter__()
         if not self._is_service_available():
-            raise MExError(f"LDAP service not available at url: {host}:{port}")
+            msg = f"LDAP service not available at url: {host}:{port}"
+            raise MExError(msg)
 
     def _is_service_available(self) -> bool:
         try:
@@ -197,15 +198,17 @@ def get_functional_account(
             )
         )
         if not functional_accounts:
-            raise EmptySearchResultError(
+            msg = (
                 "Cannot find AD functional account for filters "
                 f"'objectGUID: {objectGUID}, {filters}'"
             )
+            raise EmptySearchResultError(msg)
         if len(functional_accounts) > 1:
-            raise FoundMoreThanOneError(
+            msg = (
                 "Found multiple AD functional accounts for filters "
                 f"'objectGUID: {objectGUID}, {filters}'"
             )
+            raise FoundMoreThanOneError(msg)
         return functional_accounts[0]
 
     def get_person(
@@ -235,15 +238,17 @@ def get_person(
             )
         )
         if not persons:
-            raise EmptySearchResultError(
+            msg = (
                 f"Cannot find AD person for filters 'objectGUID: {objectGUID}, "
                 f"employeeID: {employeeID}, {filters}'"
             )
+            raise EmptySearchResultError(msg)
         if len(persons) > 1:
-            raise FoundMoreThanOneError(
+            msg = (
                 f"Found multiple AD persons for filters 'objectGUID: {objectGUID}, "
                 f"employeeID: {employeeID}, {filters}'"
             )
+            raise FoundMoreThanOneError(msg)
         return persons[0]
 
     def get_unit(self, **filters: str) -> LDAPUnit:
@@ -260,9 +265,9 @@ def get_unit(self, **filters: str) -> LDAPUnit:
         """
         units = list(self.get_units(**filters))
         if not units:
-            raise EmptySearchResultError(f"Cannot find AD unit for filters '{filters}'")
+            msg = f"Cannot find AD unit for filters '{filters}'"
+            raise EmptySearchResultError(msg)
         if len(units) > 1:
-            raise FoundMoreThanOneError(
-                f"Found multiple AD units for filters '{filters}'"
-            )
+            msg = f"Found multiple AD units for filters '{filters}'"
+            raise FoundMoreThanOneError(msg)
         return units[0]
diff --git a/mex/common/ldap/extract.py b/mex/common/ldap/extract.py
@@ -28,7 +28,8 @@ def _get_merged_ids_by_attribute(
         MergedPersonIdentifiers
     """
     if attribute not in LDAPPerson.model_fields:
-        raise RuntimeError(f"Not a valid LDAPPerson field: {attribute}")
+        msg = f"Not a valid LDAPPerson field: {attribute}"
+        raise RuntimeError(msg)
     merged_ids_by_attribute = defaultdict(list)
     provider = get_provider()
     for person in persons:

diff --git a/mex/common/ldap/transform.py b/mex/common/ldap/transform.py
@@ -100,15 +100,16 @@ def transform_ldap_person_to_mex_person(
         if d and (unit := units_by_identifier_in_primary_source.get(d.lower()))
     ]
     if not member_of:
-        raise MExError(
+        msg = (
             "No unit or department found for LDAP department "
             f"'{ldap_person.department}' or departmentNumber "
             f"'{ldap_person.departmentNumber}'"
         )
+        raise MExError(msg)
     return ExtractedPerson(
         identifierInPrimarySource=str(ldap_person.objectGUID),
         hadPrimarySource=primary_source.stableTargetId,
-        affiliation=[],  # TODO resolve organization for person.company/RKI
+        affiliation=[],  # TODO(HS): resolve organization for person.company/RKI
         email=ldap_person.mail,
         familyName=[ldap_person.sn],
         fullName=[ldap_person.displayName] if ldap_person.displayName else [],
@@ -184,11 +185,11 @@ def analyse_person_string(string: str) -> list[PersonName]:
         return [name for strings in split for name in analyse_person_string(strings)]
 
     # split on comma if there is more than one
-    if len(split := re.split(r",", string)) > 2:
+    if len(split := re.split(r",", string)) > 2:  # noqa: PLR2004
         return [name for strings in split for name in analyse_person_string(strings)]
 
     # split on single commas only if there are more than three words
-    if len(split := re.split(r",", string)) == 2 and string.strip().count(" ") > 2:
+    if len(split := re.split(r",", string)) == 2 and string.strip().count(" ") > 2:  # noqa: PLR2004
         return [name for strings in split for name in analyse_person_string(strings)]
 
     # split into surname and given name
@@ -209,7 +210,7 @@ def analyse_person_string(string: str) -> list[PersonName]:
         return [PersonName(surname=split[0], full_name=full_name)]
 
     # return surname and given name
-    if len(split) == 2:
+    if len(split) == 2:  # noqa: PLR2004
         return [PersonName(surname=split[1], given_name=split[0], full_name=full_name)]
 
     # found no one

diff --git a/mex/common/models/base/model.py b/mex/common/models/base/model.py
@@ -143,7 +143,8 @@ def _convert_list_to_non_list(cls, field_name: str, value: list[Any]) -> Any:
             # if we have just one entry, we can safely unpack it
             return value[0]
         # we cannot unambiguously unpack more than one value
-        raise ValueError(f"got multiple values for {field_name}")
+        msg = f"got multiple values for {field_name}"
+        raise ValueError(msg)
 
     @classmethod
     def _fix_value_listyness_for_field(cls, field_name: str, value: Any) -> Any:
@@ -186,10 +187,11 @@ def verify_computed_field_consistency(
         if not isinstance(data, MutableMapping):
             # data is not a dictionary: we can't "pop" values from that,
             # so we can't safely do a before/after comparison
-            raise AssertionError(
+            msg = (
                 "Input should be a valid dictionary, validating other types is not "
                 "supported for models with computed fields."
             )
+            raise AssertionError(msg)  # noqa: TRY004
         custom_values = {
             field: value
             for field in cls.model_computed_fields
@@ -198,7 +200,8 @@ def verify_computed_field_consistency(
         result = handler(data)
         computed_values = result.model_dump(include=set(custom_values))
         if computed_values != custom_values:
-            raise ValueError("Cannot set computed fields to custom values!")
+            msg = "Cannot set computed fields to custom values!"
+            raise ValueError(msg)
         return result
 
     @model_validator(mode="wrap")
@@ -223,9 +226,9 @@ def fix_listyness(cls, data: Any, handler: ValidatorFunctionWrapHandler) -> Any:
         Returns:
             data with fixed list shapes
         """
-        # XXX This needs to be a "wrap" validator that is defined *after* the computed
-        #     field model validator, so it runs *before* the computed field validator.
-        #     Sigh, see https://github.com/pydantic/pydantic/discussions/7434
+        # TODO(ND): This needs to be a "wrap" validator that is defined *after* the
+        # computed field model validator, so it runs *before* the computed field
+        # validator. Sigh, see https://github.com/pydantic/pydantic/discussions/7434
         if isinstance(data, MutableMapping):
             for name, value in data.items():
                 field_name = cls._get_alias_lookup().get(name, name)

diff --git a/mex/common/models/consent.py b/mex/common/models/consent.py
@@ -1,3 +1,2 @@
-# XXX this is a forward-compatibility hint for feature/model-update-v3:
-#     when this gets merged with model v3, remove the
-#     `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields
+# TODO(ND): when this gets merged with feature/model-update-v3, remove the
+# `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields
diff --git a/mex/common/organigram/transform.py b/mex/common/organigram/transform.py
@@ -42,7 +42,7 @@ def transform_organigram_units_to_organizational_units(
 
     for extracted_unit in extracted_unit_by_id_in_primary_source.values():
         identifier_in_primary_source = extracted_unit.identifierInPrimarySource
-        if (
+        if (  # noqa: SIM102
             parent_identifier_in_primary_source
             := parent_id_in_primary_source_by_id_in_primary_source.get(
                 identifier_in_primary_source

diff --git a/mex/common/settings.py b/mex/common/settings.py
@@ -208,7 +208,7 @@ def get_env_name(cls, name: str) -> str:
             case_sensitive=cls.model_config.get("case_sensitive", False),
             env_prefix=cls.model_config.get("env_prefix", ""),
         )
-        env_info = env_settings._extract_field_info(field, name)
+        env_info = env_settings._extract_field_info(field, name)  # noqa: SLF001
         return env_info[0][1].upper()
 
     @model_validator(mode="after")

diff --git a/mex/common/sinks/ndjson.py b/mex/common/sinks/ndjson.py
@@ -35,7 +35,7 @@ def write_ndjson(
                 handle = file_handles[class_name]
             except KeyError:
                 file_name = Path(settings.work_dir, f"{class_name}.ndjson")
-                writer = open(file_name, "a+", encoding="utf-8")
+                writer = open(file_name, "a+", encoding="utf-8")  # noqa: SIM115
                 file_handles[class_name] = handle = stack.enter_context(writer)
                 logger.info(
                     "write_ndjson - writing %s to file %s",

diff --git a/mex/common/transform.py b/mex/common/transform.py
@@ -16,7 +16,7 @@
 class MExEncoder(json.JSONEncoder):
     """Custom JSON encoder that can handle pydantic models, enums and UUIDs."""
 
-    def default(self, obj: Any) -> Any:
+    def default(self, obj: Any) -> Any:  # noqa: PLR0911
         """Implement custom serialization rules."""
         if isinstance(obj, PydanticModel):
             return obj.model_dump()
@@ -106,7 +106,9 @@ def to_key_and_values(dct: dict[str, Any]) -> Iterable[tuple[str, list[Any]]]:
     """Return an iterable of dictionary items where the values are always lists."""
     for key, value in dct.items():
         if value is None:
-            value = []
-        elif not isinstance(value, list):
-            value = [value]
-        yield key, value
+            list_of_values = []
+        elif isinstance(value, list):
+            list_of_values = value
+        else:
+            list_of_values = [value]
+        yield key, list_of_values
diff --git a/mex/common/types/email.py b/mex/common/types/email.py
@@ -9,6 +9,8 @@
 class Email(str):
     """Email address of a person, organization or other entity."""
 
+    __slots__ = ()
+
     @classmethod
     def __get_pydantic_core_schema__(
         cls, source_type: Any, handler: GetCoreSchemaHandler

diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py
@@ -12,6 +12,8 @@
 class Identifier(str):
     """Common identifier class based on UUID version 4."""
 
+    __slots__ = ()
+
     @classmethod
     def generate(cls, seed: int | None = None) -> Self:
         """Generate a new identifier from a seed or random UUID version 4."""

diff --git a/mex/common/types/link.py b/mex/common/types/link.py
@@ -42,7 +42,8 @@ def validate_strings(cls, value: Any) -> dict[str, Any]:
             return {"url": value}
         if isinstance(value, dict):
             return value
-        raise ValueError(f"Allowed input types are dict and str, got {type(value)}")
+        msg = f"Allowed input types are dict and str, got {type(value)}"
+        raise ValueError(msg)
 
     def __hash__(self) -> int:
         """Return the hash of this link."""