forked from tokern/piicatcher
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature: Support plugins for detectors of PII type.
Detectors detect PII. Change PII types from enums to a class hierarchy, so that new PII types can be defined. Support adding new detectors as plugins through entry points. Remove the spaCy detector and convert it into a plugin hosted in another repository. Fix tokern#115
- Loading branch information
Showing
14 changed files
with
565 additions
and
942 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,70 @@ | ||
# flake8: noqa
# Version string of this package.
__version__ = "0.18.2"
|
||
from dbcat.catalog.pii_types import PiiType | ||
|
||
|
||
class Phone(PiiType):
    """PII type labelled ``Phone`` with the type key ``phone``."""

    name = "Phone"
    type = "phone"
|
||
|
||
class Email(PiiType):
    """PII type labelled ``Email`` with the type key ``email``."""

    name = "Email"
    type = "email"
|
||
|
||
# NOTE(review): the ``type=`` class keyword repeats the ``type`` attribute
# below; presumably it is consumed by ``PiiType`` at subclass creation --
# confirm against dbcat before removing either one.
class CreditCard(PiiType, type="credit_card"):  # type: ignore
    """PII type labelled ``Credit Card`` with the type key ``credit_card``."""

    name = "Credit Card"
    type = "credit_card"
|
||
|
||
class Address(PiiType):
    """PII type labelled ``Address`` with the type key ``address``."""

    name = "Address"
    type = "address"
|
||
|
||
class Person(PiiType):
    """PII type labelled ``Person`` with the type key ``person``."""

    name = "Person"
    type = "person"
|
||
|
||
# NOTE(review): the ``type=`` class keyword repeats the ``type`` attribute
# below; presumably it is consumed by ``PiiType`` at subclass creation --
# confirm against dbcat before removing either one.
class BirthDate(PiiType, type="birth_date"):  # type: ignore
    """PII type labelled ``Birth Date`` with the type key ``birth_date``."""

    name = "Birth Date"
    type = "birth_date"
|
||
|
||
class Gender(PiiType):
    """PII type labelled ``Gender`` with the type key ``gender``."""

    name = "Gender"
    type = "gender"
|
||
|
||
class Nationality(PiiType):
    """PII type labelled ``Nationality`` with the type key ``nationality``."""

    name = "Nationality"
    type = "nationality"
|
||
|
||
class SSN(PiiType):
    """PII type labelled ``SSN`` with the type key ``ssn``."""

    name = "SSN"
    type = "ssn"
|
||
|
||
# NOTE(review): the ``type=`` class keyword repeats the ``type`` attribute
# below; presumably it is consumed by ``PiiType`` at subclass creation --
# confirm against dbcat before removing either one.
class UserName(PiiType, type="user_name"):  # type: ignore
    """PII type labelled ``User Name`` with the type key ``user_name``."""

    name = "User Name"
    type = "user_name"
|
||
|
||
class Password(PiiType):
    """PII type labelled ``Password`` with the type key ``password``."""

    name = "Password"
    type = "password"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import inspect | ||
from abc import ABC, abstractmethod | ||
from typing import Optional, Type | ||
|
||
import catalogue | ||
from dbcat.catalog.models import CatColumn | ||
from dbcat.catalog.pii_types import PiiType | ||
|
||
|
||
class Detector(ABC):
    """Common base class for detectors.

    The class declares no methods of its own. It only annotates ``name``,
    the string under which ``register_detector`` files the class in the
    registry; subclasses are expected to assign it.
    """

    # Annotation only: no default value is provided at this level.
    name: str
|
||
|
||
class MetadataDetector(Detector):
    """Detector whose ``detect`` receives only a column, with no data value."""

    @abstractmethod
    def detect(self, column: CatColumn) -> Optional[PiiType]:
        """Return the ``PiiType`` detected for ``column``, or ``None``.

        :param column: The catalog column to inspect.
        :returns: A single PII type, or ``None`` (presumably meaning that no
            PII was identified -- confirm against implementations).
        """
|
||
|
||
class DatumDetector(Detector):
    """Detector whose ``detect`` receives a column together with one datum."""

    @abstractmethod
    def detect(self, column: CatColumn, datum: str) -> Optional[PiiType]:
        """Return the ``PiiType`` detected in ``datum``, or ``None``.

        :param column: The catalog column the datum is associated with.
        :param datum: The text value to inspect.
        :returns: A single PII type, or ``None`` (presumably meaning that no
            PII was identified -- confirm against implementations).
        """
|
||
|
||
# Registry that holds detector classes under the ("piicatcher", "detectors")
# namespace. ``entry_points=True`` is what lets detectors shipped as plugins
# be picked up through entry points.
detector_registry = catalogue.create(
    "piicatcher",
    "detectors",
    entry_points=True,
)
|
||
|
||
def register_detector(detector: Type["Detector"]) -> Type["Detector"]:
    """Register a detector class in ``detector_registry`` under its ``name``.

    Call ``register_detector(NewDetector)`` after the detector definition to
    register it. The class is returned unchanged.

    .. code:: pycon

        >>> import piicatcher
        >>> class NewDetector(piicatcher.detectors.Detector):
        ...     name = "new_detector"
        >>> piicatcher.detectors.register_detector(NewDetector) is NewDetector
        True

    :param detector: The ``Detector`` class (not an instance) to register.
    :type detector: Detector class
    :returns: The same class that was passed in.
    :raises ValueError: If ``detector`` is not a class, or if it does not
        define a non-empty string ``name`` attribute.
    """
    if not inspect.isclass(detector):
        raise ValueError("detector should be a class, not an instance.")

    # ``Detector`` only annotates ``name`` without assigning it, so a subclass
    # that forgets to set it would otherwise fail here with a bare
    # AttributeError that does not explain what is missing.
    detector_name = getattr(detector, "name", None)
    if not isinstance(detector_name, str) or not detector_name:
        raise ValueError(
            f"{detector.__name__} must define a non-empty string 'name' "
            "attribute to be registered."
        )

    detector_registry.register(detector_name, func=detector)

    return detector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.