Skip to content

Commit

Permalink
add option to pseudonymizer to use different encryption modes (#600)
Browse files Browse the repository at this point in the history
* add mode option to pseudonymizer
* move encrypter to pseudo module
* update changelog
* update documentation
* add test for pseudo tools workflow with different modes
* implement __str__ methods in Pseudonyms

---------

Co-authored-by: dtrai2 <95028228+dtrai2@users.noreply.github.com>
  • Loading branch information
ekneg54 and dtrai2 committed Jun 12, 2024
1 parent e9584d3 commit d2f22ee
Show file tree
Hide file tree
Showing 19 changed files with 849 additions and 460 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
* add logprep http output connector
* add pseudonymization tools to logprep -> see: `logprep pseudo --help`
* add `restart_count` parameter to configuration
* add option `mode` to `pseudonymizer` processor and to pseudonymization tools to choose the AES mode for encryption and decryption

### Improvements

Expand Down
6 changes: 3 additions & 3 deletions doc/source/user_manual/execution.rst
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ Pseudonymization Tools

Logprep provides tools to pseudonymize and depseudonymize values. This can be useful for testing
and debugging purposes. But this can also be used to depseudonymize values pseudonymized by
Logpreps :code:`Pseudonymizer` Processor.
Logprep :code:`Pseudonymizer` Processor.

These tools can be used to pseudonymize given strings using the same method as used in Logprep
and provide functionality to depseudonymize values using a pair of keys.
Expand All @@ -157,10 +157,10 @@ pseudonymize

.. code-block:: bash
logprep pseudo pseudonymize analyst depseudo mystring
logprep pseudo pseudonymize analyst.crt depseudo.crt mystring
This will pseudonymize the provided string using the analyst and depseudo keys.
get help with :code:`logperp pseudo pseudonymize --help`
get help with :code:`logprep pseudo pseudonymize --help`

depseudonymize
^^^^^^^^^^^^^^
Expand Down
72 changes: 0 additions & 72 deletions logprep/processor/pseudonymizer/encrypter.py

This file was deleted.

33 changes: 26 additions & 7 deletions logprep/processor/pseudonymizer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,20 @@
Pseudonymizer
=============
The `pseudonymizer` is a processor that pseudonymizes certain fields of log messages to ensure
The :code:`pseudonymizer` is a processor that pseudonymizes certain fields of log messages to ensure
privacy regulations can be adhered to.
.. security-best-practice::
:title: Processor - Pseudonymizer
The `pseudonymizer` works with two public keys for different roles.
The :code:`pseudonymizer` works with two public keys for different roles.
It is suggested to ensure that two different keys are being used such that the separation of the
roles can be maintained.
It is suggested to use the :code:`GCM` mode for encryption as it decouples the key length of the
depseudo and analyst keys. This leads to an additional 152 bytes of overhead for the encryption
compared to the :code:`CTR` mode encrypter.
Processor Configuration
^^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: yaml
Expand All @@ -30,6 +34,7 @@
hash_salt: secret_salt
regex_mapping: /path/to/regex_mapping.json
max_cached_pseudonyms: 1000000
mode: GCM
tld_lists:
- /path/to/tld_list.dat
Expand All @@ -55,11 +60,15 @@
from logprep.abc.processor import Processor
from logprep.metrics.metrics import CounterMetric, GaugeMetric
from logprep.processor.field_manager.processor import FieldManager
from logprep.processor.pseudonymizer.encrypter import DualPKCS1HybridEncrypter
from logprep.processor.pseudonymizer.rule import PseudonymizerRule
from logprep.util.getter import GetterFactory
from logprep.util.hasher import SHA256Hasher
from logprep.util.helper import add_field_to, get_dotted_field_value
from logprep.util.pseudo.encrypter import (
DualPKCS1HybridCTREncrypter,
DualPKCS1HybridGCMEncrypter,
Encrypter,
)
from logprep.util.validators import list_of_urls_validator


Expand Down Expand Up @@ -133,6 +142,13 @@ class Config(FieldManager.Config):
a default list will be retrieved online and cached in a local directory. For local
files the path has to be given with :code:`file:///path/to/file.dat`."""

mode: str = field(
validator=[validators.instance_of(str), validators.in_(("GCM", "CTR"))], default="GCM"
)
"""Optional mode of operation for the encryption. Can be either 'GCM' or 'CTR'.
Default is 'GCM'.
"""

@define(kw_only=True)
class Metrics(Processor.Metrics):
"""Tracks statistics about the Pseudonymizer"""
Expand Down Expand Up @@ -190,10 +206,13 @@ def _hasher(self):
return SHA256Hasher()

@cached_property
def _encrypter(self) -> DualPKCS1HybridEncrypter:
_encrypter = DualPKCS1HybridEncrypter()
_encrypter.load_public_keys(self._config.pubkey_analyst, self._config.pubkey_depseudo)
return _encrypter
def _encrypter(self) -> Encrypter:
if self._config.mode == "CTR":
encrypter = DualPKCS1HybridCTREncrypter()
else:
encrypter = DualPKCS1HybridGCMEncrypter()
encrypter.load_public_keys(self._config.pubkey_analyst, self._config.pubkey_depseudo)
return encrypter

@cached_property
def _tld_extractor(self) -> TLDExtract:
Expand Down
2 changes: 2 additions & 0 deletions logprep/util/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class EXITCODES(Enum):
DEFAULT_CONFIG_LOCATION = "file:///etc/logprep/pipeline.yml"
DEFAULT_LOG_FORMAT = "%(asctime)-15s %(process)-6s %(name)-10s %(levelname)-8s: %(message)s"
DEFAULT_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
DEFAULT_AES_KEY_LENGTH = 32


# dictconfig as described in
# https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema
Expand Down
34 changes: 27 additions & 7 deletions logprep/util/pseudo/commands/depseudonymize.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,41 @@
"""Command line tool to depseudonymize a string using the given keys."""

import logging
import sys

import click

from logprep.util.pseudo.depseudonymizer.depseudonymizer import Depseudonymizer
from logprep.util.pseudo.decrypter import (
DualPKCS1HybridCTRDecrypter,
DualPKCS1HybridGCMDecrypter,
)


@click.command()
@click.argument("analyst-key", type=str)
@click.argument("depseudo-key", type=str)
@click.argument("analyst-key", type=click.Path(exists=True))
@click.argument("depseudo-key", type=click.Path(exists=True))
@click.argument("pseudo-string", type=str)
def depseudonymize(analyst_key: str, depseudo_key: str, pseudo_string: str):
@click.option(
"--mode",
type=click.Choice(["gcm", "ctr"]),
default="ctr",
help="The mode to use for decryption",
)
def depseudonymize(analyst_key: str, depseudo_key: str, pseudo_string: str, mode: str):
"""depseudonymize a string using the given keys."""
depseudo = Depseudonymizer(pseudo_string)
depseudo = (
DualPKCS1HybridGCMDecrypter(pseudo_string)
if mode == "gcm"
else DualPKCS1HybridCTRDecrypter(pseudo_string)
)
keys = {}
for key_file_name in analyst_key, depseudo_key:
with open(f"{key_file_name}.key", "r", encoding="utf8") as key_file:
with open(f"{key_file_name}", "r", encoding="utf8") as key_file:
keys[key_file_name] = key_file.read()
depseudo.depseudo_key = keys[depseudo_key]
depseudo.analyst_key = keys[analyst_key]
print(depseudo.depseudonymize())
try:
print(depseudo.decrypt())
except Exception as e: # pylint: disable=broad-except
print(f"Error: {e}", file=sys.stderr)
sys.exit(1)
25 changes: 18 additions & 7 deletions logprep/util/pseudo/commands/pseudonymize.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,28 @@
"""Pseudonymize a string using the given keys and method."""

import click

from logprep.processor.pseudonymizer.encrypter import DualPKCS1HybridEncrypter
from logprep.util.pseudo.encrypter import (
DualPKCS1HybridCTREncrypter,
DualPKCS1HybridGCMEncrypter,
)


@click.command()
@click.argument("analyst-key", type=str)
@click.argument("depseudo-key", type=str)
@click.argument("analyst-key", type=click.Path(exists=True))
@click.argument("depseudo-key", type=click.Path(exists=True))
@click.argument("string", type=str)
def pseudonymize(analyst_key: str, depseudo_key: str, string: str):
@click.option(
"--mode",
type=click.Choice(["gcm", "ctr"]),
default="ctr",
help="The mode to use for decryption",
)
def pseudonymize(analyst_key: str, depseudo_key: str, string: str, mode: str):
"""pseudonymize a string using the given keys."""
encrypter = DualPKCS1HybridEncrypter()
encrypter = DualPKCS1HybridGCMEncrypter() if mode == "gcm" else DualPKCS1HybridCTREncrypter()
encrypter.load_public_keys(
keyfile_analyst=f"{analyst_key}.crt",
keyfile_depseudo=f"{depseudo_key}.crt",
keyfile_analyst=f"{analyst_key}",
keyfile_depseudo=f"{depseudo_key}",
)
print(encrypter.encrypt(string))
Loading

0 comments on commit d2f22ee

Please sign in to comment.