Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support multiple dicom versions #83

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# DicomAnonymizer

Python package to anonymize DICOM files.
The anonymization answer to the standard . More information about dicom fields for anonymization can be found [here](http://dicom.nema.org/dicom/2013/output/chtml/part15/chapter_E.html#table_E.1-1).
The anonymization follows the DICOM standard. More information about DICOM fields for anonymization can be found [here](http://dicom.nema.org/dicom/2023/output/chtml/part15/chapter_E.html#table_E.1-1).

The default behaviour of this package is to anonymize DICOM fields referenced in [dicomfields](dicomanonymizer/dicomfields.py).

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Tags anonymized in DICOM standard
# Documentation for groups meaning can be found in default associated actions.
# http://dicom.nema.org/dicom/2013/output/chtml/part15/chapter_E.html#table_E.1-1
# http://dicom.nema.org/dicom/2023/output/chtml/part15/chapter_E.html#table_E.1-1

# Replaced tags
D_TAGS = [
Expand Down
674 changes: 674 additions & 0 deletions dicomanonymizer/dicom_anonymization_databases/dicomfields_2024b.py

Large diffs are not rendered by default.

39 changes: 39 additions & 0 deletions dicomanonymizer/dicomfields_selector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import importlib

# Names of the tag-list attributes that every anonymization database module
# must define: dicom_anonymization_database_selector reads each of them with
# getattr and an AttributeError is raised if one is missing.
ANONYMIZATION_CATEGORIES = [
    "D_TAGS",
    "Z_TAGS",
    "X_TAGS",
    "U_TAGS",
    "Z_D_TAGS",
    "X_Z_TAGS",
    "X_D_TAGS",
    "X_Z_D_TAGS",
    "X_Z_U_STAR_TAGS",
    "ALL_TAGS",
]


def dicom_anonymization_database_selector(
    dicom_version: str = "dicomfields_2023",
) -> dict:
    """
    Load an anonymization database module and return its tag lists by category

    :param dicom_version: Name of the module to import from
        dicomanonymizer.dicom_anonymization_databases (e.g. "dicomfields_2023")
    :return Dict object which maps each name of ANONYMIZATION_CATEGORIES to the
        attribute of the same name found in the selected module
    :raises ValueError: if the requested module cannot be imported
    :raises AttributeError: if the module does not define every category
    """
    try:
        dicom_anonymization_database = importlib.import_module(
            f"dicomanonymizer.dicom_anonymization_databases.{dicom_version}"
        )
    except ModuleNotFoundError as err:
        # Chain the import failure explicitly so the traceback shows which
        # module was actually missing instead of an "during handling" context.
        raise ValueError(
            f"Unknown DICOM anonymization database: {dicom_version}"
        ) from err

    try:
        dicom_anonymization_dict = {
            anonymization_category: getattr(
                dicom_anonymization_database, anonymization_category
            )
            for anonymization_category in ANONYMIZATION_CATEGORIES
        }
    except AttributeError:
        # Give a hint about the faulty database, then let the original
        # AttributeError (which names the missing attribute) propagate.
        print(
            f"Anonymization database {dicom_version} is missing a category, please check it has them all."
        )
        raise
    return dicom_anonymization_dict
58 changes: 33 additions & 25 deletions dicomanonymizer/simpledicomanonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,13 @@
import re

from enum import Enum
from typing import List, Union
from typing import Callable, List, Union
from dataclasses import dataclass

from dicomanonymizer.dicomfields import (
D_TAGS,
Z_TAGS,
X_TAGS,
U_TAGS,
Z_D_TAGS,
X_Z_TAGS,
X_D_TAGS,
X_Z_D_TAGS,
X_Z_U_STAR_TAGS,
)
from dicomanonymizer.dicomfields_selector import dicom_anonymization_database_selector
from dicomanonymizer.format_tag import tag_to_hex_strings


# keeps the mapping from old UID to new UID
dictionary = {}


Expand Down Expand Up @@ -344,33 +334,47 @@ class ActionsMapNameFunctions(Enum):
regexp = Action(regexp, 2)


def initialize_actions() -> dict:
def initialize_actions(dicom_version: str = "dicomfields_2023") -> dict:
"""
Initialize anonymization actions with DICOM standard values

:param dicom_version: DICOM version to use
:return Dict object which map actions to tags
"""
anonymization_actions = {tag: replace for tag in D_TAGS}
anonymization_actions.update({tag: empty for tag in Z_TAGS})
anonymization_actions.update({tag: delete for tag in X_TAGS})
anonymization_actions.update({tag: replace_UID for tag in U_TAGS})
anonymization_actions.update({tag: empty_or_replace for tag in Z_D_TAGS})
anonymization_actions.update({tag: delete_or_empty for tag in X_Z_TAGS})
anonymization_actions.update({tag: delete_or_replace for tag in X_D_TAGS})
tags = dicom_anonymization_database_selector(dicom_version)

anonymization_actions = {tag: replace for tag in tags["D_TAGS"]}
anonymization_actions.update({tag: empty for tag in tags["Z_TAGS"]})
anonymization_actions.update({tag: delete for tag in tags["X_TAGS"]})
anonymization_actions.update({tag: replace_UID for tag in tags["U_TAGS"]})
anonymization_actions.update({tag: empty_or_replace for tag in tags["Z_D_TAGS"]})
anonymization_actions.update({tag: delete_or_empty for tag in tags["X_Z_TAGS"]})
anonymization_actions.update({tag: delete_or_replace for tag in tags["X_D_TAGS"]})
anonymization_actions.update(
{tag: delete_or_empty_or_replace for tag in X_Z_D_TAGS}
{tag: delete_or_empty_or_replace for tag in tags["X_Z_D_TAGS"]}
)
anonymization_actions.update(
{tag: delete_or_empty_or_replace_UID for tag in X_Z_U_STAR_TAGS}
{tag: delete_or_empty_or_replace_UID for tag in tags["X_Z_U_STAR_TAGS"]}
)
return anonymization_actions


def initialize_actions_2024b() -> dict:
    """
    Build the anonymization actions from the "dicomfields_2024b" database.
    To anonymize with the 2024b rules, pass this function as base_rules_gen
    (e.g. anonymize_dataset(dataset, base_rules_gen=initialize_actions_2024b)).

    :return Dict object which map actions to tags
    """
    actions_2024b = initialize_actions("dicomfields_2024b")
    return actions_2024b


def anonymize_dicom_file(
in_file: str,
out_file: str,
extra_anonymization_rules: dict = None,
delete_private_tags: bool = True,
base_rules_gen: Callable = initialize_actions,
) -> None:
"""
Anonymize a DICOM file by modifying personal tags
Expand All @@ -384,7 +388,9 @@ def anonymize_dicom_file(
"""
dataset = pydicom.dcmread(in_file)

anonymize_dataset(dataset, extra_anonymization_rules, delete_private_tags)
anonymize_dataset(
dataset, extra_anonymization_rules, delete_private_tags, base_rules_gen
)

# Store modified image
dataset.save_as(out_file)
Expand Down Expand Up @@ -450,15 +456,17 @@ def anonymize_dataset(
dataset: pydicom.Dataset,
extra_anonymization_rules: dict = None,
delete_private_tags: bool = True,
base_rules_gen: Callable = initialize_actions,
) -> None:
"""
Anonymize a pydicom Dataset by using anonymization rules which links an action to a tag

:param dataset: Dataset to be anonymize
:param base_rules_gen: Function to generate the base rules
:param extra_anonymization_rules: Rules to be applied on the dataset
:param delete_private_tags: Define if private tags should be delete or not
"""
current_anonymization_actions = initialize_actions()
current_anonymization_actions = base_rules_gen()

if extra_anonymization_rules is not None:
current_anonymization_actions.update(extra_anonymization_rules)
Expand Down
2 changes: 1 addition & 1 deletion examples/anonymize_extra_rules.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse

from dicomanonymizer.dicomfields import ALL_TAGS
from dicomanonymizer.dicom_anonymization_databases.dicomfields_2023 import ALL_TAGS
from dicomanonymizer import anonymize, keep


Expand Down
3 changes: 2 additions & 1 deletion scripts/scrap_DICOM_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,9 @@ def create_DICOM_fields(profiles):

def main(
url="https://dicom.nema.org/medical/dicom/current/output/chtml/part15/chapter_e.html",
output_path="dicomanonymizer/dicomfields.py",
output_path="dicomanonymizer/dicomfields_2024b.py",
):
# As of 2024.05.14, the current version of DICOM spec is 2024b.
profiles = scrap_profiles(url)
file_content = create_DICOM_fields(profiles=profiles)
with open(output_path, "w") as file:
Expand Down
18 changes: 9 additions & 9 deletions tests/test_anon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pydicom.data import get_testdata_files

from dicomanonymizer.simpledicomanonymizer import anonymize_dataset
from dicomanonymizer import dicomfields
from dicomanonymizer.dicom_anonymization_databases import dicomfields_2023

# Ignore warnings from pydicom validation
settings.writing_validation_mode = IGNORE
Expand Down Expand Up @@ -51,7 +51,7 @@ def orig_anon_dataset(request):

def test_deleted_tags_are_removed(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset
deleted_tags = dicomfields.X_TAGS
deleted_tags = dicomfields_2023.X_TAGS

for tt in deleted_tags: # sourcery skip: no-loop-in-tests
if (
Expand All @@ -66,12 +66,12 @@ def test_deleted_tags_are_removed(orig_anon_dataset):


changed_tags = (
dicomfields.U_TAGS
+ dicomfields.D_TAGS
+ dicomfields.Z_D_TAGS
+ dicomfields.X_D_TAGS
+ dicomfields.X_Z_D_TAGS
+ dicomfields.X_Z_U_STAR_TAGS
dicomfields_2023.U_TAGS
+ dicomfields_2023.D_TAGS
+ dicomfields_2023.Z_D_TAGS
+ dicomfields_2023.X_D_TAGS
+ dicomfields_2023.X_Z_D_TAGS
+ dicomfields_2023.X_Z_U_STAR_TAGS
)

empty_values = (0, "", "00010101", "000000.00", "ANONYMIZED")
Expand Down Expand Up @@ -104,7 +104,7 @@ def test_changed_tags_are_replaced(orig_anon_dataset):
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced"


empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS
empty_tags = dicomfields_2023.Z_TAGS + dicomfields_2023.X_Z_TAGS


def is_elem_empty(elem) -> bool:
Expand Down
39 changes: 38 additions & 1 deletion tests/test_anonymization_without_dicom.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import pydicom

from dicomanonymizer import anonymize_dataset
from dicomanonymizer.simpledicomanonymizer import empty
from dicomanonymizer.simpledicomanonymizer import (
empty,
initialize_actions,
initialize_actions_2024b,
)


def test_anonymization_without_dicom_file():
Expand Down Expand Up @@ -74,3 +78,36 @@ def test_anonymization_of_ranged_tags_without_dicom_file():
# Check that the dataset has been anonymized
assert (0x5011, 0x0110) not in anon_ds
assert (0x5012, 0x0112) not in anon_ds


def test_switching_dicom_versions():
    """Confirm that anonymization behaves differently between the 2023 and 2024b DICOM versions."""
    fields = [
        {  # Emptied with the 2023 rules, replaced by "ANONYMIZED" with the 2024b rules
            "id": (0x0010, 0x0020),
            "type": "LO",
            "value": "Test Patient ID",
        },
    ]

    # Create a readable dataset for pydicom (one per set of rules under test)
    data = pydicom.Dataset()
    data_2023 = pydicom.Dataset()
    data_2024b = pydicom.Dataset()

    for field in fields:  # sourcery skip: no-loop-in-tests
        data.add_new(field["id"], field["type"], field["value"])
        data_2023.add_new(field["id"], field["type"], field["value"])
        data_2024b.add_new(field["id"], field["type"], field["value"])

    # Same input, three ways of choosing the base rules:
    # the default generator, an explicit 2023 generator and the 2024b helper.
    anonymize_dataset(data, base_rules_gen=initialize_actions)
    anonymize_dataset(
        data_2023, base_rules_gen=lambda: initialize_actions("dicomfields_2023")
    )
    anonymize_dataset(data_2024b, base_rules_gen=initialize_actions_2024b)

    assert data[(0x0010, 0x0020)].value == ""  # default behavior which is DICOM 2023.
    assert data_2023[(0x0010, 0x0020)].value == ""  # same as the default.
    assert (
        data_2024b[(0x0010, 0x0020)].value == "ANONYMIZED"
    )  # 2024b differs from the default
44 changes: 44 additions & 0 deletions tests/test_dicomfields_selector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from dicomanonymizer.dicom_anonymization_databases import dicomfields_2024b
from dicomanonymizer.dicomfields_selector import dicom_anonymization_database_selector

from dicomanonymizer.dicom_anonymization_databases import dicomfields_2023


def test_selector():
    """Check the selector output for both databases, the default and an unknown name."""
    category_names = (
        "D_TAGS",
        "Z_TAGS",
        "X_TAGS",
        "U_TAGS",
        "Z_D_TAGS",
        "X_Z_TAGS",
        "X_D_TAGS",
        "X_Z_D_TAGS",
        "X_Z_U_STAR_TAGS",
        "ALL_TAGS",
    )
    databases = (
        ("dicomfields_2023", dicomfields_2023),
        ("dicomfields_2024b", dicomfields_2024b),
    )

    # Each known database must be returned category by category
    for version_name, database in databases:
        expected = {name: getattr(database, name) for name in category_names}
        assert dicom_anonymization_database_selector(version_name) == expected

    # check default selector
    default_selection = dicom_anonymization_database_selector()
    assert default_selection == dicom_anonymization_database_selector(
        "dicomfields_2023"
    )

    # An unknown database name must be reported with a ValueError
    raised = None
    try:
        dicom_anonymization_database_selector("2019")
    except ValueError as error:
        raised = error
    assert raised is not None
    assert str(raised) == "Unknown DICOM anonymization database: 2019"