# ---------------------------------------------------------------------------
# dlp/custom_infotype.py
# Sample appended after the `dlp_omit_name_if_also_email` region.
# ---------------------------------------------------------------------------


# [START dlp_inspect_with_medical_record_number_custom_regex_detector]
def inspect_with_medical_record_number_custom_regex_detector(
    project,
    content_string,
):
    """Inspect a string for medical record numbers with a custom regex.

    Builds a custom info type named "C_MRN" backed by a regular expression
    and asks the Data Loss Prevention API to inspect ``content_string``
    with it. Each finding is printed to standard output.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.

    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library.
    import google.cloud.dlp

    # Instantiate a client.
    client = google.cloud.dlp_v2.DlpServiceClient()

    # Custom regex detector "C_MRN" for the ###-#-##### pattern, where each
    # # stands for a digit from 1 to 9. Matches are reported with a
    # likelihood of POSSIBLE.
    mrn_detector = {
        "info_type": {"name": "C_MRN"},
        "regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
        "likelihood": "POSSIBLE",
    }

    # The inspection configuration carries only the custom detector.
    inspect_config = {"custom_info_types": [mrn_detector]}

    # Wrap the text to inspect in a content item.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    parent = client.project_path(project)

    # Call the API.
    response = client.inspect_content(parent, inspect_config, item)

    # Print out the results.
    findings = response.result.findings
    if not findings:
        print("No findings.")
        return

    for finding in findings:
        # The quote is printed only when the attribute exists and is
        # non-empty; a missing attribute is silently skipped.
        quote = getattr(finding, "quote", None)
        if quote:
            print(f"Quote: {quote}")
        print(f"Info type: {finding.info_type.name}")
        print(f"Likelihood: {finding.likelihood}")


# [END dlp_inspect_with_medical_record_number_custom_regex_detector]


# ---------------------------------------------------------------------------
# dlp/custom_infotype_test.py
# Test appended after `test_omit_name_if_also_email`.
# ---------------------------------------------------------------------------


def test_inspect_with_medical_record_number_custom_regex_detector(capsys):
    """Check that the sample reports a C_MRN finding for a matching string."""
    sample_text = "Patients MRN 444-5-22222"
    custom_infotype.inspect_with_medical_record_number_custom_regex_detector(
        GCLOUD_PROJECT, sample_text)

    captured_out, _ = capsys.readouterr()
    assert "Info type: C_MRN" in captured_out