diff --git a/mypy.ini b/mypy.ini index fa1afb0b..136f4c79 100644 --- a/mypy.ini +++ b/mypy.ini @@ -36,4 +36,7 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-phonenumbers.*] +ignore_missing_imports = True + +[mypy-py_w3c.*] ignore_missing_imports = True \ No newline at end of file diff --git a/notifications_utils/template.py b/notifications_utils/template.py index bf387d83..b5d8d2b6 100644 --- a/notifications_utils/template.py +++ b/notifications_utils/template.py @@ -46,6 +46,7 @@ from notifications_utils.take import Take from notifications_utils.template_change import TemplateChange from notifications_utils.sanitise_text import SanitiseSMS +from notifications_utils.validate_html import check_if_string_contains_valid_html template_env = Environment( @@ -351,6 +352,7 @@ def __init__( logo_with_background_colour=False, brand_name=None, jinja_path=None, + allow_html=False, ): super().__init__(template, values, jinja_path=jinja_path) self.fip_banner_english = fip_banner_english @@ -361,6 +363,7 @@ def __init__( self.brand_colour = brand_colour self.logo_with_background_colour = logo_with_background_colour self.brand_name = brand_name + self.allow_html = allow_html # set this again to make sure the correct either utils / downstream local jinja is used # however, don't set if we are in a test environment (to preserve the above mock) if "pytest" not in sys.modules: @@ -390,7 +393,7 @@ def __str__(self): return self.jinja_template.render( { - "body": get_html_email_body(self.content, self.values), + "body": get_html_email_body(self.content, self.values, html="passthrough" if self.allow_html else "escape"), "preheader": self.preheader, "fip_banner_english": self.fip_banner_english, "fip_banner_french": self.fip_banner_french, @@ -423,6 +426,7 @@ def __init__( brand_name=None, logo_with_background_colour=None, asset_domain=None, + allow_html=False, ): super().__init__( template, @@ -442,6 +446,7 @@ def __init__( self.brand_text = brand_text self.brand_name = brand_name self.asset_domain = asset_domain or "assets.notification.canada.ca" + self.allow_html = allow_html def __str__(self): return Markup( @@ -451,6 +456,7 @@ def __str__(self): self.content, self.values, redact_missing_personalisation=self.redact_missing_personalisation, + html="passthrough" if self.allow_html else "escape", ), "subject": self.subject, "from_name": escape_html(self.from_name), @@ -724,14 +730,17 @@ def is_unicode(content): return set(content) & set(SanitiseSMS.WELSH_NON_GSM_CHARACTERS) -def get_html_email_body(template_content, template_values, redact_missing_personalisation=False): +def get_html_email_body(template_content, template_values, redact_missing_personalisation=False, html="escape"): + if html == "passthrough" and check_if_string_contains_valid_html(template_content) != []: + # template_content contains invalid html, so escape it + html = "escape" return ( Take( Field( template_content, template_values, - html="escape", + html=html, markdown_lists=True, redact_missing_personalisation=redact_missing_personalisation, ) diff --git a/notifications_utils/validate_html.py b/notifications_utils/validate_html.py new file mode 100644 index 00000000..6f21335c --- /dev/null +++ b/notifications_utils/validate_html.py @@ -0,0 +1,27 @@ +from py_w3c.validators.html.validator import HTMLValidator + + +def check_if_string_contains_valid_html(content: str) -> list: + """ + Check if html snippet is valid - returns [] if html is valid. + This is only a partial document, so we expect the Doctype and title to be missing. + """ + + allowed_errors = [ + "Start tag seen without seeing a doctype first. Expected “”.", + "Element “head” is missing a required instance of child element “title”.", + ] + + # the content can contain markdown as well as html - wrap the content in a div so it has a chance of being valid html + content_in_div = f"
{content}
" + + val = HTMLValidator() + val.validate_fragment(content_in_div) + + significant_errors = [] + for error in val.errors: + if error["message"] in allowed_errors: + continue + significant_errors.append(error) + + return significant_errors diff --git a/notifications_utils/version.py b/notifications_utils/version.py index 7fbab54f..c9b8ad47 100644 --- a/notifications_utils/version.py +++ b/notifications_utils/version.py @@ -1,2 +1,2 @@ -__version__ = "45.0.0" +__version__ = "46.0.0" # GDS version '34.0.1' diff --git a/setup.py b/setup.py index a49786a9..b0ebcc5b 100755 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ "pytz==2021.1", "smartypants==2.0.1", "pypdf2==1.26.0", + "py_w3c==0.3.1", # required by both api and admin "awscli==1.19.58", "boto3==1.17.58", diff --git a/tests/test_template_types.py b/tests/test_template_types.py index 794383b6..708b7e53 100644 --- a/tests/test_template_types.py +++ b/tests/test_template_types.py @@ -206,6 +206,28 @@ def test_alt_text_with_no_fip_banner(logo_with_background_colour, brand_text, ex assert expected_alt_text in email +@pytest.mark.parametrize("renderer", [HTMLEmailTemplate, EmailPreviewTemplate]) +@pytest.mark.parametrize( + "content, allow_html, expected", + [ + ("Hello World", True, "Hello World"), + ("Hello World", False, "Hello World"), + ("
Hello World
", True, "
Hello World
"), + ("
Hello World
", False, "<div>Hello World</div>"), + ], +) +def test_allow_html_works(content: str, allow_html: bool, expected: str, renderer): + email = str( + renderer( + {"content": content, "subject": ""}, + fip_banner_english=True, + allow_html=allow_html, + ) + ) + + assert expected in email + + @pytest.mark.parametrize("complete_html", (True, False)) @pytest.mark.parametrize( "branding_should_be_present, brand_logo, brand_text, brand_colour", diff --git a/tests/test_validate_html.py b/tests/test_validate_html.py new file mode 100644 index 00000000..dea80e91 --- /dev/null +++ b/tests/test_validate_html.py @@ -0,0 +1,30 @@ +import pytest + +from notifications_utils.validate_html import check_if_string_contains_valid_html + + +@pytest.mark.parametrize( + "good_content", + ( + "
abc
", + '
abc
', + """
+ alt text +
""", + "abc
abc
xyz", + ), +) +def test_good_content_is_valid(good_content: str): + assert check_if_string_contains_valid_html(good_content) == [] + + +@pytest.mark.parametrize( + "bad_content", ("
abc
", '', "abc
abc
xyz", '