Merge branch 'main' into opensearch

cfpb · May 17, 2022 · 4df604d · 4df604d
2 parents 899e6fa + e6e5890
commit 4df604d
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 6 deletions.
diff --git a/cfgov/ask_cfpb/tests/test_search.py b/cfgov/ask_cfpb/tests/test_search.py
@@ -139,7 +139,7 @@ def test_ask_search_autocomplete_honors_max_chars(self, mock_search):
         self.client.get(
             reverse("ask-autocomplete-en"), {"term": too_long_term}
         )
-        self.assertTrue(mock_search.called_with(valid_term))
+        self.assertTrue(mock_search.called_with(too_long_term))
 
     @mock.patch.object(AnswerPageDocument, "search")
     def test_ask_search_autocomplete(self, mock_search):

diff --git a/cfgov/core/tests/test_utils.py b/cfgov/core/tests/test_utils.py
@@ -134,7 +134,7 @@ def test_add_link_markup_anchor(self):
         )
 
     def check_external_link(
-        self, url, expected_href=None, expected_pretty_href=None
+        self, url, expected_href=None, expected_pretty_href=None, is_gov=False
     ):
         tag = f'<a href="{url}">foo</a>'
         path = "/about-us/blog/"
@@ -144,7 +144,7 @@ def check_external_link(
         expected_pretty_href = expected_pretty_href or url
 
         # .gov URLs don't get a data-pretty-href attribute
-        if ".gov" not in url:
+        if not is_gov:
             data_pretty_href = f'data-pretty-href="{expected_pretty_href}" '
 
         expected_html = (
@@ -159,9 +159,35 @@ def check_external_link(
 
         self.assertEqual(add_link_markup(tag, path), str(expected_tag))
 
+    def test_govdelivery_url1(self):
+        link = "https://public.govdelivery.com"  # "gov" in host, .com TLD
+        self.check_external_link(link, expected_href=signed_redirect(link))
+
+    def test_govdelivery_url2(self):
+        link = "https://www.govdelivery.com"  # "gov" in host, .com TLD
+        self.check_external_link(link, expected_href=signed_redirect(link))
+
+    def test_govdelivery_url3(self):
+        link = "https://www.govdelivery.com/something"  # host plus a path
+        self.check_external_link(link, expected_href=signed_redirect(link))
+
     def test_dot_gov_urls(self):
         url = "https://www.federalreserve.gov"
-        self.check_external_link(url, expected_href=url)
+        self.check_external_link(
+            url, expected_href=url, expected_pretty_href=None, is_gov=True
+        )
+
+    def test_dot_gov_urls2(self):
+        link = "https://www.federalreserve.gov/something"  # .gov with a path
+        self.check_external_link(
+            link, expected_href=link, expected_pretty_href=None, is_gov=True
+        )
+
+    def test_content_cfgov(self):
+        """add_link_markup returns None for a content.cfpb.gov link."""
+        url = "http://content.cfpb.gov"
+        tag = f"<a href='{url}'>foo</a>"
+        self.assertIsNone(add_link_markup(tag, "/"))
 
     def test_urls_with_gov_in_them(self):
         url = "https://www.realgovsite.lol"

diff --git a/cfgov/core/utils.py b/cfgov/core/utils.py
@@ -13,10 +13,16 @@
 NON_GOV_LINKS = re.compile(
     r"https?:\/\/(?:www\.)?(?![^\?]+\.gov)(?!(content\.)?localhost).*"
 )
+
 NON_CFPB_LINKS = re.compile(
     r"(https?:\/\/(?:www\.)?(?![^\?]*(cfpb|consumerfinance).gov)"
     r"(?!(content\.)?localhost).*)"
 )
+
+LINK_PATTERN = re.compile(
+    r"^(?P<schema>https?)://(?P<domain>[^/:]+):?(?P<port>\d+)?(?P<path>/?.*)?$"
+)
+
 DOWNLOAD_LINKS = re.compile(
     r"(?i)(\.pdf|\.doc|\.docx|\.xls|\.xlsx|\.csv|\.zip)$"
 )
@@ -62,6 +68,13 @@
 ]
 
 
+def should_interstitial(url: str) -> bool:
+    """Return False only for a parseable non-CFPB URL on a ".gov" host."""
+    match = LINK_PATTERN.match(url)
+    is_gov = match is not None and match.group("domain").endswith(".gov")
+    return not (is_gov and NON_CFPB_LINKS.match(url) is not None)
+
+
 def sign_url(url):
     signer = Signer(sep="||")
     url, signature = signer.sign(url).split("||")
@@ -162,7 +175,7 @@ def add_link_markup(tag, request_path):
     elif NON_CFPB_LINKS.match(href):
         # Sets the icon to indicate you're leaving consumerfinance.gov
         icon = "external-link"
-        if NON_GOV_LINKS.match(href):
+        if should_interstitial(href):
             # Add pretty URL for print styles
             tag["data-pretty-href"] = href
             # Add the redirect notice as well
@@ -190,7 +203,7 @@ def add_link_markup(tag, request_path):
         return str(tag)
 
     if not icon:
-        return None
+        return
 
     icon_classes = {"class": LINK_ICON_TEXT_CLASSES}
     spans = tag.findAll("span", icon_classes)