superdesk · marwoodandrew · Jan 24, 2024 · Dec 22, 2023
diff --git a/server/aap/publish/formatters/aap_bulletinbuilder_formatter.py b/server/aap/publish/formatters/aap_bulletinbuilder_formatter.py
@@ -70,7 +70,7 @@ def format(self, article, subscriber, codes=None):
             formatted_article['abstract'] = self.get_text_content(
                 to_ascii(formatted_article.get('abstract', '') or '')).strip()
             formatted_article['headline'] = self.get_text_content(
-                to_ascii(formatted_article.get('headline', ''))).strip()
+                to_ascii(formatted_article.get('headline', '')), space_on_elements=False).strip()
             formatted_article['byline'] = self.get_text_content(
                 to_ascii(formatted_article.get('byline', '') or '')).strip()
 
@@ -116,7 +116,7 @@ def format(self, article, subscriber, codes=None):
     def can_format(self, format_type, article):
+        """Tell whether this formatter handles the given format type.
+
+        :param format_type: format name; only 'AAP BULLETIN BUILDER' matches
+        :param article: accepted but not consulted by this check
+        :return: True when format_type equals 'AAP BULLETIN BUILDER'
+        """
         return format_type == 'AAP BULLETIN BUILDER'
 
-    def get_text_content(self, content):
+    def get_text_content(self, content, space_on_elements=True):
         content = content.replace('<br>', '<br/>').replace('</br>', '')
         # remove control chars except \n
         content = re.sub('[\x00-\x09\x0b-\x1f]', '', content)
@@ -125,7 +125,7 @@ def get_text_content(self, content):
         if content == '':
             return ''
 
-        parsed = parse_html(content, content='html', space_on_elements=True)
+        parsed = parse_html(content, content='html', space_on_elements=space_on_elements)
 
         # breaks are replaced with spaces
         for br in parsed.xpath('//br'):

diff --git a/server/aap/publish/formatters/aap_bulletinbuilder_formatter_tests.py b/server/aap/publish/formatters/aap_bulletinbuilder_formatter_tests.py
@@ -786,3 +786,59 @@ def test_embedded_item(self):
         self.assertGreater(int(seq), 0)
         test_article = json.loads(item.get('data'))
         self.assertEqual(test_article['body_html'], '<p>pre amble</p><p>post amble</p>')
+
+    def test_clean_headline_html(self):
+        """Check that a fully styled headline is emitted as its plain text.
+
+        The article's draftjsState marks all 55 headline characters BOLD and the
+        final character LIMIT_CHARACTERS_OVERFLOW; the formatted headline must
+        equal the original text exactly, with nothing inserted between the runs.
+        """
+        headline = '1234567890123456789012345123456789012345678901234567890'
+        style_ranges = [
+            {'offset': 0, 'length': 55, 'style': 'BOLD'},
+            {'offset': 54, 'length': 1, 'style': 'LIMIT_CHARACTERS_OVERFLOW'},
+        ]
+        headline_block = {
+            'key': '2fvvl',
+            'text': headline,
+            'type': 'unstyled',
+            'depth': 0,
+            'inlineStyleRanges': style_ranges,
+            'entityRanges': [],
+            'data': {'MULTIPLE_HIGHLIGHTS': {}},
+        }
+        editor_state = {'blocks': [headline_block], 'entityMap': {}}
+
+        article = {
+            config.ID_FIELD: '123',
+            config.VERSION: 2,
+            'source': 'AAP',
+            'headline': headline,
+            'slugline': 'slugline',
+            'abstract': '<p>abstract</p>',
+            'type': 'text',
+            'anpa_category': [
+                {'qcode': 'a', 'name': 'Australian General News'},
+            ],
+            'flags': {
+                'marked_for_legal': True,
+            },
+            'body_html': '<p>The story<p>',
+            'fields_meta': {
+                'headline': {
+                    'draftjsState': [editor_state],
+                },
+            },
+        }
+
+        # presumably the first stored subscriber; the shape of find()'s result is not visible here
+        subscriber = self.app.data.find('subscribers', None, None)[0][0]
+        formatted = self._formatter.format(article, subscriber)
+        seq, item = formatted[0]
+        envelope = json.loads(item)
+
+        self.assertGreater(int(seq), 0)
+        # the 'data' member is itself a JSON document holding the formatted article
+        test_article = json.loads(envelope.get('data'))
+        self.assertEqual(test_article['headline'], headline)