brave · bbondy · Jul 6, 2020 · Jun 9, 2020 · Jun 24, 2020 · Jun 26, 2020
diff --git a/script/chromium-rebase-l10n.py b/script/chromium-rebase-l10n.py
@@ -2,10 +2,13 @@
 import io
 import os.path
 from os import walk
+import re
 import sys
 from lxml import etree
 from lib.config import get_env_var
-from lib.transifex import pull_source_files_from_transifex
+from lib.grd_string_replacements import (write_xml_file_from_tree, write_braveified_grd_override,
+                                         update_braveified_grd_tree_override, get_override_file_path)
+from lib.transifex import pull_source_files_from_transifex, textify
 
 SOURCE_ROOT = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
 
@@ -16,10 +19,58 @@ def parse_args():
     return parser.parse_args()
 
 
+def generate_overrides_and_replace_strings(source_string_path):
+    original_xml_tree_with_branding_fixes = etree.parse(source_string_path)
+    update_braveified_grd_tree_override(original_xml_tree_with_branding_fixes, True)
+    write_braveified_grd_override(source_string_path)
+    modified_xml_tree = etree.parse(source_string_path)
+
+    original_messages = original_xml_tree_with_branding_fixes.xpath('//message')
+    modified_messages = modified_xml_tree.xpath('//message')
+    assert len(original_messages) == len(modified_messages)
+    for i in range(0, len(original_messages)):
+        if textify(original_messages[i]) == textify(modified_messages[i]):
+            modified_messages[i].getparent().remove(modified_messages[i])
+
+    # Remove uneeded things from the override grds
+    nodes_to_remove = [
+        '//outputs',
+        '//comment()',
+    ]
+    for xpath_expr in nodes_to_remove:
+        nodes = modified_xml_tree.xpath(xpath_expr)
+        for n in nodes:
+            if n.getparent() is not None:
+                n.getparent().remove(n)
+    parts = modified_xml_tree.xpath('//part')
+    for part in parts:
+        override_file = get_override_file_path(part.attrib['file'])
+        if os.path.exists(os.path.join(os.path.dirname(source_string_path), override_file)):
+            part.attrib['file'] = override_file
+        else:
+            # No grdp override here, carry on
+            part.getparent().remove(part)
+    files = modified_xml_tree.xpath('//file')
+    for f in files:
+        f.attrib['path'] = get_override_file_path(f.attrib['path'])
+
+    # Write out an override file that is a duplicate of the original file but with strings that
+    # are shared with Chrome stripped out.
+    filename = os.path.basename(source_string_path)
+    (basename, ext) = filename.split('.')
+    override_string_path = get_override_file_path(source_string_path)
+    modified_messages = modified_xml_tree.xpath('//message')
+    modified_parts = modified_xml_tree.xpath('//part')
+    if len(modified_messages) > 0 or len(modified_parts) > 0:
+        write_xml_file_from_tree(override_string_path, modified_xml_tree)
+
+
 def main():
     args = parse_args()
+    # This file path is a string path inside brave/ but just recently copied
+    # in from chromium files which need replacements.
     source_string_path = os.path.join(SOURCE_ROOT, args.source_string_path[0])
-    filename = os.path.basename(source_string_path).split('.')[0]
+    filename = os.path.basename(source_string_path)
     extension = os.path.splitext(source_string_path)[1]
     if (extension != '.grd' and extension != '.grdp'):
         print 'returning early'
@@ -28,11 +79,13 @@ def main():
     print 'Rebasing source string file:', source_string_path
     print 'filename:', filename
 
-    content = ''
-    xml_tree = etree.parse(source_string_path)
+    generate_overrides_and_replace_strings(source_string_path)
+
     # If you modify the translateable attribute then also update
     # is_translateable_string function in brave/script/lib/transifex.py.
-    if filename == 'brave_strings':
+    xml_tree = etree.parse(source_string_path)
+    (basename, ext) = filename.split('.')
+    if basename == 'brave_strings':
         elem1 = xml_tree.xpath('//message[@name="IDS_SXS_SHORTCUT_NAME"]')[0]
         elem1.text = 'Brave Nightly'
         elem1.attrib.pop('desc')

diff --git a/script/lib/grd_string_replacements.py b/script/lib/grd_string_replacements.py
@@ -0,0 +1,143 @@
+from lxml import etree
+import os
+import re
+
+
+# Strings we want to replace but that we also replace automatically for XTB files.
+# Like every table below, this is a list of (regex pattern, replacement) pairs
+# that braveify_grd_text passes to re.sub one after another in list order,
+# which is why e.g. 'Google Chrome' is listed ahead of 'Chrome'.
+branding_replacements = [
+    (r'The Chromium Authors. All rights reserved.', r'The Brave Authors. All rights reserved.'),
+    (r'Google LLC. All rights reserved.', r'The Brave Authors. All rights reserved.'),
+    (r'The Chromium Authors', r'Brave Software Inc'),
+    (r'Google Chrome', r'Brave'),
+    (r'(Google)(?! Play)', r'Brave'),
+    (r'Chromium', r'Brave'),
+    (r'Chrome', r'Brave'),
+]
+
+
+# Strings we want to replace but that we need to use Transifex for
+# to translate the XTB files.
+# braveify_grd_text skips this table when it is called with
+# branding_replacements_only set.
+default_replacements = [
+    (r'Brave Web Store', r'Web Store'),
+    (r'Automatically send usage statistics and crash reports to Brave',
+     r'Automatically send crash reports to Brave'),
+    (r'Automatically sends usage statistics and crash reports to Brave',
+     r'Automatically sends crash reports to Brave'),
+    (r'You\'re incognito', r'This is a private window'),
+    (r'an incognito', r'a private'),
+    (r'an Incognito', r'a Private'),
+    (r'incognito', r'private'),
+    (r'Incognito', r'Private'),
+    (r'inco&gnito', r'&private'),
+    (r'Inco&gnito', r'&Private'),
+    (r'People', r'Profiles'),
+    # 'people' but only in the context of profiles, not humans.
+    (r'(?<!authenticate )people', r'profiles'),
+    (r'(Person)(?!\w)', r'Profile'),
+    (r'(person)(?!\w)', r'profile'),
+    (r'Bookmarks Bar\n', r'Bookmarks\n'),
+    (r'Bookmarks bar\n', r'Bookmarks\n'),
+    (r'bookmarks bar\n', r'bookmarks\n'),
+]
+
+
+# Fix up some strings after aggressive first round replacement.
+# braveify_grd_text applies these after the tables above, restoring product
+# names that the broad 'Google'/'Chrome' patterns rewrote.
+fixup_replacements = [
+    (r'Brave Cloud Print', r'Google Cloud Print'),
+    (r'Brave Docs', r'Google Docs'),
+    (r'Brave Drive', r'Google Drive'),
+    (r'Brave OS', r'Chrome OS'),
+    (r'Brave Safe Browsing', r'Google Safe Browsing'),
+    (r'Safe Browsing \(protects you and your device from dangerous sites\)',
+     r'Google Safe Browsing (protects you and your device from dangerous sites)'),
+    (r'Sends URLs of some pages you visit to Brave', r'Sends URLs of some pages you visit to Google'),
+    (r'Google Google', r'Google'),
+]
+
+
+# Replacements for text nodes only; they are applied neither inside
+# descriptions nor inside comments. braveify_grd_text runs them last and only
+# when its is_main_text argument is true.
+main_text_only_replacements = [
+    (r'Copyright', u'Copyright \xa9'),
+]
+
+
+def braveify_grd_text(text, is_main_text, branding_replacements_only):
+    """Replaces text string to Brave wording"""
+    for (pattern, to) in branding_replacements:
+        text = re.sub(pattern, to, text)
+    if not branding_replacements_only:
+        for (pattern, to) in default_replacements:
+            text = re.sub(pattern, to, text)
+    for (pattern, to) in fixup_replacements:
+        text = re.sub(pattern, to, text)
+    if is_main_text:
+        for (pattern, to) in main_text_only_replacements:
+            text = re.sub(pattern, to, text)
+    return text
+
+
+def generate_braveified_node(elem, is_comment, branding_replacements_only):
+    """Replaces a node and attributes to Brave wording"""
+    if elem.text:
+        elem.text = braveify_grd_text(elem.text, not is_comment, branding_replacements_only)
+
+    if elem.tail:
+        elem.tail = braveify_grd_text(elem.tail, not is_comment, branding_replacements_only)
+
+    if 'desc' in elem.keys():
+        elem.attrib['desc'] = braveify_grd_text(elem.attrib['desc'], False, branding_replacements_only)
+    if 'meaning' in elem.keys():
+        elem.attrib['meaning'] = braveify_grd_text(elem.attrib['meaning'], False, branding_replacements_only)
+    for child in elem:
+        generate_braveified_node(child, is_comment, branding_replacements_only)
+
+
+def format_xml_style(xml_content):
+    """Formats an xml file according to how Chromium GRDs are formatted"""
+    xml_content = re.sub(r'\s+desc="', r' desc="', xml_content)
+    xml_content = xml_content.replace('/>', ' />')
+    xml_content = xml_content.replace(r'<?xml version="1.0" encoding="UTF-8"?>',
+                                      r'<?xml version=\'1.0\' encoding=\'UTF-8\'?>')
+    return xml_content
+
+
+def write_xml_file_from_tree(string_path, xml_tree):
+    """Writes out an xml tree to a file with Chromium GRD formatting replacements"""
+    transformed_content = etree.tostring(xml_tree,
+                                         pretty_print=True,
+                                         xml_declaration=True,
+                                         encoding='UTF-8')
+    transformed_content = format_xml_style(transformed_content)
+    with open(string_path, mode='w') as f:
+        f.write(transformed_content)
+
+
+def update_braveified_grd_tree_override(source_xml_tree, branding_replacements_only):
+    """Takes in a grd(p) tree and replaces all messages and comments with Brave wording"""
+    for elem in source_xml_tree.xpath('//message'):
+        generate_braveified_node(elem, False, branding_replacements_only)
+    for elem in source_xml_tree.xpath('//comment()'):
+        generate_braveified_node(elem, True, branding_replacements_only)
+
+
+def write_braveified_grd_override(source_string_path):
+    """Takes in a grd file and replaces all messages and comments with Brave wording"""
+    source_xml_tree = etree.parse(source_string_path)
+    update_braveified_grd_tree_override(source_xml_tree, False)
+    write_xml_file_from_tree(source_string_path, source_xml_tree)
+
+
+def get_override_file_path(source_string_path):
+    """Obtain src/brave source string override path for local grd strings with replacements"""
+    filename = os.path.basename(source_string_path)
+    (basename, ext) = filename.split('.')
+    if ext == 'xtb':
+        # _override goes after the string name but before the _[locale].xtb part
+        parts = basename.split('_')
+        parts.insert(-1, 'override')
+        override_string_path = os.path.join(os.path.dirname(source_string_path),
+                                            '.'.join(('_'.join(parts), ext)))
+    else:
+        override_string_path = os.path.join(os.path.dirname(source_string_path),
+                                            '.'.join((basename + '_override', ext)))
+    return override_string_path