Skip to content

Commit

Permalink
Add script to extract resource string, issue zufuliu#215, zufuliu#223.
Browse files Browse the repository at this point in the history
  • Loading branch information
zufuliu committed Jul 19, 2020
1 parent f4951a1 commit df0bc7c
Show file tree
Hide file tree
Showing 2 changed files with 314 additions and 3 deletions.
315 changes: 313 additions & 2 deletions locale/Locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
import shutil
import uuid
import subprocess

# Base name of this script; used as the prefix of the messages it prints.
app = os.path.basename(__file__)
# Directory under which each language's folder is looked up.
localeDir = '.'
Expand Down Expand Up @@ -132,14 +133,318 @@ def copy_back_localized_resources(language):
you can copy English resources back by run: {app} back en""")


class StringExtractor:
    """Extract translatable strings from a resource script (.rc) file.

    The script is scanned line by line for MENU, DIALOGEX and STRINGTABLE
    blocks.  Every quoted string that contains at least one letter is
    collected together with a hint comment (line number, resource keyword,
    resource id, access key, escape sequences, format specifiers) and the
    result is written to a companion text file by :meth:`save`.

    When a reversion is supplied, only strings on lines reported as changed
    by ``git diff`` against that reversion are collected.
    """

    # Kinds of top-level block tracked while scanning a resource script.
    _BLOCK_NONE = 0
    _BLOCK_MENU = 1
    _BLOCK_DIALOGEX = 2
    _BLOCK_STRINGTABLE = 3
    _BLOCK_IGNORE = 4

    def __init__(self):
        self.path = ''              # resource script currently processed
        self.reversion = ''         # reversion (plus its date) strings are compared to
        self.changed_lines = set()  # 1-based numbers of lines changed since reversion
        self.lines = []             # content of the resource script, one item per line

    def reset(self, path, reversion):
        """Prepare the extractor for a new resource script.

        Args:
            path: path of the resource script to read.
            reversion: git reversion to diff against; falsy to take all lines.

        Returns:
            ``False`` when a reversion was given but no changed line was
            found, otherwise the number of lines read (``0`` for an empty
            file), so the result is falsy when there is nothing to extract.
        """
        self.reversion = reversion
        self.path = path
        self.changed_lines = set()
        self.lines = []
        if reversion:
            self.find_changed_lines(reversion)
            if not self.changed_lines:
                return False

        # Close the file deterministically instead of relying on the GC.
        with open(path, encoding='utf-8', newline='\n') as fd:
            self.lines = fd.read().splitlines()
        return len(self.lines)

    def find_changed_lines(self, reversion):
        """Collect the lines of ``self.path`` changed since ``reversion``.

        Runs ``git diff --unified=0`` and records the new-file line numbers
        of every hunk header in ``self.changed_lines``.  On success
        ``self.reversion`` is extended with the first two fields (date and
        time) of the reversion's commit date.  Anything git writes to stderr
        is echoed to ``sys.stderr``.
        """
        cmd = ['git', 'diff', '--no-color', '--unified=0', '--text', reversion, self.path]
        result = subprocess.run(cmd, capture_output=True, encoding='utf-8')
        if result.stderr:
            print(result.stderr, file=sys.stderr)

        diff = result.stdout
        if not diff:
            return

        # Hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
        hunks = re.finditer(r'^@@\s+\-\d+(,\d+)?\s+\+(\d+)(,\d+)?\s+@@', diff, re.MULTILINE)
        for hunk in hunks:
            line = int(hunk.group(2))
            count = hunk.group(3)
            count = int(count[1:]) if count else 0
            if count:
                self.changed_lines.update(range(line, line + count))
            else:
                # Count omitted (a single line) or zero: record the start line only.
                self.changed_lines.add(line)

        # reversion time
        # The quotes in the format are passed to git literally (no shell is
        # involved), so they show up in the output and are removed below.
        cmd = ['git', 'show', '--no-patch', '--no-notes', "--pretty='%ci'", reversion]
        result = subprocess.run(cmd, capture_output=True, encoding='utf-8')
        if result.stderr:
            print(result.stderr, file=sys.stderr)
        items = result.stdout.replace("'", '').split()[:2]
        self.reversion = f"{reversion} {' '.join(items)}".strip()

    def is_line_changed(self, start, end):
        """Return whether any line in the inclusive range ``start..end`` changed.

        Every line counts as changed when no changed lines were recorded,
        i.e. when the whole file is being extracted.
        """
        if not self.changed_lines:
            return True
        return any(lineno in self.changed_lines for lineno in range(start, end + 1))

    def match_line(self, line, word):
        """Return whether ``line`` starts with the keyword ``word``.

        The keyword must be the whole line or be followed by white space or
        by ``/`` (the start of a comment), so ``BEGINNING`` does not match
        ``BEGIN``.
        """
        if not line.startswith(word):
            return False
        ch = line[len(word)] if len(line) > len(word) else ' '
        return ch.isspace() or ch == '/'

    def scan_string(self, line, escape_sequence, format_specifier, access_key, start):
        """Scan one line of a quoted resource string.

        Args:
            line: the text to scan.
            escape_sequence: set that receives escape sequences found
                (``\\a \\b \\f \\n \\r \\t \\v \\xHH \\uHHHH``).
            format_specifier: set that receives ``'%s'`` when it is used.
            access_key: list that receives the character following each
                single ``&``; pass ``None`` to skip access key detection.
            start: ``True`` when the opening quote is expected on this line,
                ``False`` when the line continues a string opened earlier.

        Returns:
            ``(value, begin, index, stop)``: the scanned text (quotes
            included), the offsets where it begins and ends in ``line``, and
            whether the closing quote was reached.  ``('', 0, 0, False)`` is
            returned when ``start`` is true and no quote is found after the
            first column.
        """
        index = 0
        if start:
            index = line.find('"')
            if index <= 0:
                return '', 0, 0, False

        length = len(line)
        begin = index
        if start:
            index += 1
        stop = False

        while index < length:
            ch = line[index]
            index += 1
            if ch == '\\':
                ch = line[index] if index < length else ''
                end = index + 1
                if ch and ch in 'abfnrtvxu':
                    if ch == 'x':
                        end += 2
                    elif ch == 'u':
                        end += 4
                    escape_sequence.add(line[index - 1:end])
                # Always skip the escaped character, recognised or not.
                index = end
            elif ch == '&':
                # Guard against '&' being the last character of the line.
                if index < length:
                    following = line[index]
                    if following == '&':
                        # '&&' is an escaped ampersand: consume both so the
                        # second one is not taken for an access key marker.
                        index += 1
                    elif access_key is not None and not following.isspace():
                        access_key.append(following)
            elif ch == '%':
                # we only use '%s' in resource files
                # Guard against '%' being the last character of the line.
                if index < length and line[index] == 's':
                    format_specifier.add('%s')
            elif ch == '"':
                if index < length and line[index] == ch:
                    # A doubled quote is an escaped quote inside the string.
                    index += 1
                else:
                    stop = True
                    break

        value = line[begin:index]
        return value, begin, index, stop

    def build_hint(self, escape_sequence, format_specifier, access_key):
        """Build the translator hint appended to an item's comment.

        Returns a string of ``', name: value'`` parts for the access key,
        the sorted escape sequences and the sorted format specifiers; empty
        when none of them is present.
        """
        hint = ''
        if access_key:
            assert len(access_key) == 1, f'more than one access key: {access_key}'
            hint += ', access key: ' + access_key[0].upper()
        if escape_sequence:
            hint += ', escape sequence: ' + ', '.join(sorted(escape_sequence))
        if format_specifier:
            hint += ', format specifier: ' + ', '.join(sorted(format_specifier))
        return hint

    def parse_resource_item(self, lineno, line, block_items):
        """Parse one statement of a MENU or DIALOGEX block.

        Args:
            lineno: 1-based line number of ``line``.
            line: the stripped statement, laid out as ``type "string", id``.
            block_items: list that receives ``{'value', 'comment'}`` for a
                translatable string.

        Returns:
            ``'CAPTION "..."'`` for a CAPTION statement (nothing is appended
            in that case), otherwise ``None``.
        """
        if not self.is_line_changed(lineno, lineno):
            return None

        escape_sequence = set()
        format_specifier = set()
        access_key = []
        value, begin, index, _ = self.scan_string(line, escape_sequence, format_specifier, access_key, True)
        # Strings without any letter have nothing to translate.
        if not any(ch.isalpha() for ch in value):
            return None

        # type "string", id
        word = line[:begin].strip()
        if not word.isidentifier():
            return None

        rcid = ''
        comma = line.find(',', index)
        if comma > 0:
            # The id is the field between the first and the second comma
            # that follow the string.
            rcid = line[comma + 1:].split(',', 1)[0].strip()
            assert rcid.isidentifier(), f'invalid resource id: {rcid!r}'

        if word == 'CAPTION':
            return f'{word} {value}'

        comment = f'// {lineno} {word} {rcid}'.strip()
        comment += self.build_hint(escape_sequence, format_specifier, access_key)
        block_items.append({
            'value': value,
            'comment': comment
        })
        return None

    def parse_string_table_item(self, lineno, line, block_items):
        """Parse one STRINGTABLE entry, which may span several lines.

        Args:
            lineno: 1-based line number of ``line``.
            line: the stripped first line, laid out as ``id "string"``.
            block_items: list that receives ``{'value', 'comment'}`` for a
                translatable string.

        Returns:
            The 1-based number of the last line consumed.

        Raises:
            ValueError: the string is still open at the end of the file.
        """
        # id "multiline string"
        escape_sequence = set()
        format_specifier = set()
        access_key = []
        value, begin, index, stop = self.scan_string(line, escape_sequence, format_specifier, access_key, True)

        rcid = line[:begin].strip()
        assert rcid.isidentifier(), f'invalid resource id: {rcid!r}'

        result = [value]
        start = lineno
        while not stop:
            if lineno >= len(self.lines):
                raise ValueError(f'unterminated string for {rcid} starting at line {start}')
            # lineno is 1-based, so it is also the 0-based index of the next line.
            line = self.lines[lineno]
            lineno += 1
            value, begin, index, stop = self.scan_string(line, escape_sequence, format_specifier, access_key, False)
            result.append(value)

        if not self.is_line_changed(start, lineno):
            return lineno

        value = '\n'.join(result)
        if not any(ch.isalpha() for ch in value):
            return lineno

        comment = f'// {start} {rcid}'
        comment += self.build_hint(escape_sequence, format_specifier, access_key)
        block_items.append({
            'value': value,
            'comment': comment
        })
        return lineno

    def _classify_block(self, line):
        """Return ``(block kind, block name)`` for a line outside any block.

        The name is empty for ignored blocks and for lines that do not open
        a block.
        """
        if self.match_line(line, 'STRINGTABLE'):
            return self._BLOCK_STRINGTABLE, 'STRINGTABLE'

        items = line.split()
        if len(items) >= 2:
            keyword = items[1]
            if keyword == 'MENU':
                return self._BLOCK_MENU, ' '.join(items[:2])
            if keyword == 'DIALOGEX':
                return self._BLOCK_DIALOGEX, ' '.join(items[:2])
            if keyword in ('ACCELERATORS', 'DESIGNINFO', 'TEXTINCLUDE'):
                return self._BLOCK_IGNORE, ''
        return self._BLOCK_NONE, ''

    def extract(self, path, reversion, out_path=None):
        """Extract the strings of ``path`` and save them.

        Args:
            path: resource script to read.
            reversion: git reversion; when given only strings on lines
                changed since then are extracted.
            out_path: output file; see :meth:`save` for the default.

        Nothing is written when :meth:`reset` reports nothing to do or when
        no string is found.  A parse failure is reported on stdout and the
        exception is re-raised.
        """
        if not self.reset(path, reversion):
            return

        block_type = self._BLOCK_NONE
        block_name = ''
        block_caption = ''
        block_begin = 0
        block_items = []
        depth = 0  # nesting level of BEGIN ... END inside the current block
        string_list = []

        lineno = 0
        line_count = len(self.lines)
        while lineno < line_count:
            line = self.lines[lineno].strip()
            lineno += 1  # from here on lineno is the 1-based number of `line`
            if not line or line.startswith(('//', '#')):
                continue

            if block_type == self._BLOCK_NONE:
                depth = 0
                block_type, name = self._classify_block(line)
                if block_type != self._BLOCK_NONE:
                    if name:
                        block_name = name
                    block_begin = lineno
                    block_items = []
                    block_caption = ''
            elif self.match_line(line, 'BEGIN'):
                depth += 1
            elif self.match_line(line, 'END'):
                depth -= 1
                if depth <= 0:
                    # The outermost END closes the block.
                    block_type = self._BLOCK_NONE
                    if block_items:
                        string_list.append({
                            'name': block_name,
                            'comment': f'// line {block_begin} - {lineno}',
                            'caption': block_caption,
                            'items': block_items
                        })
                        block_items = []
            elif block_type != self._BLOCK_IGNORE:
                try:
                    if block_type in (self._BLOCK_MENU, self._BLOCK_DIALOGEX):
                        caption = self.parse_resource_item(lineno, line, block_items)
                        if caption:
                            block_caption = caption
                    elif block_type == self._BLOCK_STRINGTABLE:
                        # A string may span lines; resume after the last one consumed.
                        lineno = self.parse_string_table_item(lineno, line, block_items)
                except Exception:
                    print(f'parse {block_type} {block_name} fail at {lineno} for {self.path}')
                    raise

        if string_list:
            self.save(string_list, out_path)

    def save(self, string_list, out_path=None):
        """Write the extracted blocks to ``out_path``.

        Args:
            string_list: blocks built by :meth:`extract`; each has ``name``,
                ``comment``, ``items`` and an optional ``caption``.
            out_path: output file; defaults to ``self.path`` with
                ``-string`` inserted before the extension.
        """
        if not out_path:
            path, ext = os.path.splitext(self.path)
            out_path = path + '-string' + ext

        print('save:', out_path)
        with open(out_path, 'w', encoding='utf-8') as fd:
            fd.write("//! Ignore line starts with //, it's a comment line.\n")
            fd.write("//! Please don't translate escape sequence or format specifiers.\n")
            fd.write('\n')
            if self.reversion:
                fd.write("//! Updated strings since: " + self.reversion + '\n')
            for block in string_list:
                fd.write(block['comment'] + '\n')
                fd.write(block['name'] + '\n')
                if caption := block.get('caption', None):
                    fd.write(caption + '\n')
                fd.write('BEGIN' + '\n')
                for item in block['items']:
                    fd.write('\t' + item['comment'] + '\n')
                    fd.write('\t' + item['value'] + '\n')
                fd.write('END' + '\n\n')

def extract_resource_string(language, reversion):
    """Extract resource strings of both resource scripts for a language.

    For ``'en'`` the English source scripts are processed; for any other
    language the ``metapath.rc`` and ``Notepad2.rc`` files in that
    language's folder under ``localeDir`` are processed.  ``reversion`` is
    passed through to the extractor unchanged.
    """
    print(f'{app}: extract updated string for {language} since {reversion}.')

    worker = StringExtractor()
    if language != 'en':
        base = os.path.join(localeDir, language)
        for filename in ('metapath.rc', 'Notepad2.rc'):
            worker.extract(os.path.join(base, filename), reversion)
        return

    worker.extract(metapath_src, reversion)
    worker.extract(notepad2_src, reversion)


def show_help():
    """Print the command line usage of this script.

    The usage text lists the ``new``, ``back`` and ``string`` actions and
    the optional ``reversion`` argument.
    """
    # Only the post-change lines of the diff are kept: a single print call
    # with one terminated string literal.
    print(f"""Usage: {app} action language [reversion]
action:
new create a new localization for specific language.
back prepare building standalone localized program for specific language,
copy back localized resources to overwrite English resources.
English resources are copied into en folder when the folder does not exist.
string extract all resource string or updated strings since specific reversion.
""")

def main():
if len(sys.argv) < 3:
Expand All @@ -159,6 +464,12 @@ def main():
print(f'{app}: language {language} not localized [{", ".join(availableLocales)}].');
return
copy_back_localized_resources(language)
elif action == 'string':
if language != 'en' and language not in availableLocales:
print(f'{app}: language {language} not localized [{", ".join(availableLocales)}].');
return
reversion = sys.argv[3] if len(sys.argv) > 3 else ''
extract_resource_string(language, reversion)
else:
show_help()

Expand Down
2 changes: 1 addition & 1 deletion wiki
Submodule wiki updated from f21f79 to e9d41c

0 comments on commit df0bc7c

Please sign in to comment.