Skip to content

Commit

Permalink
feat: Auto localization;
Browse files Browse the repository at this point in the history
fix: remove single emoji from locale files.
  • Loading branch information
MZhao-ouo committed Oct 5, 2023
1 parent 26b41b9 commit dc5bb21
Show file tree
Hide file tree
Showing 8 changed files with 831 additions and 807 deletions.
20 changes: 10 additions & 10 deletions ChuanhuChatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,13 @@ def create_new_model():
with gr.Row(visible=False):
with gr.Column(min_width=42, scale=1):
historyDeleteBtn = gr.Button(
i18n("🗑️"), elem_id="gr-history-delete-btn")
"🗑️", elem_id="gr-history-delete-btn")
with gr.Column(min_width=42, scale=1):
historyDownloadBtn = gr.Button(
i18n("⏬"), elem_id="gr-history-download-btn")
"⏬", elem_id="gr-history-download-btn")
with gr.Column(min_width=42, scale=1):
historyMarkdownDownloadBtn = gr.Button(
i18n("⤵️"), elem_id="gr-history-mardown-download-btn")
"⤵️", elem_id="gr-history-mardown-download-btn")
with gr.Row(visible=False):
with gr.Column(scale=6):
saveFileName = gr.Textbox(
Expand All @@ -104,9 +104,9 @@ def create_new_model():
)
with gr.Column(scale=1):
renameHistoryBtn = gr.Button(
i18n("💾 Rename Chat"), elem_id="gr-history-save-btn")
i18n("💾 保存对话"), elem_id="gr-history-save-btn")
exportMarkdownBtn = gr.Button(
i18n("📝 Export as Markdown"), elem_id="gr-markdown-export-btn")
i18n("📝 导出为 Markdown"), elem_id="gr-markdown-export-btn")

with gr.Column(elem_id="chuanhu-menu-footer"):
with gr.Row(elem_id="chuanhu-func-nav"):
Expand Down Expand Up @@ -181,10 +181,10 @@ def create_new_model():
with gr.Row(visible=False) as like_dislike_area:
with gr.Column(min_width=20, scale=1):
likeBtn = gr.Button(
i18n("👍"), elem_id="gr-like-btn")
"👍", elem_id="gr-like-btn")
with gr.Column(min_width=20, scale=1):
dislikeBtn = gr.Button(
i18n("👎"), elem_id="gr-dislike-btn")
"👎", elem_id="gr-dislike-btn")

with gr.Column(elem_id="toolbox-area", scale=1):
# For CSS setting, there is an extra box. Don't remove it.
Expand All @@ -203,7 +203,7 @@ def create_new_model():
value=INITIAL_SYSTEM_PROMPT,
lines=8
)
remain_system_prompt_checkbox = gr.Checkbox(
retain_system_prompt_checkbox = gr.Checkbox(
label=i18n("新建对话保留Prompt"), value=False, visible=True, elem_classes="switch-checkbox")
with gr.Accordion(label=i18n("加载Prompt模板"), open=False):
with gr.Column():
Expand Down Expand Up @@ -584,7 +584,7 @@ def create_greeting(request: gr.Request):

emptyBtn.click(
reset,
inputs=[current_model, remain_system_prompt_checkbox],
inputs=[current_model, retain_system_prompt_checkbox],
outputs=[chatbot, status_display, historySelectList, systemPromptTxt],
show_progress=True,
_js='(a,b)=>{return clearChatbot(a,b);}',
Expand Down Expand Up @@ -682,7 +682,7 @@ def create_greeting(request: gr.Request):
historyRefreshBtn.click(**refresh_history_args)
historyDeleteBtn.click(delete_chat_history, [current_model, historySelectList, user_name], [status_display, historySelectList, chatbot], _js='(a,b,c)=>{return showConfirmationDialog(a, b, c);}').then(
reset,
inputs=[current_model, remain_system_prompt_checkbox],
inputs=[current_model, retain_system_prompt_checkbox],
outputs=[chatbot, status_display, historySelectList, systemPromptTxt],
show_progress=True,
_js='(a,b)=>{return clearChatbot(a,b);}',
Expand Down
249 changes: 122 additions & 127 deletions locale/en_US.json

Large diffs are not rendered by default.

118 changes: 86 additions & 32 deletions locale/extract_locale.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
import os
import json
import re
import os, json, re, sys
import aiohttp, asyncio
import commentjson

# Install the default asyncio event-loop policy before any loop is created.
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())

# Read the API credentials from config.json (parsed with commentjson, so the
# file may contain comments).
with open("config.json", "r", encoding="utf-8") as f:
    config = commentjson.load(f)

api_key = config["openai_api_key"]

# Use the configured API base when one is given; otherwise fall back to the
# public OpenAI endpoint.
if "openai_api_base" in config:
    url = config["openai_api_base"] + "/v1/chat/completions"
else:
    url = "https://api.openai.com/v1/chat/completions"


def get_current_strings():
pattern = r'i18n\s*\(\s*["\']([^"\']*(?:\)[^"\']*)?)["\']\s*\)'
Expand Down Expand Up @@ -40,7 +48,7 @@ def sort_strings(existing_translations):
sorted_translations = {}
# Add entries with (NOT USED) in their values
for key, value in sorted(existing_translations.items(), key=lambda x: x[0]):
if "(NOT USED)" in value:
if "(🔴NOT USED)" in value:
sorted_translations[key] = value
# Add entries with empty values
for key, value in sorted(existing_translations.items(), key=lambda x: x[0]):
Expand All @@ -54,31 +62,77 @@ def sort_strings(existing_translations):
return sorted_translations


current_strs = get_current_strings()

locale_files = []
# 遍历locale目录下的所有json文件
for dirpath, dirnames, filenames in os.walk("locale"):
for filename in filenames:
if filename.endswith(".json"):
locale_files.append(os.path.join(dirpath, filename))


for locale_filename in locale_files:
if "zh_CN" in locale_filename:
continue
locale_strs = get_locale_strings(locale_filename)

# Add new keys
for key in current_strs:
if key not in locale_strs:
locale_strs[key] = ""
# Add (NOT USED) to invalid keys
for key in locale_strs:
if key not in current_strs:
locale_strs[key] = "(NOT USED)" + locale_strs[key]

locale_strs = sort_strings(locale_strs)

with open(locale_filename, 'w', encoding='utf-8') as f:
json.dump(locale_strs, f, ensure_ascii=False, indent=4)
async def auto_translate(str, language):
    """Translate one UI string into ``language`` via the chat-completions API.

    Sends a single POST request to the module-level ``url``, authenticated
    with the module-level ``api_key``, and returns the text of the first
    choice in the response.

    Args:
        str: Source text to translate. The name shadows the builtin; it is
            kept unchanged so existing callers keep working.
        language: Target language identifier, inserted verbatim into the
            system prompt.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        KeyError: If the response JSON has no ``choices`` entry (for example
            when the service answers with an error payload).
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        # The sampling temperature is a request-body parameter. Sending it as
        # an HTTP header (as before) has no effect on the completion.
        "temperature": 0,
        "messages": [
            {
                "role": "system",
                "content": f"You are a translation program;\nYour job is to translate user input into {language};\nThe content you are translating is a string in the App;\nDo not explain emoji;\nIf input is only a emoji, please simply return origin emoji;\nPlease ensure that the translation results are concise and easy to understand."
            },
            {"role": "user", "content": f"{str}"},
        ],
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=payload) as response:
            data = await response.json()
            return data["choices"][0]["message"]["content"]


async def main(auto=False):
    """Bring every non-zh_CN locale JSON file in line with the strings in use.

    For each ``*.json`` file found under ``locale`` (files whose path
    contains ``zh_CN`` are skipped):

    * keys returned by ``get_current_strings()`` that are missing from the
      file are added with an empty value;
    * keys in the file that are no longer returned by
      ``get_current_strings()`` get their value prefixed with
      ``(🔴NOT USED)`` (at most once, so re-running the script does not
      stack markers);
    * the entries are reordered with ``sort_strings()``;
    * when ``auto`` is true, every entry whose value is still empty is
      translated with ``auto_translate()`` and prefixed with
      ``(🟡REVIEW NEEDED)``;
    * the result is written back to the same file.

    Args:
        auto: When true, fill empty translations through ``auto_translate``.
    """
    not_used_mark = "(🔴NOT USED)"
    current_strs = get_current_strings()

    # Walk the locale directory and collect every JSON file.
    locale_files = []
    for dirpath, dirnames, filenames in os.walk("locale"):
        for filename in filenames:
            if filename.endswith(".json"):
                locale_files.append(os.path.join(dirpath, filename))

    for locale_filename in locale_files:
        if "zh_CN" in locale_filename:
            continue
        locale_strs = get_locale_strings(locale_filename)

        # Derive the language name from the file name itself rather than
        # from fixed character offsets into the path.
        language = os.path.splitext(os.path.basename(locale_filename))[0]

        # Add new keys
        new_keys = []
        for key in current_strs:
            if key not in locale_strs:
                new_keys.append(key)
                locale_strs[key] = ""
        print(f"{language}'s new str: {len(new_keys)}")

        # Mark keys that are no longer used. Count them directly, and skip
        # values that already carry the marker from a previous run.
        invalid_count = 0
        for key, value in locale_strs.items():
            if key in current_strs:
                continue
            invalid_count += 1
            if not value.startswith(not_used_mark):
                locale_strs[key] = not_used_mark + value
        print(f"{language}'s invalid str: {invalid_count}")

        locale_strs = sort_strings(locale_strs)

        if auto:
            non_translated_keys = [
                key for key in locale_strs if locale_strs[key] == ""
            ]
            # All translation requests for this file run concurrently.
            results = await asyncio.gather(
                *(auto_translate(key, language) for key in non_translated_keys)
            )
            for key, result in zip(non_translated_keys, results):
                locale_strs[key] = "(🟡REVIEW NEEDED)" + result
            print(f"{language}'s auto translated str: {len(non_translated_keys)}")

        with open(locale_filename, 'w', encoding='utf-8') as f:
            json.dump(locale_strs, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    # Auto-translation is enabled only when the first command-line argument
    # is exactly "--auto".
    auto = len(sys.argv) > 1 and sys.argv[1] == "--auto"
    asyncio.run(main(auto))
Loading

0 comments on commit dc5bb21

Please sign in to comment.