-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenerate_json.py
40 lines (37 loc) · 987 Bytes
/
generate_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import json
import xml.etree.ElementTree as ET
from pathlib import Path
# Two-letter language codes recognised as file-name prefixes in the XML folder.
LANGS = (
    "BP",
    "DE",
    "EN",
    "ES",
    "FR",
    "IT",
    "JA",
    "JP",
    "KO",
    "PT",
    "RU",
    "SC",
    "ZH",
)

# Markup tokens replaced in element text, applied in this order.
_MARKUP_REPLACEMENTS = (
    ("<br>", "\n"),
    ("<page>", "\n\n"),
    ("<Page>", "\n\n"),
)


def normalize_text(raw):
    """Return *raw* with markup tokens turned into newlines and edges stripped.

    ``<br>`` becomes a single newline; ``<page>`` and ``<Page>`` become a blank
    line. Leading and trailing whitespace is removed afterwards, so a string
    made only of markup and whitespace normalizes to ``""``.
    """
    text = raw
    for token, replacement in _MARKUP_REPLACEMENTS:
        text = text.replace(token, replacement)
    return text.strip()


def collect_texts(xml_folder, langs=LANGS):
    """Build a ``{lang: {name: text}}`` mapping from the XML files in a folder.

    Args:
        xml_folder: ``pathlib.Path`` of the directory to scan (not recursive).
        langs: Language codes to collect. A file belongs to a language when
            the first two characters of its name equal that code; other
            entries are ignored.

    Returns:
        A dict with one entry per code in *langs* (in that order), each mapping
        the ``name`` attribute of a direct child of the XML root to its
        normalized text. Children with no ``name`` attribute, no text, or text
        that is empty after normalization are omitted.

    Raises:
        OSError: If *xml_folder* or a matching file cannot be read.
        xml.etree.ElementTree.ParseError: If a matching file is not
            well-formed XML.
    """
    result = {lang: {} for lang in langs}
    # iterdir() yields entries in arbitrary order; sorting makes the output
    # (and which duplicate name wins: the last file in sorted order)
    # reproducible across machines and runs.
    for file in sorted(xml_folder.iterdir()):
        lang = file.name[:2]
        # Skip unknown prefixes, and directories that merely look like a
        # language file (ET.parse would raise on them).
        if lang not in result or not file.is_file():
            continue
        root = ET.parse(file).getroot()
        for element in root:
            name = element.get("name")
            # A missing name would be serialized as a bogus "null" JSON key.
            if name is None or not element.text:
                continue
            text = normalize_text(element.text)
            if text:
                result[lang][name] = text
    return result


def main():
    """Read ``./TextAsset`` and write all texts to ``src/pages/all_text.json``."""
    result = collect_texts(Path("./TextAsset"))
    with open("src/pages/all_text.json", "w", encoding="UTF-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()