-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathadd_inflections.py
46 lines (40 loc) · 1.42 KB
/
add_inflections.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import sys
from bs4 import BeautifulSoup
# from https://github.com/coljac/kindle_eo_eng/blob/master/src/inflect.py
def inflect(key):
    """Return the list of inflected forms derived from the headword ``key``.

    The last character of ``key`` selects a suffix set; each suffix replaces
    that last character. Keys ending in ``o``, ``a``, ``e`` or ``i`` produce
    forms; any other ending (including the empty string) produces an empty
    list, as does any key that contains a space.
    """
    # Multi-word entries (e.g. "blankkola maraglo") are not inflected.
    if ' ' in key:
        return []

    # Suffixes per final letter, in the order the forms are emitted.
    suffixes_by_ending = {
        "o": ("on", "oj", "ojn"),
        "a": ("aj", "an", "ajn"),
        "e": ("en",),
        "i": (
            "as", "os", "is", "us", "u",
            "ita", "ata", "ota",
            "inta", "anta", "onta",
            "intan", "antan", "ontan",
            "intaj", "antaj", "ontaj",
            "intajn", "antajn", "ontajn",
        ),
    }

    # key[-1:] is "" for an empty key, which simply misses the table.
    suffixes = suffixes_by_ending.get(key[-1:], ())
    stem = key[:-1]
    return [stem + suffix for suffix in suffixes]
def main(path: str, output_path: str) -> None:
    """Add ``idx:infl`` inflection children to the ``idx:orth`` tags of a file.

    The document at ``path`` is parsed with BeautifulSoup's ``html.parser``.
    For every ``idx:orth`` tag that has a ``value`` attribute, the value is
    stripped of surrounding whitespace and then of surrounding commas and
    written back; if ``inflect`` returns any forms for it, an ``idx:infl`` tag
    containing one ``idx:iform`` tag per form is appended to the ``idx:orth``
    tag. The serialised document is then written to ``output_path``.

    Args:
        path: File to read.
        output_path: File to write; opened in ``'w'`` mode, so any existing
            content is replaced.
    """
    # Read and write explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(path, encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    for tag in soup.find_all('idx:orth'):
        value = tag.get('value')
        if value is None:
            # No value attribute to normalise or inflect; leave the tag
            # untouched instead of aborting the whole run with a KeyError.
            continue

        value = value.strip().strip(',')
        tag['value'] = value

        inflections = inflect(value)
        if inflections:
            inflection_tag = soup.new_tag("idx:infl")
            for inflection in inflections:
                new_tag = soup.new_tag("idx:iform", attrs={'value': inflection})
                inflection_tag.append(new_tag)
            tag.append(inflection_tag)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(str(soup))
if __name__ == '__main__':
    # Both the input and output paths are required. Exit with a usage message
    # rather than an IndexError traceback when either is missing; any extra
    # arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit("usage: {} INPUT_PATH OUTPUT_PATH".format(sys.argv[0]))
    main(sys.argv[1], sys.argv[2])