-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathmain.py
169 lines (125 loc) · 4.86 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import src.cmudict_reader as cmudict_reader
import src.plan_reader as plan_reader
import re
def save_all_phoneme_to_md(path="phonemes.md"):
    """Write a Markdown table of every plan's phoneme classes to *path*.

    One table row is written per entry of ``plan_reader.all_plans``, showing
    the plan's ``language`` and the ``head`` / ``tail`` members of its
    ``phon_class``. Any existing file at *path* is overwritten.

    Args:
        path: Destination file; defaults to ``phonemes.md`` in the current
            working directory.
    """
    # The header row contains non-ASCII text, so the encoding is pinned
    # explicitly instead of relying on the platform default (which is not
    # UTF-8 everywhere and can raise UnicodeEncodeError).
    with open(path, "w", encoding="utf-8") as f:
        f.write("# Phonemes\n")
        f.write("| 语言 Language | 辅音 Heads | 元音 Tails |\n| --- | --- | --- |\n")
        for plan in plan_reader.all_plans:
            phon_class = plan["phon_class"]
            f.write("| {} | {} | {} |\n".format(plan["language"], phon_class["head"], phon_class["tail"]))
        f.write("\n\n")
def is_valid_phoneme(phoneme, language):
    """Report whether *phoneme* belongs to the plan registered for *language*.

    Only the first entry of ``plan_reader.all_plans`` whose ``language``
    equals *language* is consulted; its ``head`` and ``tail`` phoneme
    classes are combined and searched. The original author's note lists the
    language codes as: zh, jp, eng, spa.

    Returns:
        True if the phoneme is found in that plan, False otherwise
        (including when no plan matches *language*).
    """
    for candidate in plan_reader.all_plans:
        if candidate["language"] != language:
            continue
        classes = candidate["phon_class"]
        return phoneme in [*classes["head"], *classes["tail"]]
    return False
def pinyin_to_phoneme(pinyin):
    """Convert a pinyin syllable to its phoneme entry.

    The syllable is looked up directly in ``plan_reader.zh_plan["dict"]``;
    if absent, it is resolved through ``zh_plan["syllable_alias"]`` and the
    aliased key is looked up in the dict instead.

    Returns:
        The dict entry for the syllable, or the string
        ``"pinyin not found"`` when it is neither a dict key nor an alias.

    Raises:
        KeyError: If an alias points at a key missing from the dict.
    """
    plan = plan_reader.zh_plan
    table = plan["dict"]
    if pinyin in table:
        return table[pinyin]
    aliases = plan["syllable_alias"]
    if pinyin in aliases:
        return table[aliases[pinyin]]
    return "pinyin not found"
def jp_word_to_phoneme(jp_word):
    """Convert a Japanese word to its phoneme entry.

    If *jp_word* is a key of ``plan_reader.jp_word2romaji`` it is first
    replaced by the mapped value. The result is then looked up in
    ``plan_reader.jp_plan["dict"]``, falling back to
    ``jp_plan["syllable_alias"]`` to resolve an alias.

    Returns:
        The dict entry for the word, or the string ``"word not found"``
        when it is neither a dict key nor an alias.

    Raises:
        KeyError: If an alias points at a key missing from the dict.
    """
    plan = plan_reader.jp_plan
    to_romaji = plan_reader.jp_word2romaji
    key = to_romaji[jp_word] if jp_word in to_romaji else jp_word
    table = plan["dict"]
    if key in table:
        return table[key]
    aliases = plan["syllable_alias"]
    if key in aliases:
        return table[aliases[key]]
    return "word not found"
def find_all_patterns(lst, target):
    """Return the start indices of non-overlapping matches of *target* in *lst*.

    Before comparison, every element of the candidate window that is a
    member of ``plan_reader.en_plan["phon_class"]["tail"]`` is replaced by
    the placeholder string ``"vowel"``, so *target* can use ``"vowel"`` as a
    wildcard for that class. After a match the scan jumps past the matched
    window, so matches never overlap.

    Args:
        lst: Sequence of phonemes to scan.
        target: Pattern to look for (a list; may contain ``"vowel"``).

    Returns:
        List of start indices, in ascending order. Empty when *target* is
        empty or longer than *lst*.
    """
    n = len(target)
    # An empty target would match at every position while advancing by
    # n == 0 steps, so the scan would never terminate; report no matches.
    if n == 0 or len(lst) < n:
        return []

    # Loop-invariant lookup, hoisted out of the scan.
    vowels = plan_reader.en_plan["phon_class"]["tail"]
    indices = []
    last_start = len(lst) - n
    i = 0
    while i <= last_start:
        window = ["vowel" if phn in vowels else phn for phn in lst[i:i + n]]
        if window == target:
            indices.append(i)
            i += n  # skip the whole match so matches do not overlap
        else:
            i += 1
    return indices
def replace_elements(arr, start_idx, num_elements, sub_arr):
    """Return a copy of *arr* with a run of elements swapped for *sub_arr*.

    The ``num_elements`` items starting at ``start_idx`` are dropped and
    *sub_arr* is spliced in at that position. *arr* itself is not modified.
    """
    resume_at = start_idx + num_elements
    prefix, suffix = arr[:start_idx], arr[resume_at:]
    return prefix + sub_arr + suffix
def find_and_replace_all_patterns(lst, target, replacement, replace_partial=False):
    """Replace every match of *target* in *lst* with *replacement*.

    Matches are located with ``find_all_patterns`` on the input list.

    Args:
        lst: Sequence of phonemes to rewrite.
        target: Pattern to search for.
        replacement: Elements to splice in at each match.
        replace_partial: When True, only the first ``len(replacement)``
            elements of each match are replaced and the rest of the match
            is kept. When False, the whole match (``len(target)`` elements)
            is replaced.

    Returns:
        A new list with the replacements applied.
    """
    indices = find_all_patterns(lst, target)
    removed = len(replacement) if replace_partial else len(target)
    # The indices refer to positions in the ORIGINAL list. Applying them
    # right-to-left keeps earlier indices valid even when a replacement
    # changes the list length (e.g. ['t', 'r'] -> ['tr']).
    for idx in reversed(indices):
        lst = replace_elements(lst, idx, removed, replacement)
    return lst
# Merge/voicing rules sketched by the original author (currently disabled,
# see the commented-out calls inside eng_phoneme_normalize):
# t r -> tr
# d r -> dr
# s t vowel --> s d vowel
# s k vowel --> s g vowel
# s p vowel --> s b vowel
# s tr vowel --> s dr vowel
def eng_phoneme_normalize(syllable):
    """Lower-case each phoneme and drop a single trailing digit, if any.

    Args:
        syllable: Sequence of phoneme strings.

    Returns:
        A new list with one normalized phoneme per input element.
    """
    normalized = []
    for raw in syllable:
        lowered = raw.lower()
        normalized.append(lowered[:-1] if re.search(r'\d$', lowered) else lowered)
    # Disabled pattern rewrites, kept for reference:
    # normalized = find_and_replace_all_patterns(normalized, ['t', 'r'], ['tr'])
    # normalized = find_and_replace_all_patterns(normalized, ['d', 'r'], ['dr'])
    # normalized = find_and_replace_all_patterns(normalized, ['s', 't', 'vowel'], ['s', 'd'], True)
    # normalized = find_and_replace_all_patterns(normalized, ['s', 'k', 'vowel'], ['s', 'g'], True)
    # normalized = find_and_replace_all_patterns(normalized, ['s', 'p', 'vowel'], ['s', 'b'], True)
    # normalized = find_and_replace_all_patterns(normalized, ['s', 'tr', 'vowel'], ['s', 'dr'], True)
    return normalized
def eng_word_to_phoneme(en_word):
    """Look up an English word and return its normalized phoneme lists.

    The word is upper-cased and used as a key into the dictionary returned
    by ``cmudict_reader.get_dict()``. Each element of the looked-up value is
    passed through ``eng_phoneme_normalize``.

    Returns:
        A list with one normalized phoneme list per element of the
        dictionary entry, or the string ``"word not found"`` when the word
        is not in the dictionary.
    """
    pronunciations = cmudict_reader.get_dict()
    lookup_key = en_word.upper()
    if lookup_key not in pronunciations:
        return "word not found"
    return [eng_phoneme_normalize(entry) for entry in pronunciations[lookup_key]]
if __name__ == "__main__":
    # Manual smoke run: regenerate the phoneme table, then print sample
    # lookups for each converter.
    save_all_phoneme_to_md()
    for sample_phoneme, sample_language in (("ah", "eng"), ("xxsl", "spa"), ("ts", "jp")):
        print(is_valid_phoneme(sample_phoneme, sample_language))

    # pinyin_to_phoneme
    print("==========================")
    for sample in ("pin", "lve", "lue", "asd"):
        print(pinyin_to_phoneme(sample))

    # jp_word_to_phoneme
    print("==========================")
    for sample in ("ヴぁ", "ja", "jya", "asd"):
        print(jp_word_to_phoneme(sample))

    # eng_word_to_phoneme
    print("==========================")
    for sample in ("yesterday", "untrue", "arrested", "favorite"):
        print(eng_word_to_phoneme(sample))