add usecase and finish readme

timedomain-tech · Jun 1, 2023 · 980403a · 980403a
1 parent 6fe773c
commit 980403a
Show file tree

Hide file tree

Showing 7 changed files with 114 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -18,3 +18,13 @@ Here is a brief description of each file:
         - **dict** contains the syllable-to-phoneme dictionary.
         - **phon_class** contains all phonemes; **head** lists the consonants and **tail** lists the vowels.
 - ***cmudict.rep***: This file contains the English pronouncing dictionary (CMUdict); see http://www.speech.cs.cmu.edu/cgi-bin/cmudict
+
+## Usage
+
+The `main.py` file contains basic use cases:
+1. Checking whether a phoneme is included in the system.
+2. Converting phonemes for Chinese, Japanese, and English.
+
+## Contributing
+
+Contributions and suggestions for modification are welcome. You can open an issue or send an email to sean.z@timedomain.ai.
diff --git a/docs/README_CN.md b/docs/README_CN.md
@@ -16,4 +16,16 @@
         - **syllable_alias** 表示每个音节可以有多种拼写。
         - **dict** 包含音节到音素的字典。
         - **phon_class** 包含所有音素，**head** 表示辅音，**tail** 表示元音。
-- ***cmudict.rep***：该文件包含英语发音词典，参考网址：http://www.speech.cs.cmu.edu/cgi-bin/cmudict
+- ***cmudict.rep***：该文件包含英语发音词典，参考网址：http://www.speech.cs.cmu.edu/cgi-bin/cmudict
+
+## 使用
+
+`main.py` 中包含了基本的用例:
+1. 检查音素是否包含在系统中
+2. 中文，日语，英语的音素转换
+
+## 贡献
+
+欢迎补充内容和讨论修改建议，可以提issue或者发邮件到 sean.z@timedomain.ai
+
+
diff --git a/main.py b/main.py
@@ -1,20 +1,55 @@
 import src.cmudict_reader as cmudict_reader
 import src.plan_reader as plan_reader
+import re
 
 
-def is_valid_phoneme(phoneme):
+# Phoneme validation.
+# language: one of "zh", "jp" or "eng".
+def is_valid_phoneme(phoneme, language):
+    """Return whether *phoneme* is listed for the plan tagged *language*.
+
+    The three plans exposed by ``plan_reader`` are scanned in the order
+    zh, jp, en. For the first plan whose "language" field equals
+    *language*, the phoneme is looked up among that plan's
+    "phon_class" "head" entries followed by its "tail" entries.
+    If no plan carries the requested tag the result is False.
+
+    An AssertionError is raised when *language* is not one of
+    "zh", "jp" or "eng" (unless assertions are disabled).
+    """
+    assert language in ["zh", "jp", "eng"]
+
+    candidates = [plan_reader.zh_plan, plan_reader.jp_plan, plan_reader.en_plan]
+    for candidate in candidates:
+        if candidate["language"] != language:
+            continue
+        classes = candidate["phon_class"]
+        # Only the first plan with a matching tag is consulted.
+        return phoneme in [*classes["head"], *classes["tail"]]
+
+    return False
 
-    return False
 
 
 # 拼音到音素
 def pinyin_to_phoneme(pinyin):
-    pass
+    """Look up the phoneme entry for a pinyin syllable.
+
+    The syllable is first searched directly in the Chinese plan's
+    "dict" table. If it is absent there but present in the plan's
+    "syllable_alias" table, the alias target is looked up in "dict"
+    instead. When neither table knows the syllable, the string
+    "pinyin not found" is returned.
+    """
+    plan = plan_reader.zh_plan
+    table = plan["dict"]
+
+    if pinyin in table:
+        return table[pinyin]
+    if pinyin not in plan["syllable_alias"]:
+        return "pinyin not found"
+    # Alternative spelling: resolve it to the canonical syllable first.
+    return table[plan["syllable_alias"][pinyin]]
+
 
 
 # 日本語変換音素
 def jp_word_to_phoneme(jp_word):
-    pass
+    """Look up the phoneme entry for a Japanese syllable.
+
+    If *jp_word* is a key of ``plan_reader.jp_word2romaji`` it is first
+    replaced by the mapped value. The resulting key is then searched in
+    the Japanese plan's "dict" table, falling back to the plan's
+    "syllable_alias" table (whose target is looked up in "dict").
+    When neither table knows the key, the string "word not found" is
+    returned.
+    """
+    plan = plan_reader.jp_plan
+    romaji_map = plan_reader.jp_word2romaji
+
+    # Keys absent from the romaji map are used unchanged.
+    key = romaji_map[jp_word] if jp_word in romaji_map else jp_word
+
+    table = plan["dict"]
+    if key in table:
+        return table[key]
+
+    aliases = plan["syllable_alias"]
+    if key in aliases:
+        return table[aliases[key]]
+
+    return "word not found"
+
+
 
 
 # english word to phoneme
@@ -23,10 +58,41 @@ def eng_word_to_phoneme(en_word):
     eng_dict = cmudict_reader.get_dict()
 
     word_key = en_word.upper()
-    phonemes = eng_dict[word_key]
-
-
-
-
-
 
+    if word_key in eng_dict:
+        syllables = eng_dict[word_key]
+
+        phonemes = []
+        for phn_list in syllables:
+            for i in range(len(phn_list)):
+                phn = phn_list[i]
+                if re.search(r'\d$', phn):
+                    phn = phn[:-1]
+                phonemes.append(phn.lower())
+
+        return phonemes
+    else:
+        return "word not found"
+
+
+if __name__ == "__main__":
+    # Demo run: one validity check, then each converter applied to a few
+    # sample inputs (the last sample of every group is an unknown key).
+    divider = "=========================="
+
+    print(is_valid_phoneme("ah", "eng"))
+
+    demos = [
+        # pinyin_to_phoneme
+        (pinyin_to_phoneme, ["pin", "lve", "lue", "asd"]),
+        # jp_word_to_phoneme
+        (jp_word_to_phoneme, ["ヴぁ", "ja", "jya", "asd"]),
+        # eng_word_to_phoneme
+        (eng_word_to_phoneme, ["yesterday", "sdasaf"]),
+    ]
+    for convert, samples in demos:
+        print(divider)
+        for sample in samples:
+            print(convert(sample))
diff --git a/resources/cmudict_cache.pickle b/resources/cmudict_cache.pickle
diff --git a/src/__pycache__/cmudict_reader.cpython-39.pyc b/src/__pycache__/cmudict_reader.cpython-39.pyc
diff --git a/src/__pycache__/plan_reader.cpython-39.pyc b/src/__pycache__/plan_reader.cpython-39.pyc
diff --git a/src/plan_reader.py b/src/plan_reader.py
@@ -8,3 +8,18 @@
 with open(ALL_PLAN_PATH, "r") as f:
     all_plan_dict = json.load(f)
 
+# Validate with explicit raises rather than ``assert`` so the checks
+# still run when Python is started with -O.
+if all_plan_dict is None:
+    raise ValueError(f"{ALL_PLAN_PATH} does not contain any plan data")
+
+# Mapping consulted by callers before the Japanese plan lookup.
+jp_word2romaji = all_plan_dict["jp_word2romaji"]
+
+# Pre-bind the plan names so that a missing plan is reported by the
+# check below instead of surfacing as a NameError.
+zh_plan = jp_plan = en_plan = None
+
+for plan in all_plan_dict["plans"]:
+    if plan["language"] == "zh":
+        zh_plan = plan
+    elif plan["language"] == "jp":
+        jp_plan = plan
+    else:
+        # Every other language tag is treated as the English plan.
+        # NOTE(review): presumably the tag is "eng" - confirm against
+        # the plan file and consider matching it explicitly.
+        en_plan = plan
+
+_missing_plans = [
+    name
+    for name, value in (
+        ("zh_plan", zh_plan),
+        ("jp_plan", jp_plan),
+        ("en_plan", en_plan),
+    )
+    if value is None
+]
+if _missing_plans:
+    raise ValueError(
+        f"{ALL_PLAN_PATH} is missing required plan(s): "
+        + ", ".join(_missing_plans)
+    )