Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

整理: replace_mora_pitch #974

Merged
merged 7 commits into from
Jan 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions test/test_tts_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
split_mora,
to_flatten_moras,
to_flatten_phonemes,
unvoiced_mora_phoneme_list,
unvoiced_vowel_likes,
)

TRUE_NUM_PHONEME = 45
Expand Down Expand Up @@ -657,11 +657,11 @@ def test_update_pitch(self):
index = 1

def result_value(i: int) -> float:
# unvoiced_mora_phoneme_listのPhoneme ID版
unvoiced_mora_phoneme_id_list = [
Phoneme(p).phoneme_id for p in unvoiced_mora_phoneme_list
# unvoiced_vowel_likesのPhoneme ID版
unvoiced_vowel_like_ids = [
Phoneme(p).phoneme_id for p in unvoiced_vowel_likes
]
if vowel_phoneme_list[i] in unvoiced_mora_phoneme_id_list:
if vowel_phoneme_list[i] in unvoiced_vowel_like_ids:
return 0
return (
vowel_phoneme_list[i]
Expand Down
68 changes: 25 additions & 43 deletions voicevox_engine/tts_pipeline/tts_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from .mora_list import openjtalk_mora2text
from .text_analyzer import text_to_accent_phrases

unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
unvoiced_vowel_likes = ["A", "I", "U", "E", "O", "cl", "pau"]
mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_vowel_likes

# 疑問文語尾定数
UPSPEAK_LENGTH = 0.15
Expand Down Expand Up @@ -286,14 +286,10 @@ def update_pitch(
self, accent_phrases: list[AccentPhrase], style_id: StyleId
) -> list[AccentPhrase]:
"""アクセント句系列に含まれるモーラの音高属性をスタイルに合わせて更新する"""
# numpy.concatenateが空リストだとエラーを返すのでチェック
# 後続のnumpy.concatenateが空リストだとエラーになるので別処理
if len(accent_phrases) == 0:
return []

# phoneme
# AccentPhraseをすべてMoraおよびPhonemeの形に分解し、処理可能な形にする
flatten_moras, phoneme_data_list = pre_process(accent_phrases)

# accent
def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
"""
Expand All @@ -320,7 +316,7 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
(0 if accent_phrase.pause_mora is not None else []),
]

# accent_phrasesから、アクセントの開始位置のリストを作る
# アクセントの開始/終了位置リストを作る
start_accent_list = numpy.concatenate(
[
# accentはプログラミング言語におけるindexのように0始まりではなく1始まりなので、
Expand All @@ -330,8 +326,6 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
for accent_phrase in accent_phrases
]
)

# accent_phrasesから、アクセントの終了位置のリストを作る
end_accent_list = numpy.concatenate(
[
# accentはプログラミング言語におけるindexのように0始まりではなく1始まりなので、1を引いている
Expand All @@ -340,69 +334,57 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int) -> ndarray:
]
)

# accent_phrasesから、アクセント句の開始位置のリストを作る
# これによって、yukarin_sa_forwarder内でアクセント句を区別できる
# アクセント句の開始/終了位置リストを作る
start_accent_phrase_list = numpy.concatenate(
[_create_one_hot(accent_phrase, 0) for accent_phrase in accent_phrases]
)

# accent_phrasesから、アクセント句の終了位置のリストを作る
end_accent_phrase_list = numpy.concatenate(
[_create_one_hot(accent_phrase, -1) for accent_phrase in accent_phrases]
)

# 最初と最後に0を付け加える。これによってpau(前後の無音のためのもの)を付け加えたことになる
# 前後無音を付加する
tarepan marked this conversation as resolved.
Show resolved Hide resolved
start_accent_list = numpy.r_[0, start_accent_list, 0]
end_accent_list = numpy.r_[0, end_accent_list, 0]
start_accent_phrase_list = numpy.r_[0, start_accent_phrase_list, 0]
end_accent_phrase_list = numpy.r_[0, end_accent_phrase_list, 0]

# アクセント・アクセント句関連のデータをyukarin_sa_forwarderに渡すための最終処理、リスト内のデータをint64に変換する
# キャスト
tarepan marked this conversation as resolved.
Show resolved Hide resolved
start_accent_list = numpy.array(start_accent_list, dtype=numpy.int64)
end_accent_list = numpy.array(end_accent_list, dtype=numpy.int64)
start_accent_phrase_list = numpy.array(
start_accent_phrase_list, dtype=numpy.int64
)
end_accent_phrase_list = numpy.array(end_accent_phrase_list, dtype=numpy.int64)

# phonemeに関するデータを取得(変換)する
(consonant_phoneme_data_list, vowel_phoneme_data_list) = split_mora(
phoneme_data_list
)
# アクセント句系列から(前後の無音含まない)モーラ系列と(前後の無音含む)音素系列を抽出する
moras, phonemes = pre_process(accent_phrases)

# yukarin_sa
# Phoneme関連のデータをyukarin_sa_forwarderに渡すための最終処理、リスト内のデータをint64に変換する
vowel_phoneme_list = numpy.array(
[p.phoneme_id for p in vowel_phoneme_data_list], dtype=numpy.int64
)
consonant_phoneme_list = numpy.array(
[
p.phoneme_id if p is not None else -1
for p in consonant_phoneme_data_list
],
dtype=numpy.int64,
# 前後無音付加済みの音素系列から子音ID系列・母音ID系列を抽出する
consonants, vowels = split_mora(phonemes)
vowel_ids = numpy.array([p.phoneme_id for p in vowels], dtype=numpy.int64)
consonant_ids = numpy.array(
[p.phoneme_id if p else -1 for p in consonants], dtype=numpy.int64
)

# 今までに生成された情報をyukarin_sa_forwardにかけ、推論器によってモーラごとに適切な音高(ピッチ)を割り当てる
f0_list = self._core.safe_yukarin_sa_forward(
vowel_phoneme_list,
consonant_phoneme_list,
# コアを用いてモーラ音高を生成する
f0 = self._core.safe_yukarin_sa_forward(
vowel_ids,
consonant_ids,
start_accent_list,
end_accent_list,
start_accent_phrase_list,
end_accent_phrase_list,
style_id,
)

# 無声母音を含むMoraに関しては、音高(ピッチ)を0にする
for i, p in enumerate(vowel_phoneme_data_list):
if p.phoneme in unvoiced_mora_phoneme_list:
f0_list[i] = 0
# 母音が無声であるモーラは音高を 0 とする
for i, p in enumerate(vowels):
if p.phoneme in unvoiced_vowel_likes:
f0[i] = 0

# yukarin_sa_forwarderの結果をaccent_phrasesに反映する
# flatten_moras変数に展開された値を変更することでコード量を削減しつつaccent_phrases内のデータを書き換えている
for i, mora in enumerate(flatten_moras):
mora.pitch = f0_list[i + 1]
# 更新する
for i, mora in enumerate(moras):
mora.pitch = f0[i + 1]

return accent_phrases

Expand Down