-
Notifications
You must be signed in to change notification settings - Fork 537
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Export Kokoro 1.0 to sherpa-onnx (#1788)
- Loading branch information
1 parent
8677d83
commit 08cefe8
Showing
13 changed files
with
707 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
config.json | ||
*.json | ||
*.txt | ||
.add-meta-data.done | ||
voices |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Introduction

This directory is for Kokoro v1.0.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | ||
|
||
|
||
import argparse | ||
import json | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
import onnx | ||
import torch | ||
|
||
from generate_voices_bin import speaker2id | ||
|
||
|
||
def main():
    """Rewrite the metadata of ``./kokoro.onnx`` in place.

    Loads the model from ``./kokoro.onnx`` and a voice style from
    ``./voices/af_alloy.pt``, removes every existing ``metadata_props``
    entry from the model, adds the entries built below (each value is
    stored as ``str(value)``), and saves the model back to the same path.
    The metadata is printed before and after the rewrite.
    """
    model = onnx.load("./kokoro.onnx")
    style = torch.load("./voices/af_alloy.pt", weights_only=True, map_location="cpu")

    # Comma-separated "name->id" and "id->name" mappings, in the iteration
    # order of speaker2id.
    speaker2id_str = ",".join(f"{s}->{i}" for s, i in speaker2id.items())
    id2speaker_str = ",".join(f"{i}->{s}" for s, i in speaker2id.items())

    meta_data = {
        "model_type": "kokoro",
        "language": "English",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 2,
        "voice": "en-us",
        # The shape of the loaded style object, e.g. "a,b,c".
        "style_dim": ",".join(map(str, style.shape)),
        "n_speakers": len(speaker2id),
        "id2speaker": id2speaker_str,
        "speaker2id": speaker2id_str,
        "speaker_names": ",".join(map(str, speaker2id.keys())),
        "model_url": "https://github.com/thewh1teagle/kokoro-onnx/releases/tag/model-files",
        "see_also": "https://huggingface.co/spaces/hexgrad/Kokoro-TTS",
        "see_also_2": "https://huggingface.co/hexgrad/Kokoro-82M",
        "maintainer": "k2-fsa",
        "comment": "This is Kokoro v1.0, a multilingual TTS model, supporting English, Chinese, French, Japanese etc.",
    }

    print(model.metadata_props)

    # Remove all existing entries so that only the entries from meta_data
    # remain in the saved model.
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, "./kokoro.onnx")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | ||
|
||
import json | ||
from pypinyin import phrases_dict, pinyin_dict | ||
from misaki import zh | ||
from typing import List, Tuple | ||
|
||
|
||
def generate_english_lexicon(kind: str) -> List[Tuple[str, str]]:
    """Build an English lexicon from ``./{kind}_gold.json`` and ``./{kind}_silver.json``.

    Args:
      kind:
        Either "us" or "gb". It selects which pair of JSON files is read
        from the current directory.

    Returns:
      A list of ``(word, pronunciation)`` pairs with every word lower-cased.
      The user-defined entries come first, followed by the entries taken
      from the JSON files. A JSON entry whose lower-cased word is already
      user-defined is skipped. If several JSON words differ only in case,
      the one visited last wins.

    Raises:
      AssertionError: If ``kind`` is not "us" or "gb", or if a JSON value is
        neither a string nor a dict containing the key "DEFAULT".
    """
    assert kind in ("us", "gb"), kind
    # If you want to add new words, please add them to
    # the user_defined dict.
    user_defined = {
        "Kokoro": "kˈOkəɹO",
        "Misaki": "misˈɑki",
    }

    user_defined_lower = {k.lower(): v for k, v in user_defined.items()}

    with open(f"./{kind}_gold.json", encoding="utf-8") as f:
        gold = json.load(f)

    with open(f"./{kind}_silver.json", encoding="utf-8") as f:
        silver = json.load(f)

    # Words in the gold file have a higher priority than those in the
    # silver file, so gold is unpacked after silver below.
    english = {**silver, **gold}

    lexicon = dict()
    for k, v in english.items():
        k_lower = k.lower()

        if k_lower in user_defined_lower:
            print(f"{k} already exist in the user defined dict. Skip adding")
            continue

        if isinstance(v, str):
            lexicon[k_lower] = v
        else:
            # A non-string value must be a dict; only its "DEFAULT" entry
            # is used.
            assert isinstance(v, dict), (k, v)
            assert "DEFAULT" in v, (k, v)
            lexicon[k_lower] = v["DEFAULT"]

    return list(user_defined_lower.items()) + list(lexicon.items())
|
||
|
||
def generate_chinese_lexicon() -> List[Tuple[str, str]]:
    """Build a Chinese lexicon from the pypinyin character and phrase tables.

    Every key of ``pinyin_dict.pinyin_dict`` whose code point lies in
    0x4E00..0x9FFF is converted to a character and passed through
    ``zh.ZHG2P()``; every key of ``phrases_dict.phrases_dict`` is passed
    through it unchanged.

    Returns:
      A list of ``(text, tokens)`` pairs: single characters first, then
      phrases. ``tokens`` is whatever the G2P callable returns; the code
      treats it as a str.
    """
    word_dict = pinyin_dict.pinyin_dict
    phrases = phrases_dict.phrases_dict

    g2p = zh.ZHG2P()
    lexicon = []

    for key in word_dict:
        # Keys are integer code points; keep only those in
        # U+4E00..U+9FFF (the CJK Unified Ideographs block).
        if not (0x4E00 <= key <= 0x9FFF):
            continue
        w = chr(key)
        tokens: str = g2p(w)
        lexicon.append((w, tokens))

    for key in phrases:
        tokens = g2p(key)
        lexicon.append((key, tokens))
    return lexicon
|
||
|
||
def save(filename: str, lexicon: List[Tuple[str, str]]) -> None:
    """Write a lexicon to a UTF-8 text file, one entry per line.

    Each line has the form ``<word> <c1> <c2> ...`` where ``c1``, ``c2``,
    ... are the individual characters of the entry's pronunciation string,
    separated by single spaces.

    Args:
      filename:
        Path of the file to write. An existing file is overwritten.
      lexicon:
        A list of ``(word, phones)`` pairs.
    """
    with open(filename, "w", encoding="utf-8") as f:
        for word, phones in lexicon:
            # Iterating a str yields its characters, so each character of
            # the pronunciation becomes a separate token.
            tokens = " ".join(phones)
            f.write(f"{word} {tokens}\n")
|
||
|
||
def main():
    """Generate the US English, GB English and Chinese lexicon files.

    Writes ``lexicon-us-en.txt``, ``lexicon-gb-en.txt`` and
    ``lexicon-zh.txt`` to the current directory.
    """
    us = generate_english_lexicon("us")
    gb = generate_english_lexicon("gb")
    # Not named ``zh`` to avoid shadowing the imported ``misaki.zh`` module.
    zh_lexicon = generate_chinese_lexicon()

    save("lexicon-us-en.txt", us)
    save("lexicon-gb-en.txt", gb)
    save("lexicon-zh.txt", zh_lexicon)


if __name__ == "__main__":
    main()
Oops, something went wrong.