-
Notifications
You must be signed in to change notification settings - Fork 487
/
compute_timestamp_ratio.py
55 lines (45 loc) · 1.63 KB
/
compute_timestamp_ratio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""Compute output/input timestamp ratio.
usage: compute_timestamp_ratio.py [options] <data_root>
options:
--hparams=<parmas> Hyper parameters [default: ].
--preset=<json> Path of preset parameters (json).
-h, --help Show this help message and exit
"""
from docopt import docopt
import sys
import numpy as np
from hparams import hparams, hparams_debug_string
import train
from train import TextDataSource, MelSpecDataSource
from nnmnkwii.datasets import FileSourceDataset
from tqdm import trange
from deepvoice3_pytorch import frontend
if __name__ == "__main__":
    # Script entry point: walk every utterance under <data_root>, accumulate
    # the first-axis sizes of the text inputs and of the mel targets, and
    # print the totals together with their output/input ratio.
    args = docopt(__doc__)
    data_root = args["<data_root>"]
    preset = args["--preset"]

    # Load preset if specified. JSON is UTF-8 by specification, so decode it
    # explicitly instead of relying on the platform's locale encoding.
    if preset is not None:
        with open(preset, encoding="utf-8") as f:
            hparams.parse_json(f.read())
    # Override hyper parameters given on the command line.
    hparams.parse(args["--hparams"])

    # Validate with an explicit check: a bare `assert` is stripped when the
    # interpreter runs with -O, which would let a wrong configuration through.
    if hparams.name != "deepvoice3":
        raise ValueError(
            "hparams.name must be 'deepvoice3', got {!r}".format(hparams.name))

    # The data sources in `train` read the text frontend from this
    # module-level attribute, so it has to be set before they are used.
    # NOTE(review): inferred from the assignment order here; confirm in train.py.
    train._frontend = getattr(frontend, hparams.frontend)

    X = FileSourceDataset(TextDataSource(data_root))
    Mel = FileSourceDataset(MelSpecDataSource(data_root))

    in_sizes = []
    out_sizes = []
    for i in trange(len(X)):
        x, m = X[i], Mel[i]
        if X.file_data_source.multi_speaker:
            # Multi-speaker items are indexable; the first element is the one
            # measured (presumably the text sequence, the rest speaker
            # metadata; verify against TextDataSource).
            x = x[0]
        # Axis 0 is taken as the length of each item (assumed to be the time
        # axis of both the text sequence and the mel spectrogram; TODO confirm).
        in_sizes.append(x.shape[0])
        out_sizes.append(m.shape[0])

    in_sizes = np.array(in_sizes)
    out_sizes = np.array(out_sizes)

    input_timestamps = np.sum(in_sizes)
    # Scale the raw output length down by both reduction factors from hparams
    # so it is expressed in the same units as the hyper parameters imply.
    output_timestamps = (
        np.sum(out_sizes) / hparams.outputs_per_step / hparams.downsample_step)

    print(input_timestamps, output_timestamps,
          output_timestamps / input_timestamps)
    sys.exit(0)