From a32961bcb441c955ca9d7df879becb7c84c2ef52 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Wed, 11 Oct 2023 16:09:51 -0300 Subject: [PATCH 01/24] Add XTTS base training code --- TTS/tts/layers/xtts/trainer/dataset.py | 202 ++++++++++ TTS/tts/layers/xtts/trainer/gpt_trainer.py | 448 +++++++++++++++++++++ recipes/multilingual/xtts_v1/train_xtts.py | 364 +++++++++++++++++ 3 files changed, 1014 insertions(+) create mode 100644 TTS/tts/layers/xtts/trainer/dataset.py create mode 100644 TTS/tts/layers/xtts/trainer/gpt_trainer.py create mode 100644 recipes/multilingual/xtts_v1/train_xtts.py diff --git a/TTS/tts/layers/xtts/trainer/dataset.py b/TTS/tts/layers/xtts/trainer/dataset.py new file mode 100644 index 0000000000..9736ae6cb0 --- /dev/null +++ b/TTS/tts/layers/xtts/trainer/dataset.py @@ -0,0 +1,202 @@ +import os +import random +import sys +import numpy as np + +import torch +import torch.nn.functional as F +import torch.utils.data +import torchaudio +from torchaudio.backend.sox_io_backend import load as torchaudio_sox_load +from torchaudio.backend.soundfile_backend import load as torchaudio_soundfile_load +torch.set_num_threads(1) + +def key_samples_by_col(samples, col): + """Returns a dictionary of samples keyed by language.""" + samples_by_col = {} + for sample in samples: + col_val = sample[col] + assert isinstance(col_val, str) + if col_val not in samples_by_col: + samples_by_col[col_val] = [] + samples_by_col[col_val].append(sample) + return samples_by_col + + +def get_prompt_slice(gt_path, max_sample_length, min_sample_length, sample_rate): + rel_clip = load_audio(gt_path, sample_rate) + sample_length = random.randint(min_sample_length, max_sample_length) + gap = rel_clip.shape[-1] - sample_length + if gap < 0: + sample_length = rel_clip.shape[-1] // 2 + gap = rel_clip.shape[-1] - sample_length + rand_start = random.randint(0, gap) + rand_end = rand_start+sample_length + rel_clip = rel_clip[:, rand_start:rand_end] + rel_clip = F.pad(rel_clip, pad=(0, 
max_sample_length - rel_clip.shape[-1])) + cond_idxs = [rand_start, rand_end] + return rel_clip, rel_clip.shape[-1], cond_idxs + + +def load_audio(audiopath, sampling_rate): + # better load setting following: https://github.com/faroit/python_audio_loading_benchmark + if audiopath[-4:] == '.mp3': + # it uses torchaudio with sox backend to load mp3 + audio, lsr = torchaudio_sox_load(audiopath) + else: + # it uses torchaudio soundfile backend to load all the others data type + audio, lsr = torchaudio_soundfile_load(audiopath) + + # stereo to mono if needed + if audio.size(0) != 1: + audio = torch.mean(audio, dim=0, keepdim=True) + + if lsr != sampling_rate: + audio = torchaudio.functional.resample(audio, lsr, sampling_rate) + + # Check some assumptions about audio range. This should be automatically fixed in load_wav_to_torch, but might not be in some edge cases, where we should squawk. + # '10' is arbitrarily chosen since it seems like audio will often "overdrive" the [-1,1] bounds. + if torch.any(audio > 10) or not torch.any(audio < 0): + print(f"Error with {audiopath}. 
Max={audio.max()} min={audio.min()}") + # clip audio invalid values + audio.clip_(-1, 1) + return audio + +class XTTSDataset(torch.utils.data.Dataset): + def __init__(self, config, samples, tokenizer, sample_rate): + self.config = config + model_args = config.model_args + self.failed_samples = set() + self.debug_failures = model_args.debug_loading_failures + self.max_conditioning_length = model_args.max_conditioning_length + self.min_conditioning_length = model_args.min_conditioning_length + + # self.samples = [] + # cache the samples and added type "0" for all samples + # ToDo: find a better way to deal with type + # for item in samples: + # self.samples.append([item['audio_file'], item["text"], 0]) + self.samples = samples + random.seed(config.training_seed) + # random.shuffle(self.samples) + random.shuffle(self.samples) + # order by language + self.samples = key_samples_by_col(self.samples, "language") + print(" > Sampling by language:", self.samples.keys()) + + # use always the output sampling rate to load in the highest quality + self.sample_rate = sample_rate + self.max_wav_len = model_args.max_wav_length + self.max_text_len = model_args.max_text_length + assert self.max_wav_len is not None and self.max_text_len is not None + + # load specific vocabulary + self.tokenizer = tokenizer + + def get_text(self, text, lang): + tokens = self.tokenizer.encode(text, lang) + tokens = torch.IntTensor(tokens) + assert not torch.any(tokens == 1), f"UNK token found in {text} -> {self.tokenizer.decode(tokens)}" + # The stop token should always be sacred. 
+ assert not torch.any(tokens == 0), f"Stop token found in {text}" + return tokens + + def load_item(self, sample): + text = str(sample['text']) + tseq = self.get_text(text, sample["language"]) + audiopath = sample['audio_file'] + wav = load_audio(audiopath, self.sample_rate) + if text is None or len(text.strip()) == 0: + raise ValueError + if wav is None or wav.shape[-1] < (0.5 * self.sample_rate): + # Ultra short clips are also useless (and can cause problems within some models). + raise ValueError + + # get a slice from GT to condition the model + cond, cond_len, cond_idxs = get_prompt_slice(audiopath, self.max_conditioning_length, self.min_conditioning_length, self.sample_rate) + + return tseq, audiopath, wav, cond, cond_len, cond_idxs + + def __getitem__(self, index): + # select a random language + lang = random.choice(list(self.samples.keys())) + # select random sample + index = random.randint(0, len(self.samples[lang]) - 1) + sample = self.samples[lang][index] + # a unique id for each sampel to deal with fails + sample_id = lang+"_"+str(index) + + # ignore samples that we already know that is not valid ones + if sample_id in self.failed_samples: + if self.debug_failures: + print(f"Ignoring sample {sample['audio_file']} because it was already ignored before !!") + # call get item again to get other sample + return self[1] + + # try to load the sample, if fails added it to the failed samples list + try: + tseq, audiopath, wav, cond, cond_len, cond_idxs = self.load_item(sample) + except: + if self.debug_failures: + print(f"error loading {sample['audio_file']} {sys.exc_info()}") + self.failed_samples.add(sample_id) + return self[1] + + # check if the audio and text size limits and if it out of the limits, added it failed_samples + if wav is None or \ + (self.max_wav_len is not None and wav.shape[-1] > self.max_wav_len) or \ + (self.max_text_len is not None and tseq.shape[0] > self.max_text_len): + # Basically, this audio file is nonexistent or too long to be 
supported by the dataset. + # It's hard to handle this situation properly. Best bet is to return the a random valid token and skew the dataset somewhat as a result. + if self.debug_failures and wav is not None and tseq is not None: + print(f"error loading {sample['audio_file']}: ranges are out of bounds; {wav.shape[-1]}, {tseq.shape[0]}") + self.failed_samples.add(sample_id) + return self[1] + + res = { + # 'real_text': text, + 'text': tseq, + 'text_lengths': torch.tensor(tseq.shape[0], dtype=torch.long), + 'wav': wav, + 'wav_lengths': torch.tensor(wav.shape[-1], dtype=torch.long), + 'filenames': audiopath, + 'conditioning': cond.unsqueeze(1), + 'cond_lens': torch.tensor(cond_len, dtype=torch.long), + 'cond_idxs': torch.tensor(cond_idxs), + } + return res + + def __len__(self): + return sum([len(v) for v in self.samples.values()]) + + def collate_fn(self, batch): + # convert list of dicts to dict of lists + B = len(batch) + batch = {k: [dic[k] for dic in batch] for k in batch[0]} + + # stack for features that already have the same shape + batch["wav_lengths"] = torch.stack(batch["wav_lengths"]) + batch["text_lengths"] = torch.stack(batch["text_lengths"]) + batch["conditioning"] = torch.stack(batch["conditioning"]) + batch["cond_lens"] = torch.stack(batch["cond_lens"]) + batch["cond_idxs"] = torch.stack(batch["cond_idxs"]) + max_text_len = batch["text_lengths"].max() + max_wav_len = batch["wav_lengths"].max() + + # create padding tensors + text_padded = torch.IntTensor(B, max_text_len) + wav_padded = torch.FloatTensor(B, 1, max_wav_len) + + # initialize tensors for zero padding + text_padded = text_padded.zero_() + wav_padded = wav_padded.zero_() + for i in range(B): + text = batch["text"][i] + text_padded[i, : batch["text_lengths"][i]] = torch.IntTensor(text) + wav = batch['wav'][i] + wav_padded[i, :, :batch["wav_lengths"][i]] = torch.FloatTensor(wav) + + batch["wav"] = wav_padded + batch["padded_text"] = text_padded + + return batch diff --git 
a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py new file mode 100644 index 0000000000..f73aeb056c --- /dev/null +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -0,0 +1,448 @@ +import os +from dataclasses import dataclass, field +from typing import Callable, Dict, List, Optional, Tuple, Union + +import torch +import torchaudio +import torch.nn as nn +from torch.nn import functional as F +from torch.utils.data import DataLoader +import sys + + +from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer +from TTS.tts.layers.xtts.gpt import GPT +from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig + +from TTS.tts.models.base_tts import BaseTTS +from coqpit import Coqpit + +from TTS.tts.configs.tortoise_config import TortoiseConfig +from TTS.tts.layers.tortoise.arch_utils import TorchMelSpectrogram + +from TTS.tts.datasets.dataset import TTSDataset + +from trainer.torch import DistributedSampler +from trainer.trainer_utils import get_optimizer, get_scheduler + + +from TTS.tts.layers.xtts.trainer.dataset import XTTSDataset +from TTS.utils.io import load_fsspec + +from TTS.tts.layers.xtts.dvae import DiscreteVAE + +@dataclass +class GPTConfig(TortoiseConfig): + lr: float = 5e-06 + training_seed: int = 1 + optimizer_wd_only_on_weights: bool = False + use_weighted_loss: bool = False # TODO: move it to the base config + weighted_loss_attrs: dict = field(default_factory=lambda: {}) + weighted_loss_multipliers: dict = field(default_factory=lambda: {}) + + +@dataclass +class XttsAudioConfig(XttsAudioConfig): + dvae_sample_rate: int = 22050 + + +@dataclass +class GPTArgs(XttsArgs): + min_conditioning_length: int = 66150 + max_conditioning_length: int = 132300 + gpt_loss_text_ce_weight: float = 0.01 + gpt_loss_mel_ce_weight: float = 1.0 + gpt_num_audio_tokens: int = 8194 + debug_loading_failures: bool = False + max_wav_length: int = 255995 # ~11.6 seconds + max_text_length: int = 200 + tokenizer_file: str = "" + mel_norm_file: 
str = "https://coqui.gateway.scarf.sh/v0.14.0_models/mel_norms.pth" + dvae_checkpoint: str = "" + gpt_checkpoint: str = "" + vocoder: str = "" # overide vocoder key on the config to avoid json write issues + + +def callback_clearml_load_save(operation_type, model_info): + # return None means skip the file upload/log, returning model_info will continue with the log/upload + # you can also change the upload destination file name model_info.upload_filename or check the local file size with Path(model_info.local_model_path).stat().st_size + assert operation_type in ('load', 'save') + # print(operation_type, model_info.__dict__) + + if "similarities.pth" in model_info.__dict__['local_model_path']: + return None + + return model_info + +class GPTTrainer(BaseTTS): + def __init__(self, config: Coqpit): + """ + Tortoise GPT training class + """ + super().__init__(config, ap=None, tokenizer=None) + self.config = config + + self.tokenizer = VoiceBpeTokenizer(self.args.tokenizer_file) + + self.args.gpt_number_text_tokens = self.tokenizer.tokenizer.get_vocab_size() + self.args.gpt_start_text_token = self.tokenizer.tokenizer.token_to_id("[START]") + self.args.gpt_stop_text_token = self.tokenizer.tokenizer.token_to_id("[STOP]") + + self.gpt = GPT( + layers=self.args.gpt_layers, + model_dim=self.args.gpt_n_model_channels, + start_text_token=self.args.gpt_start_text_token, + stop_text_token=self.args.gpt_stop_text_token, + heads=self.args.gpt_n_heads, + max_text_tokens=self.args.gpt_max_text_tokens, + max_mel_tokens=self.args.gpt_max_audio_tokens, + max_prompt_tokens=self.args.gpt_max_prompt_tokens, + number_text_tokens=self.args.gpt_number_text_tokens, + num_audio_tokens=self.args.gpt_num_audio_tokens, + start_audio_token=self.args.gpt_start_audio_token, + stop_audio_token=self.args.gpt_stop_audio_token, + ).cuda() + + + # load GPT if available + if self.args.gpt_checkpoint: + gpt_checkpoint = torch.load( + self.args.gpt_checkpoint, map_location=torch.device("cpu") + ) + # deal 
with coqui Trainer exported model + if "model" in gpt_checkpoint.keys() and "config" in gpt_checkpoint.keys(): + print("Coqui Trainer checkpoint detected! Converting it!") + gpt_checkpoint = gpt_checkpoint["model"] + states_keys = list(gpt_checkpoint.keys()) + for key in states_keys: + if "gpt." in key: + new_key = key.replace("gpt.", "") + gpt_checkpoint[new_key] = gpt_checkpoint[key] + del gpt_checkpoint[key] + else: + del gpt_checkpoint[key] + + # edit checkpoint if the number of tokens is changed to ensures the better transfer learning possible + if "text_embedding.weight" in gpt_checkpoint and gpt_checkpoint["text_embedding.weight"].shape != self.gpt.text_embedding.weight.shape: + num_new_tokens = self.gpt.text_embedding.weight.shape[0] - gpt_checkpoint["text_embedding.weight"].shape[0] + print(f" > Loading checkpoint with {num_new_tokens} additional tokens.") + + # add new tokens to a linear layer (text_head) + emb_g = gpt_checkpoint["text_embedding.weight"] + new_row = torch.randn(num_new_tokens, emb_g.shape[1]) + start_token_row = emb_g[-1, :] + emb_g = torch.cat([emb_g, new_row], axis=0) + emb_g[-1, :] = start_token_row + gpt_checkpoint["text_embedding.weight"] = emb_g + + # add new weights to the linear layer (text_head) + text_head_weight = gpt_checkpoint["text_head.weight"] + start_token_row = text_head_weight[-1, :] + new_entry = torch.randn(num_new_tokens, self.gpt.text_head.weight.shape[1]) + text_head_weight = torch.cat([text_head_weight, new_entry], axis=0) + text_head_weight[-1, :] = start_token_row + gpt_checkpoint["text_head.weight"] = text_head_weight + + # add new biases to the linear layer (text_head) + text_head_bias = gpt_checkpoint["text_head.bias"] + start_token_row = text_head_bias[-1] + new_bias_entry = torch.zeros(num_new_tokens) + text_head_bias = torch.cat([text_head_bias, new_bias_entry], axis=0) + text_head_bias[-1] = start_token_row + gpt_checkpoint["text_head.bias"] = text_head_bias + + self.gpt.load_state_dict(gpt_checkpoint, 
strict=True) + print(">> GPT weights restored from:", self.args.gpt_checkpoint) + else: + print(">> GPT weights randomly initialized! If you want you can specify a checkpoint in config.model_args.gpt_checkpoint") + + # Mel spectrogram extractor for conditioning + self.torch_mel_spectrogram = TorchMelSpectrogram(mel_norm_file=self.args.mel_norm_file, sampling_rate=config.audio.sample_rate) + + # Load DVAE + self.dvae = DiscreteVAE( + channels=80, + normalization=None, + positional_dims=1, + num_tokens=self.args.gpt_num_audio_tokens - 2, + codebook_dim=512, + hidden_dim=512, + num_resnet_blocks=3, + kernel_size=3, + num_layers=2, + use_transposed_convs=False, + ) + + self.dvae.eval() + if self.args.dvae_checkpoint: + dvae_checkpoint = torch.load( + self.args.dvae_checkpoint, map_location=torch.device("cpu") + ) + self.dvae.load_state_dict(dvae_checkpoint, strict=False) + print(">> DVAE weights restored from:", self.args.dvae_checkpoint) + else: + raise RuntimeError("You need to specify config.model_args.dvae_checkpoint path to be able to train the GPT decoder!!") + + # Mel spectrogram extractor for DVAE + self.torch_mel_spectrogram_dvae = TorchMelSpectrogram(mel_norm_file=self.args.mel_norm_file, sampling_rate=config.audio.dvae_sample_rate) + + @property + def device(self): + return next(self.parameters()).device + + def forward(self, text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels, cond_lens): + """ + Forward pass that uses both text and voice in either text conditioning mode or voice conditioning mode + (actuated by `text_first`). 
+ + text_inputs: long tensor, (b,t) + text_lengths: long tensor, (b,) + mel_inputs: long tensor, (b,m) + wav_lengths: long tensor, (b,) + cond_mels: MEL float tensor, (b, num_samples, 80,t_m) + cond_lengths: long tensor, (b,) + """ + losses = self.gpt(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels=cond_mels, cond_lens=cond_lens) + return losses + + @torch.no_grad() + def test_run(self, assets) -> Tuple[Dict, Dict]: # pylint: disable=W0613 + return {}, {} + + def format_batch(self, batch: Dict) -> Dict: + return batch + + @torch.no_grad() # torch no grad to avoid gradients from the pre-processing and DVAE codes extraction + def format_batch_on_device(self, batch): + """Compute spectrograms on the device.""" + batch["text_lengths"] = batch["text_lengths"] + batch["wav_lengths"] = batch["wav_lengths"] + batch["text_inputs"] = batch["padded_text"] + batch["cond_lens"] = batch["cond_lens"] + batch["cond_idxs"] = batch["cond_idxs"] + # compute conditioning mel specs + # transform waves from torch.Size([B, num_cond_samples, 1, T] to torch.Size([B * num_cond_samples, 1, T] because if is faster than iterate the tensor + B, num_cond_samples, C, T = batch["conditioning"].size() + conditioning_reshaped = batch["conditioning"].view(B*num_cond_samples, C, T) + paired_conditioning_mel = self.torch_mel_spectrogram(conditioning_reshaped) + # transform torch.Size([B * num_cond_samples, n_mel, T_mel]) in torch.Size([B, num_cond_samples, n_mel, T_mel]) + n_mel = self.torch_mel_spectrogram.n_mel_channels # paired_conditioning_mel.size(1) + T_mel = paired_conditioning_mel.size(2) + paired_conditioning_mel = paired_conditioning_mel.view(B, num_cond_samples, n_mel, T_mel) + # get the conditioning embeddings + batch["cond_mels"] = paired_conditioning_mel + # compute codes using DVAE + if self.config.audio.sample_rate != self.config.audio.dvae_sample_rate: + dvae_wav = torchaudio.functional.resample( + batch["wav"], + orig_freq=self.config.audio.sample_rate, + 
new_freq=self.config.audio.dvae_sample_rate, + lowpass_filter_width=64, + rolloff=0.9475937167399596, + resampling_method="kaiser_window", + beta=14.769656459379492, + ) + else: + dvae_wav = batch["wav"] + dvae_mel_spec = self.torch_mel_spectrogram_dvae(dvae_wav) + codes = self.dvae.get_codebook_indices(dvae_mel_spec) + batch["audio_codes"] = codes + # delete useless batch tensors + del batch["padded_text"] + del batch["wav"] + del batch["conditioning"] + + return batch + + def train_step(self, batch, criterion): + loss_dict = {} + cond_mels = batch["cond_mels"] + text_inputs = batch["text_inputs"] + text_lengths = batch["text_lengths"] + audio_codes = batch["audio_codes"] + wav_lengths = batch["wav_lengths"] + + cond_lens=batch["cond_lens"] + # Todo: implement masking on the cond slice + cond_idxs = batch["cond_idxs"] + + loss_text, loss_mel, _ = self.forward(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels, cond_lens) + loss_dict["loss_text_ce"] = loss_text * self.args.gpt_loss_text_ce_weight + loss_dict["loss_mel_ce"] = loss_mel * self.args.gpt_loss_mel_ce_weight + loss_dict["loss"] = loss_dict["loss_text_ce"] + loss_dict["loss_mel_ce"] + return {"model_outputs": None}, loss_dict + + def eval_step(self, batch, criterion): + return self.train_step(batch, criterion) + + def on_epoch_start(self, trainer): # pylint: disable=W0613 + # guarante that dvae will be in eval mode after .train() on evaluation end + self.dvae = self.dvae.eval() + + def on_init_end(self, trainer): # pylint: disable=W0613 + # ignore similarities.pth on clearml save/upload + if self.config.dashboard_logger.lower() == "clearml": + from clearml.binding.frameworks import WeightsFileHandler + WeightsFileHandler.add_pre_callback(callback_clearml_load_save) + + @torch.no_grad() + def inference( + self, + x, + aux_input=None, + ): # pylint: disable=dangerous-default-value + return None + + @staticmethod + def get_criterion(): + return None + + def get_sampler(self, dataset: TTSDataset, 
num_gpus=1): + # sampler for DDP + batch_sampler = DistributedSampler(dataset) if num_gpus > 1 else None + return batch_sampler + + def get_data_loader( + self, + config: Coqpit, + assets: Dict, + is_eval: bool, + samples: Union[List[Dict], List[List]], + verbose: bool, + num_gpus: int, + rank: int = None, + ) -> "DataLoader": # pylint: disable=W0613 + if is_eval and not config.run_eval: + loader = None + else: + # Todo: remove the randomness of dataset when it is eval + # init dataloader + dataset = XTTSDataset(self.config, samples, self.tokenizer, config.audio.sample_rate) + + # wait all the DDP process to be ready + if num_gpus > 1: + torch.distributed.barrier() + + # sort input sequences from short to long + # dataset.preprocess_samples() + + # get samplers + sampler = self.get_sampler(dataset, num_gpus) + + # ignore sampler when is eval because if we changed the sampler parameter we will not be able to compare previous runs + if sampler is None or is_eval: + loader = DataLoader( + dataset, + batch_size=config.eval_batch_size if is_eval else config.batch_size, + shuffle=False, + drop_last=False, + collate_fn=dataset.collate_fn, + num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers, + pin_memory=False, + ) + else: + loader = DataLoader( + dataset, + batch_sampler=sampler, + collate_fn=dataset.collate_fn, + num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers, + pin_memory=False, + ) + return loader + + def get_optimizer(self) -> List: + """Initiate and return the optimizer based on the config parameters. 
+ """ + # ToDo: deal with multi GPU training + if self.config.optimizer_wd_only_on_weights: + # parameters to only GPT model + net = self.gpt + + # normalizations + norm_modules = (nn.BatchNorm2d, nn.InstanceNorm2d, nn.BatchNorm1d, nn.InstanceNorm1d, + nn.BatchNorm3d, nn.InstanceNorm3d, nn.GroupNorm, nn.LayerNorm) + # nn.Embedding + emb_modules = (nn.Embedding, nn.EmbeddingBag) + + param_names_notweights = set() + all_param_names = set() + param_map = {} + for mn, m in net.named_modules(): + for k, v in m.named_parameters(): + v.is_bias = k.endswith(".bias") + v.is_weight = k.endswith(".weight") + v.is_norm = isinstance(m, norm_modules) + v.is_emb = isinstance(m, emb_modules) + + fpn = '%s.%s' % (mn, k) if mn else k # full param name + all_param_names.add(fpn) + param_map[fpn] = v + if v.is_bias or v.is_norm or v.is_emb: + param_names_notweights.add(fpn) + + params_names_notweights = sorted(list(param_names_notweights)) + params_notweights = [param_map[k] for k in params_names_notweights] + params_names_weights = sorted(list(all_param_names ^ param_names_notweights)) + params_weights = [param_map[k] for k in params_names_weights] + + groups = [ + { 'params': params_weights, 'weight_decay': self.config.optimizer_params["weight_decay"]}, + { 'params': params_notweights, 'weight_decay': 0} + ] + # torch.optim.AdamW + opt = get_optimizer( + self.config.optimizer, + self.config.optimizer_params, + self.config.lr, + parameters=groups, + ) + opt._group_names = [params_names_weights, params_names_notweights] + return opt + + return get_optimizer( + self.config.optimizer, + self.config.optimizer_params, + self.config.lr, + # optimize only for the GPT model + parameters=self.gpt.parameters(), + ) + + def get_scheduler(self, optimizer) -> List: + """Set the scheduler for the optimizer. + + Args: + optimizer: `torch.optim.Optimizer`. 
+ """ + return get_scheduler(self.config.lr_scheduler, self.config.lr_scheduler_params, optimizer) + + def load_checkpoint( + self, + config, + checkpoint_path, + eval=False, + strict=True, + cache_storage="/tmp/tts_cache", + target_protocol="s3", + target_options={"anon": True}, + ): # pylint: disable=unused-argument, disable=W0201, disable=W0102, redefined-builtin + """Load the model checkpoint and setup for training or inference""" + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu")) + # load the model weights + self.gpt.load_state_dict(state, strict=strict) + + if eval: + self.eval() + self.set_inference() + assert not self.training + + @staticmethod + def init_from_config(config: "GPTConfig", samples: Union[List[List], List[Dict]] = None): + """Initiate model from config + + Args: + config (GPTConfig): Model config. + samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training. + Defaults to None. + """ + return GPTTrainer(config) + \ No newline at end of file diff --git a/recipes/multilingual/xtts_v1/train_xtts.py b/recipes/multilingual/xtts_v1/train_xtts.py new file mode 100644 index 0000000000..fc2b5d8a7d --- /dev/null +++ b/recipes/multilingual/xtts_v1/train_xtts.py @@ -0,0 +1,364 @@ +from trainer import Trainer, TrainerArgs + +from TTS.config.shared_configs import BaseDatasetConfig +from TTS.tts.datasets import load_tts_samples + +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTTrainer, GPTArgs, XttsAudioConfig, GPTConfig + + +config_coqui_MLS_metadata_train_with_previous_audio_key_de = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/MLS/mls_german", + meta_file_train="metadata_train_with_previous_audio_key.csv", + language="de", +) + + +config_coqui_MLS_metadata_test_with_previous_audio_key_de = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/MLS/mls_german", + 
meta_file_train="metadata_test_with_previous_audio_key.csv", + language="de", +) + + +config_coqui_MLS_metadata_dev_with_previous_audio_key_de = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/MLS/mls_german", + meta_file_train="metadata_dev_with_previous_audio_key.csv", + language="de", +) + + +config_coqui_mls_french_metadata_with_previous_audio_key_fr = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/MLS/mls_french/", + meta_file_train="metadata_with_previous_audio_key.csv", + language="fr", +) + + +config_coqui_mls_spanish_metadata_with_previous_audio_key_es = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/MLS/mls_spanish/", + meta_file_train="/raid/datasets/MLS/mls_spanish/metadata_with_previous_audio_key.csv", + language="es", +) + + +config_coqui_mls_italian_metadata_with_previous_audio_key_it = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/MLS/mls_italian/", + meta_file_train="/raid/datasets/MLS/mls_italian/metadata_with_previous_audio_key.csv", + language="it", +) + + +config_coqui_mls_portuguese_metadata_with_previous_audio_key_pt = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/MLS/mls_portuguese/", + meta_file_train="/raid/datasets/MLS/mls_portuguese/metadata_with_previous_audio_key.csv", + language="pt", +) + + +config_coqui_mls_polish_metadata_with_previous_audio_key_pl = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/MLS/mls_polish/", + meta_file_train="/raid/datasets/MLS/mls_polish/metadata_with_previous_audio_key.csv", + language="pl", +) + + +config_coqui_common_voice_metafile_it_train_with_scores_it = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_it_train_with_scores.csv", + language="it", +) + + 
+config_coqui_common_voice_metafile_it_test_with_scores_it = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_it_test_with_scores.csv", + language="it", +) + + +config_coqui_common_voice_metafile_it_dev_with_scores_it = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_it_dev_with_scores.csv", + language="it", +) + + +config_coqui_common_voice_metafile_pt_train_with_scores_pt = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_pt_train_with_scores.csv", + language="pt", +) + + +config_coqui_common_voice_metafile_pt_test_with_scores_pt = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_pt_test_with_scores.csv", + language="pt", +) + + +config_coqui_common_voice_metafile_pt_dev_with_scores_pt = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_pt_dev_with_scores.csv", + language="pt", +) + + +config_coqui_common_voice_metafile_en_train_en = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_en_train.csv", + language="en", +) + + +config_coqui_common_voice_metafile_en_test_en = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_en_test.csv", + language="en", +) + + +config_coqui_common_voice_metafile_en_dev_en = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + 
meta_file_train="/raid/datasets/common_voice/metafile_en_dev.csv", + language="en", +) + + +config_coqui_common_voice_metafile_tr_validated_tr = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_tr_validated.csv", + language="tr", +) + + +config_coqui_common_voice_metafile_ru_validated_ru = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_ru_validated.csv", + language="ru", +) + + +config_coqui_common_voice_metafile_nl_validated_nl = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_nl_validated.csv", + language="nl", +) + + +config_coqui_common_voice_metafile_cs_validated_cs = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_cs_validated.csv", + language="cs", +) + + +config_coqui_common_voice_metafile_fr_validated_fr = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_fr_validated.csv", + language="fr", +) + + +config_coqui_common_voice_metafile_es_validated_es = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_es_validated.csv", + language="es", +) + + +config_coqui_common_voice_metafile_pl_validated_pl = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_pl_validated.csv", + language="pl", +) + + +config_coqui_common_voice_metafile_ar_validated_ar = BaseDatasetConfig( + formatter="coqui", + 
dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_ar_validated.csv", + language="ar", +) + + +config_coqui_common_voice_metafile_zh_CN_validated_zh_cn = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_zh-CN_validated.csv", + language="zh-cn", +) + + +config_coqui_common_voice_metafile_ja_validated_ja = BaseDatasetConfig( + formatter="coqui", + dataset_name="coqui", + path="/raid/datasets/common_voice/", + meta_file_train="/raid/datasets/common_voice/metafile_ja_validated.csv", + language="ja", +) + +# DATASETS_CONFIG_LIST=[config_coqui_MLS_metadata_train_with_previous_audio_key_de, config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_MLS_metadata_dev_with_previous_audio_key_de, config_coqui_mls_french_metadata_with_previous_audio_key_fr, config_coqui_mls_spanish_metadata_with_previous_audio_key_es, config_coqui_mls_italian_metadata_with_previous_audio_key_it, config_coqui_mls_portuguese_metadata_with_previous_audio_key_pt, config_coqui_mls_polish_metadata_with_previous_audio_key_pl, config_coqui_common_voice_metafile_it_train_with_scores_it, config_coqui_common_voice_metafile_it_test_with_scores_it, config_coqui_common_voice_metafile_it_dev_with_scores_it, config_coqui_common_voice_metafile_pt_train_with_scores_pt, config_coqui_common_voice_metafile_pt_test_with_scores_pt, config_coqui_common_voice_metafile_pt_dev_with_scores_pt, config_coqui_common_voice_metafile_en_train_en, config_coqui_common_voice_metafile_en_test_en, config_coqui_common_voice_metafile_en_dev_en, config_coqui_common_voice_metafile_tr_validated_tr, config_coqui_common_voice_metafile_ru_validated_ru, config_coqui_common_voice_metafile_nl_validated_nl, config_coqui_common_voice_metafile_cs_validated_cs, config_coqui_common_voice_metafile_fr_validated_fr, 
config_coqui_common_voice_metafile_es_validated_es, config_coqui_common_voice_metafile_pl_validated_pl, config_coqui_common_voice_metafile_ar_validated_ar, config_coqui_common_voice_metafile_zh_CN_validated_zh_cn, config_coqui_common_voice_metafile_ja_validated_ja] + +# DATASETS_CONFIG_LIST = [config_coqui_mls_french_metadata_with_previous_audio_key_fr, config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_mls_spanish_metadata_with_previous_audio_key_es, config_coqui_mls_italian_metadata_with_previous_audio_key_it] + +DATASETS_CONFIG_LIST = [config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_mls_italian_metadata_with_previous_audio_key_it] + +def freeze_layers(trainer): + pass + +def main(): + # init args and config + model_args = GPTArgs( + max_conditioning_length=132300, # 6 secs + min_conditioning_length=66150, # 3 secs + debug_loading_failures=True, + max_wav_length=255995, # ~11.6 seconds + max_text_length=200, + tokenizer_file="/raid/datasets/xtts_models/vocab.json", + mel_norm_file="/raid/datasets/xtts_models/mel_stats.pth", + dvae_checkpoint="/raid/datasets/xtts_models/dvae.pth", + gpt_checkpoint="/raid/datasets/xtts_models/gpt.pth", + gpt_num_audio_tokens=8194, + gpt_start_audio_token=8192, + gpt_stop_audio_token=8193, + ) + audio_config = XttsAudioConfig( + sample_rate=22050, # autoregressive SR + dvae_sample_rate=22050, + diffusion_sample_rate=24000, + output_sample_rate=24000 + ) + config = GPTConfig( + output_path=OUT_PATH, + model_args=model_args, + run_name=RUN_NAME, + project_name=PROJECT_NAME, + run_description=""" + GPT XTTS training + """, + dashboard_logger=DASHBOARD_LOGGER, + logger_uri=LOGGER_URI, + audio=audio_config, + batch_size=BATCH_SIZE, + batch_group_size=48, + eval_batch_size=BATCH_SIZE, + num_loader_workers=8, + eval_split_max_size=256, + print_step=50, + plot_step=100, + log_model_step=1000, + save_step=10000, + save_n_checkpoints=1, + save_checkpoints=True, + # target_loss="loss", + 
print_eval=False, + # Optimizer values like tortoise. However, they used pytorch implementation with modifications to not apply WD to non-weight parameters. We are using default Pytorch + optimizer="AdamW", + optimizer_wd_only_on_weights=True, + optimizer_params={"betas": [.9, .96], "eps": 1e-8, "weight_decay": 1e-2}, + lr=5e-06, # learning rate + # lr=1e-4, # learning rate + # ToDo: implement 500 step warmup like tortoise and EMA weights replaces LR decay with rate: .999 + lr_scheduler="MultiStepLR", + # it was adjusted accordly for the new step scheme + lr_scheduler_params={"milestones": [50000 * 18, 150000 * 18, 300000 * 18], "gamma": 0.5, "last_epoch": -1}, + ) + + # init the model from config + model = GPTTrainer.init_from_config(config) + + # load training samples + train_samples, eval_samples = load_tts_samples( + DATASETS_CONFIG_LIST, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, + ) + + # init the trainer and 🚀 + trainer = Trainer( + TrainerArgs(restore_path=RESTORE_PATH, skip_train_epoch=SKIP_TRAIN_EPOCH, start_with_eval=START_WITH_EVAL, grad_accum_steps=GRAD_ACUMM_STEPS), + config, + output_path=OUT_PATH, + model=model, + train_samples=train_samples, + eval_samples=eval_samples, + callbacks={"on_epoch_start": freeze_layers} + ) + trainer.fit() + + +if __name__ == "__main__": + RUN_NAME = "GPT_XTTS" + PROJECT_NAME = "XTTS" + OUT_PATH = "/raid/edresson/dev/Checkpoints/XTTS_style_emb/" + DASHBOARD_LOGGER = "clearml" + LOGGER_URI = "s3://coqui-ai-models/TTS/Checkpoints/XTTS_style_emb/" + RESTORE_PATH = None + SKIP_TRAIN_EPOCH = False + START_WITH_EVAL = True + BATCH_SIZE = 9 + GRAD_ACUMM_STEPS = 28 + + # debug + DASHBOARD_LOGGER = "tensorboard" + LOGGER_URI = None + RESTORE_PATH = None + BATCH_SIZE = 2 + GRAD_ACUMM_STEPS = 1 + NUM_LOADERS = 1 + + + + main() From 40a4e631ea9586f24a0f8758a626ffd40bfc2bc2 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Wed, 11 Oct 2023 19:04:18 -0300 
Subject: [PATCH 02/24] Update mel spectrogram for the style encoder --- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index f73aeb056c..6494f3364f 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -157,7 +157,17 @@ def __init__(self, config: Coqpit): print(">> GPT weights randomly initialized! If you want you can specify a checkpoint in config.model_args.gpt_checkpoint") # Mel spectrogram extractor for conditioning - self.torch_mel_spectrogram = TorchMelSpectrogram(mel_norm_file=self.args.mel_norm_file, sampling_rate=config.audio.sample_rate) + self.torch_mel_spectrogram_style_encoder = TorchMelSpectrogram( + filter_length=4096, + hop_length=1024, + win_length=4096, + normalize=False, + sampling_rate=config.audio.sample_rate, + mel_fmin=0, + mel_fmax=8000, + n_mel_channels=80, + mel_norm_file=self.args.mel_norm_file + ) # Load DVAE self.dvae = DiscreteVAE( @@ -224,9 +234,9 @@ def format_batch_on_device(self, batch): # transform waves from torch.Size([B, num_cond_samples, 1, T] to torch.Size([B * num_cond_samples, 1, T] because if is faster than iterate the tensor B, num_cond_samples, C, T = batch["conditioning"].size() conditioning_reshaped = batch["conditioning"].view(B*num_cond_samples, C, T) - paired_conditioning_mel = self.torch_mel_spectrogram(conditioning_reshaped) + paired_conditioning_mel = self.torch_mel_spectrogram_style_encoder(conditioning_reshaped) # transform torch.Size([B * num_cond_samples, n_mel, T_mel]) in torch.Size([B, num_cond_samples, n_mel, T_mel]) - n_mel = self.torch_mel_spectrogram.n_mel_channels # paired_conditioning_mel.size(1) + n_mel = self.torch_mel_spectrogram_style_encoder.n_mel_channels # paired_conditioning_mel.size(1) T_mel = paired_conditioning_mel.size(2) paired_conditioning_mel = 
paired_conditioning_mel.view(B, num_cond_samples, n_mel, T_mel) # get the conditioning embeddings From 47d613df3af2717314595bf06665b60833786cef Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Fri, 13 Oct 2023 15:49:37 -0300 Subject: [PATCH 03/24] Add reproducible evaluation --- TTS/tts/layers/xtts/trainer/dataset.py | 71 ++++++++++++++-------- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 2 +- recipes/multilingual/xtts_v1/train_xtts.py | 2 +- 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/TTS/tts/layers/xtts/trainer/dataset.py b/TTS/tts/layers/xtts/trainer/dataset.py index 9736ae6cb0..3ac22b5dcb 100644 --- a/TTS/tts/layers/xtts/trainer/dataset.py +++ b/TTS/tts/layers/xtts/trainer/dataset.py @@ -63,35 +63,48 @@ def load_audio(audiopath, sampling_rate): return audio class XTTSDataset(torch.utils.data.Dataset): - def __init__(self, config, samples, tokenizer, sample_rate): + def __init__(self, config, samples, tokenizer, sample_rate, is_eval=False): self.config = config model_args = config.model_args self.failed_samples = set() self.debug_failures = model_args.debug_loading_failures self.max_conditioning_length = model_args.max_conditioning_length self.min_conditioning_length = model_args.min_conditioning_length - - # self.samples = [] - # cache the samples and added type "0" for all samples - # ToDo: find a better way to deal with type - # for item in samples: - # self.samples.append([item['audio_file'], item["text"], 0]) - self.samples = samples - random.seed(config.training_seed) - # random.shuffle(self.samples) - random.shuffle(self.samples) - # order by language - self.samples = key_samples_by_col(self.samples, "language") - print(" > Sampling by language:", self.samples.keys()) - - # use always the output sampling rate to load in the highest quality + self.is_eval = is_eval + self.tokenizer = tokenizer self.sample_rate = sample_rate self.max_wav_len = model_args.max_wav_length self.max_text_len = model_args.max_text_length assert 
self.max_wav_len is not None and self.max_text_len is not None - # load specific vocabulary - self.tokenizer = tokenizer + self.samples = samples + if not is_eval: + random.seed(config.training_seed) + # random.shuffle(self.samples) + random.shuffle(self.samples) + # order by language + self.samples = key_samples_by_col(self.samples, "language") + print(" > Sampling by language:", self.samples.keys()) + else: + # for evaluation load and check samples that are corrupted to ensures the reproducibility + self.check_eval_samples() + + def check_eval_samples(self): + print("Filtering invalid eval samples!!") + new_samples = [] + for sample in self.samples: + try: + tseq, _, wav, _, _, _ = self.load_item(sample) + except: + pass + # Basically, this audio file is nonexistent or too long to be supported by the dataset. + if wav is None or \ + (self.max_wav_len is not None and wav.shape[-1] > self.max_wav_len) or \ + (self.max_text_len is not None and tseq.shape[0] > self.max_text_len): + continue + new_samples.append(sample) + self.samples = new_samples + print("Total eval samples after filtering:", len(self.samples)) def get_text(self, text, lang): tokens = self.tokenizer.encode(text, lang) @@ -118,13 +131,17 @@ def load_item(self, sample): return tseq, audiopath, wav, cond, cond_len, cond_idxs def __getitem__(self, index): - # select a random language - lang = random.choice(list(self.samples.keys())) - # select random sample - index = random.randint(0, len(self.samples[lang]) - 1) - sample = self.samples[lang][index] - # a unique id for each sampel to deal with fails - sample_id = lang+"_"+str(index) + if self.is_eval: + sample = self.samples[index] + sample_id = str(index) + else: + # select a random language + lang = random.choice(list(self.samples.keys())) + # select random sample + index = random.randint(0, len(self.samples[lang]) - 1) + sample = self.samples[lang][index] + # a unique id for each sampel to deal with fails + sample_id = lang+"_"+str(index) # ignore 
samples that we already know that is not valid ones if sample_id in self.failed_samples: @@ -167,11 +184,14 @@ def __getitem__(self, index): return res def __len__(self): + if self.is_eval: + return len(self.samples) return sum([len(v) for v in self.samples.values()]) def collate_fn(self, batch): # convert list of dicts to dict of lists B = len(batch) + batch = {k: [dic[k] for dic in batch] for k in batch[0]} # stack for features that already have the same shape @@ -198,5 +218,4 @@ def collate_fn(self, batch): batch["wav"] = wav_padded batch["padded_text"] = text_padded - return batch diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 6494f3364f..71cfd6e4aa 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -327,7 +327,7 @@ def get_data_loader( else: # Todo: remove the randomness of dataset when it is eval # init dataloader - dataset = XTTSDataset(self.config, samples, self.tokenizer, config.audio.sample_rate) + dataset = XTTSDataset(self.config, samples, self.tokenizer, config.audio.sample_rate, is_eval) # wait all the DDP process to be ready if num_gpus > 1: diff --git a/recipes/multilingual/xtts_v1/train_xtts.py b/recipes/multilingual/xtts_v1/train_xtts.py index fc2b5d8a7d..4e987a4f73 100644 --- a/recipes/multilingual/xtts_v1/train_xtts.py +++ b/recipes/multilingual/xtts_v1/train_xtts.py @@ -355,7 +355,7 @@ def main(): DASHBOARD_LOGGER = "tensorboard" LOGGER_URI = None RESTORE_PATH = None - BATCH_SIZE = 2 + BATCH_SIZE = 10 GRAD_ACUMM_STEPS = 1 NUM_LOADERS = 1 From bafab049c210263c26e13659c6445ccb213f2d67 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 16 Oct 2023 09:06:59 -0300 Subject: [PATCH 04/24] Add prompting masking --- TTS/tts/layers/xtts/gpt.py | 67 +++++++++++++--------- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 14 ++--- recipes/multilingual/xtts_v1/train_xtts.py | 4 +- 3 files changed, 46 insertions(+), 39 deletions(-) diff 
--git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index 88ce100c72..dce8a137ea 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -233,6 +233,7 @@ def get_logits( prompt=None, get_attns=False, return_latent=False, + attn_mask_cond=None, attn_mask_text=None, attn_mask_mel=None, ): @@ -248,8 +249,12 @@ def get_logits( if attn_mask_text is not None: attn_mask = torch.cat([attn_mask_text, attn_mask_mel], dim=1) if prompt is not None: - attn_mask_prompt = torch.ones(prompt.shape[0], offset, dtype=torch.bool, device=emb.device) - attn_mask = torch.cat([attn_mask_prompt, attn_mask], dim=1) + if attn_mask_cond is not None: + attn_mask = torch.cat([attn_mask_cond, attn_mask], dim=1) + else: + attn_mask_cond = torch.ones(prompt.shape[0], offset, dtype=torch.bool, device=emb.device) + attn_mask = torch.cat([attn_mask_cond, attn_mask], dim=1) + gpt_out = self.gpt( inputs_embeds=emb, @@ -326,7 +331,7 @@ def get_prompts(self, prompt_codes): prompt = F.pad(prompt, (0, 1), value=self.stop_prompt_token) return prompt - def get_style_emb(self, cond_input, cond_lens=None, cond_seg_len=None, return_latent=False, sample=True): + def get_style_emb(self, cond_input, return_latent=False): """ cond_input: (b, 80, s) or (b, 1, 80, s) conds: (b, 1024, s) @@ -335,26 +340,7 @@ def get_style_emb(self, cond_input, cond_lens=None, cond_seg_len=None, return_la if not return_latent: if cond_input.ndim == 4: cond_input = cond_input.squeeze(1) - if sample: - _len_secs = random.randint(2, 6) # in secs - cond_seg_len = int((22050 / 1024) * _len_secs) # in frames - if cond_input.shape[-1] >= cond_seg_len: - new_conds = [] - for i in range(cond_input.shape[0]): - cond_len = int(cond_lens[i] / 1024) - if cond_len < cond_seg_len: - start = 0 - else: - start = random.randint(0, cond_len - cond_seg_len) - cond_vec = cond_input[i, :, start : start + cond_seg_len] - new_conds.append(cond_vec) - conds = torch.stack(new_conds, dim=0) - else: - cond_seg_len = 5 if 
cond_seg_len is None else cond_seg_len # secs - cond_frame_len = int((22050 / 1024) * cond_seg_len) - conds = cond_input[:, :, -cond_frame_len:] - - conds = self.conditioning_encoder(conds) + conds = self.conditioning_encoder(cond_input) else: # already computed conds = cond_input.unsqueeze(1) @@ -366,10 +352,9 @@ def forward( text_lengths, audio_codes, wav_lengths, - cond_lens=None, cond_mels=None, + cond_idxs=None, cond_latents=None, - loss_weights=None, return_attentions=False, return_latent=False, ): @@ -377,11 +362,12 @@ def forward( Forward pass that uses both text and voice in either text conditioning mode or voice conditioning mode (actuated by `text_first`). - cond_mels: MEL float tensor, (b, 1, 80,s) text_inputs: long tensor, (b,t) text_lengths: long tensor, (b,) mel_inputs: long tensor, (b,m) wav_lengths: long tensor, (b,) + cond_mels: MEL float tensor, (b, 1, 80,s) + cond_idxs: cond start and end indexs, (b, 2) If return_attentions is specified, only logits are returned. If return_latent is specified, loss & logits are not computed or returned. Only the predicted latents are returned. @@ -393,6 +379,11 @@ def forward( max_text_len = text_lengths.max() code_lengths = torch.ceil(wav_lengths / self.code_stride_len).long() + 3 + if cond_idxs is not None: + # recompute cond idxs for mel lengths + for idx, l in enumerate(code_lengths): + cond_idxs[idx] = cond_idxs[idx] / self.code_stride_len + # If len(codes) + 3 is larger than maxiumum allowed length, we truncate the codes. 
max_mel_len = code_lengths.max() @@ -435,9 +426,16 @@ def forward( ) # Set attn_mask + attn_mask_cond = None attn_mask_text = None attn_mask_mel = None if not return_latent: + attn_mask_cond = torch.ones( + cond_mels.shape[0], + cond_mels.shape[-1], + dtype=torch.bool, + device=text_inputs.device, + ) attn_mask_text = torch.ones( text_inputs.shape[0], text_inputs.shape[1], @@ -451,6 +449,11 @@ def forward( device=audio_codes.device, ) + if cond_idxs is not None: + for idx, r in enumerate(cond_idxs.squeeze()): + l = r[1] - r[0] + attn_mask_cond[idx, l : ] = 0.0 + for idx, l in enumerate(text_lengths): attn_mask_text[idx, l + 1 :] = 0.0 @@ -465,7 +468,7 @@ def forward( # Compute speech conditioning input if cond_latents is None: - cond_latents = self.get_style_emb(cond_mels, cond_lens).transpose(1, 2) + cond_latents = self.get_style_emb(cond_mels).transpose(1, 2) # Get logits sub = -5 # don't ask me why 😄 @@ -480,6 +483,7 @@ def forward( prompt=cond_latents, get_attns=return_attentions, return_latent=return_latent, + attn_mask_cond=attn_mask_cond, attn_mask_text=attn_mask_text, attn_mask_mel=attn_mask_mel, ) @@ -495,12 +499,19 @@ def forward( for idx, l in enumerate(code_lengths): mel_targets[idx, l + 1 :] = -1 - + # check if stoptoken is in every row of mel_targets assert (mel_targets == self.stop_audio_token).sum() >= mel_targets.shape[ 0 ], f" ❗ mel_targets does not contain stop token ({self.stop_audio_token}) in every row." 
+ # ignore the loss for the segment used for conditioning + # coin flip for the segment to be ignored + if cond_idxs is not None: + cond_start = cond_idxs[idx, 0] + cond_end = cond_idxs[idx, 1] + mel_targets[idx, cond_start:cond_end] = -1 + # Compute losses loss_text = F.cross_entropy( text_logits, text_targets.long(), ignore_index=-1, label_smoothing=self.label_smoothing diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 71cfd6e4aa..e8e5752f8a 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -36,7 +36,6 @@ class GPTConfig(TortoiseConfig): lr: float = 5e-06 training_seed: int = 1 optimizer_wd_only_on_weights: bool = False - use_weighted_loss: bool = False # TODO: move it to the base config weighted_loss_attrs: dict = field(default_factory=lambda: {}) weighted_loss_multipliers: dict = field(default_factory=lambda: {}) @@ -200,7 +199,7 @@ def __init__(self, config: Coqpit): def device(self): return next(self.parameters()).device - def forward(self, text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels, cond_lens): + def forward(self, text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels, cond_idxs): """ Forward pass that uses both text and voice in either text conditioning mode or voice conditioning mode (actuated by `text_first`). 
@@ -209,10 +208,10 @@ def forward(self, text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels text_lengths: long tensor, (b,) mel_inputs: long tensor, (b,m) wav_lengths: long tensor, (b,) - cond_mels: MEL float tensor, (b, num_samples, 80,t_m) + cond_idxs: cond start and end indexs, (b, 2) cond_lengths: long tensor, (b,) """ - losses = self.gpt(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels=cond_mels, cond_lens=cond_lens) + losses = self.gpt(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels=cond_mels, cond_idxs=cond_idxs) return losses @torch.no_grad() @@ -228,7 +227,6 @@ def format_batch_on_device(self, batch): batch["text_lengths"] = batch["text_lengths"] batch["wav_lengths"] = batch["wav_lengths"] batch["text_inputs"] = batch["padded_text"] - batch["cond_lens"] = batch["cond_lens"] batch["cond_idxs"] = batch["cond_idxs"] # compute conditioning mel specs # transform waves from torch.Size([B, num_cond_samples, 1, T] to torch.Size([B * num_cond_samples, 1, T] because if is faster than iterate the tensor @@ -261,7 +259,7 @@ def format_batch_on_device(self, batch): del batch["padded_text"] del batch["wav"] del batch["conditioning"] - + del batch["cond_lens"] return batch def train_step(self, batch, criterion): @@ -272,11 +270,10 @@ def train_step(self, batch, criterion): audio_codes = batch["audio_codes"] wav_lengths = batch["wav_lengths"] - cond_lens=batch["cond_lens"] # Todo: implement masking on the cond slice cond_idxs = batch["cond_idxs"] - loss_text, loss_mel, _ = self.forward(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels, cond_lens) + loss_text, loss_mel, _ = self.forward(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels, cond_idxs) loss_dict["loss_text_ce"] = loss_text * self.args.gpt_loss_text_ce_weight loss_dict["loss_mel_ce"] = loss_mel * self.args.gpt_loss_mel_ce_weight loss_dict["loss"] = loss_dict["loss_text_ce"] + loss_dict["loss_mel_ce"] @@ -325,7 +322,6 @@ def get_data_loader( if 
is_eval and not config.run_eval: loader = None else: - # Todo: remove the randomness of dataset when it is eval # init dataloader dataset = XTTSDataset(self.config, samples, self.tokenizer, config.audio.sample_rate, is_eval) diff --git a/recipes/multilingual/xtts_v1/train_xtts.py b/recipes/multilingual/xtts_v1/train_xtts.py index 4e987a4f73..429e4e3af9 100644 --- a/recipes/multilingual/xtts_v1/train_xtts.py +++ b/recipes/multilingual/xtts_v1/train_xtts.py @@ -253,7 +253,7 @@ # DATASETS_CONFIG_LIST = [config_coqui_mls_french_metadata_with_previous_audio_key_fr, config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_mls_spanish_metadata_with_previous_audio_key_es, config_coqui_mls_italian_metadata_with_previous_audio_key_it] DATASETS_CONFIG_LIST = [config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_mls_italian_metadata_with_previous_audio_key_it] - + def freeze_layers(trainer): pass @@ -262,7 +262,7 @@ def main(): model_args = GPTArgs( max_conditioning_length=132300, # 6 secs min_conditioning_length=66150, # 3 secs - debug_loading_failures=True, + debug_loading_failures=False, max_wav_length=255995, # ~11.6 seconds max_text_length=200, tokenizer_file="/raid/datasets/xtts_models/vocab.json", From 2f868dd5c256f2cb92c5d9cbfaeb8b79ba38b938 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 16 Oct 2023 09:28:32 -0300 Subject: [PATCH 05/24] Bug fix on reproducible evaluation --- TTS/tts/layers/xtts/gpt.py | 1 - TTS/tts/layers/xtts/trainer/dataset.py | 22 ++++++++++++++++------ TTS/tts/layers/xtts/trainer/gpt_trainer.py | 8 ++++---- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index dce8a137ea..52086f1368 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -254,7 +254,6 @@ def get_logits( else: attn_mask_cond = torch.ones(prompt.shape[0], offset, dtype=torch.bool, device=emb.device) attn_mask = torch.cat([attn_mask_cond, 
attn_mask], dim=1) - gpt_out = self.gpt( inputs_embeds=emb, diff --git a/TTS/tts/layers/xtts/trainer/dataset.py b/TTS/tts/layers/xtts/trainer/dataset.py index 3ac22b5dcb..b122fc8aac 100644 --- a/TTS/tts/layers/xtts/trainer/dataset.py +++ b/TTS/tts/layers/xtts/trainer/dataset.py @@ -23,14 +23,24 @@ def key_samples_by_col(samples, col): return samples_by_col -def get_prompt_slice(gt_path, max_sample_length, min_sample_length, sample_rate): +def get_prompt_slice(gt_path, max_sample_length, min_sample_length, sample_rate, is_eval=False): rel_clip = load_audio(gt_path, sample_rate) - sample_length = random.randint(min_sample_length, max_sample_length) + # if eval uses a middle size sample when it is possible to be more reproducible + if is_eval: + sample_length = int((min_sample_length + max_sample_length)/2) + else: + sample_length = random.randint(min_sample_length, max_sample_length) gap = rel_clip.shape[-1] - sample_length if gap < 0: sample_length = rel_clip.shape[-1] // 2 gap = rel_clip.shape[-1] - sample_length - rand_start = random.randint(0, gap) + + # if eval start always from the position 0 to be more reproducible + if is_eval: + rand_start = 0 + else: + rand_start = random.randint(0, gap) + rand_end = rand_start+sample_length rel_clip = rel_clip[:, rand_start:rand_end] rel_clip = F.pad(rel_clip, pad=(0, max_sample_length - rel_clip.shape[-1])) @@ -90,7 +100,7 @@ def __init__(self, config, samples, tokenizer, sample_rate, is_eval=False): self.check_eval_samples() def check_eval_samples(self): - print("Filtering invalid eval samples!!") + print(" > Filtering invalid eval samples!!") new_samples = [] for sample in self.samples: try: @@ -104,7 +114,7 @@ def check_eval_samples(self): continue new_samples.append(sample) self.samples = new_samples - print("Total eval samples after filtering:", len(self.samples)) + print(" > Total eval samples after filtering:", len(self.samples)) def get_text(self, text, lang): tokens = self.tokenizer.encode(text, lang) @@ -126,7 
+136,7 @@ def load_item(self, sample): raise ValueError # get a slice from GT to condition the model - cond, cond_len, cond_idxs = get_prompt_slice(audiopath, self.max_conditioning_length, self.min_conditioning_length, self.sample_rate) + cond, cond_len, cond_idxs = get_prompt_slice(audiopath, self.max_conditioning_length, self.min_conditioning_length, self.sample_rate, self.is_eval) return tseq, audiopath, wav, cond, cond_len, cond_idxs diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index e8e5752f8a..d884f12a79 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -208,8 +208,8 @@ def forward(self, text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels text_lengths: long tensor, (b,) mel_inputs: long tensor, (b,m) wav_lengths: long tensor, (b,) + cond_mels: MEL float tensor, (b, num_samples, 80,t_m) cond_idxs: cond start and end indexs, (b, 2) - cond_lengths: long tensor, (b,) """ losses = self.gpt(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels=cond_mels, cond_idxs=cond_idxs) return losses @@ -269,10 +269,8 @@ def train_step(self, batch, criterion): text_lengths = batch["text_lengths"] audio_codes = batch["audio_codes"] wav_lengths = batch["wav_lengths"] - - # Todo: implement masking on the cond slice cond_idxs = batch["cond_idxs"] - + loss_text, loss_mel, _ = self.forward(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels, cond_idxs) loss_dict["loss_text_ce"] = loss_text * self.args.gpt_loss_text_ce_weight loss_dict["loss_mel_ce"] = loss_mel * self.args.gpt_loss_mel_ce_weight @@ -280,6 +278,8 @@ def train_step(self, batch, criterion): return {"model_outputs": None}, loss_dict def eval_step(self, batch, criterion): + # ignore masking for more consistent evaluation + batch["cond_idxs"] = None return self.train_step(batch, criterion) def on_epoch_start(self, trainer): # pylint: disable=W0613 From 
c4ceaabe2cc85654d47c902779766e8e985a010d Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 16 Oct 2023 15:32:00 -0300 Subject: [PATCH 06/24] Add test sentences during the training --- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 96 ++++++++++++---------- TTS/tts/models/xtts.py | 2 +- recipes/multilingual/xtts_v1/train_xtts.py | 25 +++--- 3 files changed, 68 insertions(+), 55 deletions(-) diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index d884f12a79..87b1228ec7 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -12,7 +12,8 @@ from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer from TTS.tts.layers.xtts.gpt import GPT -from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig +from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig, Xtts +from TTS.tts.configs.xtts_config import XttsConfig from TTS.tts.models.base_tts import BaseTTS from coqpit import Coqpit @@ -25,20 +26,21 @@ from trainer.torch import DistributedSampler from trainer.trainer_utils import get_optimizer, get_scheduler - from TTS.tts.layers.xtts.trainer.dataset import XTTSDataset from TTS.utils.io import load_fsspec from TTS.tts.layers.xtts.dvae import DiscreteVAE +from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder + @dataclass -class GPTConfig(TortoiseConfig): +class GPTTrainerConfig(XttsConfig): lr: float = 5e-06 training_seed: int = 1 optimizer_wd_only_on_weights: bool = False weighted_loss_attrs: dict = field(default_factory=lambda: {}) weighted_loss_multipliers: dict = field(default_factory=lambda: {}) - + test_sentences: List[dict] = field(default_factory=lambda: []) @dataclass class XttsAudioConfig(XttsAudioConfig): @@ -58,7 +60,8 @@ class GPTArgs(XttsArgs): tokenizer_file: str = "" mel_norm_file: str = "https://coqui.gateway.scarf.sh/v0.14.0_models/mel_norms.pth" dvae_checkpoint: str = "" - gpt_checkpoint: str = "" + xtts_checkpoint: str = "" + 
gpt_checkpoint: str = "" # if defined it will replace the gpt weights on xtts model vocoder: str = "" # overide vocoder key on the config to avoid json write issues @@ -80,28 +83,18 @@ def __init__(self, config: Coqpit): """ super().__init__(config, ap=None, tokenizer=None) self.config = config - - self.tokenizer = VoiceBpeTokenizer(self.args.tokenizer_file) - - self.args.gpt_number_text_tokens = self.tokenizer.tokenizer.get_vocab_size() - self.args.gpt_start_text_token = self.tokenizer.tokenizer.token_to_id("[START]") - self.args.gpt_stop_text_token = self.tokenizer.tokenizer.token_to_id("[STOP]") - - self.gpt = GPT( - layers=self.args.gpt_layers, - model_dim=self.args.gpt_n_model_channels, - start_text_token=self.args.gpt_start_text_token, - stop_text_token=self.args.gpt_stop_text_token, - heads=self.args.gpt_n_heads, - max_text_tokens=self.args.gpt_max_text_tokens, - max_mel_tokens=self.args.gpt_max_audio_tokens, - max_prompt_tokens=self.args.gpt_max_prompt_tokens, - number_text_tokens=self.args.gpt_number_text_tokens, - num_audio_tokens=self.args.gpt_num_audio_tokens, - start_audio_token=self.args.gpt_start_audio_token, - stop_audio_token=self.args.gpt_stop_audio_token, - ).cuda() - + # init XTTS model + self.xtts = Xtts(self.config) + # create the tokenizer with the target vocabulary + self.xtts.tokenizer = VoiceBpeTokenizer(self.args.tokenizer_file) + # init gpt encoder and hifigan decoder + self.xtts.init_models() + # set mel stats + if self.args.mel_norm_file: + self.xtts.mel_stats = load_fsspec(self.args.mel_norm_file) + + if self.args.xtts_checkpoint: + self.load_checkpoint(self.config, self.args.xtts_checkpoint, eval=False, strict=False) # load GPT if available if self.args.gpt_checkpoint: @@ -122,8 +115,8 @@ def __init__(self, config: Coqpit): del gpt_checkpoint[key] # edit checkpoint if the number of tokens is changed to ensures the better transfer learning possible - if "text_embedding.weight" in gpt_checkpoint and 
gpt_checkpoint["text_embedding.weight"].shape != self.gpt.text_embedding.weight.shape: - num_new_tokens = self.gpt.text_embedding.weight.shape[0] - gpt_checkpoint["text_embedding.weight"].shape[0] + if "text_embedding.weight" in gpt_checkpoint and gpt_checkpoint["text_embedding.weight"].shape != self.xtts.gpt.text_embedding.weight.shape: + num_new_tokens = self.xtts.gpt.text_embedding.weight.shape[0] - gpt_checkpoint["text_embedding.weight"].shape[0] print(f" > Loading checkpoint with {num_new_tokens} additional tokens.") # add new tokens to a linear layer (text_head) @@ -137,7 +130,7 @@ def __init__(self, config: Coqpit): # add new weights to the linear layer (text_head) text_head_weight = gpt_checkpoint["text_head.weight"] start_token_row = text_head_weight[-1, :] - new_entry = torch.randn(num_new_tokens, self.gpt.text_head.weight.shape[1]) + new_entry = torch.randn(num_new_tokens, self.xtts.gpt.text_head.weight.shape[1]) text_head_weight = torch.cat([text_head_weight, new_entry], axis=0) text_head_weight[-1, :] = start_token_row gpt_checkpoint["text_head.weight"] = text_head_weight @@ -150,10 +143,8 @@ def __init__(self, config: Coqpit): text_head_bias[-1] = start_token_row gpt_checkpoint["text_head.bias"] = text_head_bias - self.gpt.load_state_dict(gpt_checkpoint, strict=True) + self.xtts.gpt.load_state_dict(gpt_checkpoint, strict=True) print(">> GPT weights restored from:", self.args.gpt_checkpoint) - else: - print(">> GPT weights randomly initialized! 
If you want you can specify a checkpoint in config.model_args.gpt_checkpoint") # Mel spectrogram extractor for conditioning self.torch_mel_spectrogram_style_encoder = TorchMelSpectrogram( @@ -195,6 +186,7 @@ def __init__(self, config: Coqpit): # Mel spectrogram extractor for DVAE self.torch_mel_spectrogram_dvae = TorchMelSpectrogram(mel_norm_file=self.args.mel_norm_file, sampling_rate=config.audio.dvae_sample_rate) + @property def device(self): return next(self.parameters()).device @@ -211,12 +203,30 @@ def forward(self, text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels cond_mels: MEL float tensor, (b, num_samples, 80,t_m) cond_idxs: cond start and end indexs, (b, 2) """ - losses = self.gpt(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels=cond_mels, cond_idxs=cond_idxs) + losses = self.xtts.gpt(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels=cond_mels, cond_idxs=cond_idxs) return losses @torch.no_grad() def test_run(self, assets) -> Tuple[Dict, Dict]: # pylint: disable=W0613 - return {}, {} + if self.config.test_sentences: + # init gpt for inference mode + self.xtts.gpt.init_gpt_for_inference(kv_cache=self.args.kv_cache, use_deepspeed=False) + self.xtts.gpt.eval() + test_audios = {} + print(" | > Synthesizing test sentences.") + for idx, s_info in enumerate(self.config.test_sentences): + wav = self.xtts.synthesize(s_info["text"], self.config, s_info["speaker_wav"], s_info["language"])["wav"] + test_audios["{}-audio".format(idx)] = wav + + # delete inference layers + del self.xtts.gpt.gpt_inference + del self.xtts.gpt.gpt.wte + return {"audios": test_audios} + + def test_log( + self, outputs: dict, logger: "Logger", assets: dict, steps: int # pylint: disable=unused-argument + ) -> None: + logger.test_audios(steps, outputs["audios"], self.args.output_sample_rate) def format_batch(self, batch: Dict) -> Dict: return batch @@ -323,7 +333,7 @@ def get_data_loader( loader = None else: # init dataloader - dataset = 
XTTSDataset(self.config, samples, self.tokenizer, config.audio.sample_rate, is_eval) + dataset = XTTSDataset(self.config, samples, self.xtts.tokenizer, config.audio.sample_rate, is_eval) # wait all the DDP process to be ready if num_gpus > 1: @@ -362,7 +372,7 @@ def get_optimizer(self) -> List: # ToDo: deal with multi GPU training if self.config.optimizer_wd_only_on_weights: # parameters to only GPT model - net = self.gpt + net = self.xtts.gpt # normalizations norm_modules = (nn.BatchNorm2d, nn.InstanceNorm2d, nn.BatchNorm1d, nn.InstanceNorm1d, @@ -410,7 +420,7 @@ def get_optimizer(self) -> List: self.config.optimizer_params, self.config.lr, # optimize only for the GPT model - parameters=self.gpt.parameters(), + parameters=self.xtts.gpt.parameters(), ) def get_scheduler(self, optimizer) -> List: @@ -432,21 +442,21 @@ def load_checkpoint( target_options={"anon": True}, ): # pylint: disable=unused-argument, disable=W0201, disable=W0102, redefined-builtin """Load the model checkpoint and setup for training or inference""" - state = load_fsspec(checkpoint_path, map_location=torch.device("cpu")) + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"))["model"] # load the model weights - self.gpt.load_state_dict(state, strict=strict) + self.xtts.load_state_dict(state, strict=strict) if eval: + self.xtts.gpt.init_gpt_for_inference(kv_cache=self.args.kv_cache, use_deepspeed=False) self.eval() - self.set_inference() assert not self.training @staticmethod - def init_from_config(config: "GPTConfig", samples: Union[List[List], List[Dict]] = None): + def init_from_config(config: "GPTTrainerConfig", samples: Union[List[List], List[Dict]] = None): """Initiate model from config Args: - config (GPTConfig): Model config. + config (GPTTrainerConfig): Model config. samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training. Defaults to None. 
""" diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 40e8f946c6..3e6097997c 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -387,7 +387,7 @@ def get_gpt_cond_latents(self, audio_path: str, length: int = 3): audio = load_audio(audio_path) audio = audio[:, : 22050 * length] mel = wav_to_mel_cloning(audio, mel_norms=self.mel_stats.cpu()) - cond_latent = self.gpt.get_style_emb(mel.to(self.device), sample=False) + cond_latent = self.gpt.get_style_emb(mel.to(self.device)) return cond_latent.transpose(1, 2) @torch.inference_mode() diff --git a/recipes/multilingual/xtts_v1/train_xtts.py b/recipes/multilingual/xtts_v1/train_xtts.py index 429e4e3af9..f36bf1ae44 100644 --- a/recipes/multilingual/xtts_v1/train_xtts.py +++ b/recipes/multilingual/xtts_v1/train_xtts.py @@ -3,7 +3,7 @@ from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples -from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTTrainer, GPTArgs, XttsAudioConfig, GPTConfig +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTTrainer, GPTArgs, XttsAudioConfig, GPTTrainerConfig config_coqui_MLS_metadata_train_with_previous_audio_key_de = BaseDatasetConfig( @@ -265,21 +265,21 @@ def main(): debug_loading_failures=False, max_wav_length=255995, # ~11.6 seconds max_text_length=200, - tokenizer_file="/raid/datasets/xtts_models/vocab.json", mel_norm_file="/raid/datasets/xtts_models/mel_stats.pth", dvae_checkpoint="/raid/datasets/xtts_models/dvae.pth", - gpt_checkpoint="/raid/datasets/xtts_models/gpt.pth", + tokenizer_file="/raid/datasets/xtts_models/vocab.json", # vocab path of the model that you want to fine-tune + xtts_checkpoint="https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth", # checkpoint path of the model that you want to fine-tune gpt_num_audio_tokens=8194, gpt_start_audio_token=8192, gpt_stop_audio_token=8193, ) audio_config = XttsAudioConfig( - sample_rate=22050, # autoregressive SR + sample_rate=22050, # GPT SR 
dvae_sample_rate=22050, diffusion_sample_rate=24000, output_sample_rate=24000 ) - config = GPTConfig( + config = GPTTrainerConfig( output_path=OUT_PATH, model_args=model_args, run_name=RUN_NAME, @@ -313,6 +313,10 @@ def main(): lr_scheduler="MultiStepLR", # it was adjusted accordly for the new step scheme lr_scheduler_params={"milestones": [50000 * 18, 150000 * 18, 300000 * 18], "gamma": 0.5, "last_epoch": -1}, + test_sentences=[ + {"text": "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", "speaker_wav": "/raid/edresson/dev/ref.wav", "language": "en"}, + {"text": "This cake is great. It's so delicious and moist.", "speaker_wav": "/raid/edresson/dev/ref.wav", "language": "en"}, + ] ) # init the model from config @@ -341,7 +345,7 @@ def main(): if __name__ == "__main__": RUN_NAME = "GPT_XTTS" - PROJECT_NAME = "XTTS" + PROJECT_NAME = "XTTS_trainer" OUT_PATH = "/raid/edresson/dev/Checkpoints/XTTS_style_emb/" DASHBOARD_LOGGER = "clearml" LOGGER_URI = "s3://coqui-ai-models/TTS/Checkpoints/XTTS_style_emb/" @@ -352,12 +356,11 @@ def main(): GRAD_ACUMM_STEPS = 28 # debug - DASHBOARD_LOGGER = "tensorboard" - LOGGER_URI = None - RESTORE_PATH = None - BATCH_SIZE = 10 + # DASHBOARD_LOGGER = "tensorboard" + # LOGGER_URI = None + # RESTORE_PATH = None + BATCH_SIZE = 2 GRAD_ACUMM_STEPS = 1 - NUM_LOADERS = 1 From 9e3598c3b714db5610728635bf5a1dc170a4dd21 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Wed, 18 Oct 2023 09:42:00 -0300 Subject: [PATCH 07/24] Bug Fix on inference using XTTS trainer checkpoint --- TTS/tts/layers/xtts/gpt.py | 5 +- TTS/tts/layers/xtts/hifigan_decoder.py | 29 +-- TTS/tts/layers/xtts/stream_generator.py | 271 ++++++--------------- TTS/tts/layers/xtts/trainer/dataset.py | 79 +++--- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 143 ++++++----- TTS/tts/models/xtts.py | 48 ++-- recipes/ljspeech/xtts_v1/train_xtts.py | 145 +++++++++++ recipes/multilingual/xtts_v1/train_xtts.py | 67 ++--- 
tests/zoo_tests/test_models.py | 3 +- 9 files changed, 419 insertions(+), 371 deletions(-) create mode 100644 recipes/ljspeech/xtts_v1/train_xtts.py diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index 52086f1368..8f24ac0154 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -197,6 +197,7 @@ def init_gpt_for_inference(self, kv_cache=True, use_deepspeed=False): if use_deepspeed: import deepspeed + self.ds_engine = deepspeed.init_inference( model=self.gpt_inference.half(), # Transformers models mp_size=1, # Number of GPU @@ -451,7 +452,7 @@ def forward( if cond_idxs is not None: for idx, r in enumerate(cond_idxs.squeeze()): l = r[1] - r[0] - attn_mask_cond[idx, l : ] = 0.0 + attn_mask_cond[idx, l:] = 0.0 for idx, l in enumerate(text_lengths): attn_mask_text[idx, l + 1 :] = 0.0 @@ -498,7 +499,7 @@ def forward( for idx, l in enumerate(code_lengths): mel_targets[idx, l + 1 :] = -1 - + # check if stoptoken is in every row of mel_targets assert (mel_targets == self.stop_audio_token).sum() >= mel_targets.shape[ 0 diff --git a/TTS/tts/layers/xtts/hifigan_decoder.py b/TTS/tts/layers/xtts/hifigan_decoder.py index 6439b455a0..5fcff8703b 100644 --- a/TTS/tts/layers/xtts/hifigan_decoder.py +++ b/TTS/tts/layers/xtts/hifigan_decoder.py @@ -1,13 +1,12 @@ import torch +import torchaudio from torch import nn from torch.nn import Conv1d, ConvTranspose1d from torch.nn import functional as F from torch.nn.utils import remove_weight_norm, weight_norm -import torchaudio from TTS.utils.io import load_fsspec - LRELU_SLOPE = 0.1 @@ -224,9 +223,7 @@ def __init__( self.cond_in_each_up_layer = cond_in_each_up_layer # initial upsampling layers - self.conv_pre = weight_norm( - Conv1d(in_channels, upsample_initial_channel, 7, 1, padding=3) - ) + self.conv_pre = weight_norm(Conv1d(in_channels, upsample_initial_channel, 7, 1, padding=3)) resblock = ResBlock1 if resblock_type == "1" else ResBlock2 # upsampling layers self.ups = nn.ModuleList() @@ -246,14 
+243,10 @@ def __init__( self.resblocks = nn.ModuleList() for i in range(len(self.ups)): ch = upsample_initial_channel // (2 ** (i + 1)) - for _, (k, d) in enumerate( - zip(resblock_kernel_sizes, resblock_dilation_sizes) - ): + for _, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)): self.resblocks.append(resblock(ch, k, d)) # post convolution layer - self.conv_post = weight_norm( - Conv1d(ch, out_channels, 7, 1, padding=3, bias=conv_post_bias) - ) + self.conv_post = weight_norm(Conv1d(ch, out_channels, 7, 1, padding=3, bias=conv_post_bias)) if cond_channels > 0: self.cond_layer = nn.Conv1d(cond_channels, upsample_initial_channel, 1) @@ -318,9 +311,7 @@ def inference(self, c): Tensor: [B, 1, T] """ c = c.to(self.conv_pre.weight.device) - c = torch.nn.functional.pad( - c, (self.inference_padding, self.inference_padding), "replicate" - ) + c = torch.nn.functional.pad(c, (self.inference_padding, self.inference_padding), "replicate") return self.forward(c) def remove_weight_norm(self): @@ -342,6 +333,7 @@ def load_checkpoint( assert not self.training self.remove_weight_norm() + class SELayer(nn.Module): def __init__(self, channel, reduction=8): super(SELayer, self).__init__() @@ -425,10 +417,8 @@ def forward(self, x): return torch.nn.functional.conv1d(x, self.filter).squeeze(1) - class ResNetSpeakerEncoder(nn.Module): - """This is copied from 🐸TTS to remove it from the dependencies. 
- """ + """This is copied from 🐸TTS to remove it from the dependencies.""" # pylint: disable=W0102 def __init__( @@ -620,6 +610,7 @@ def load_checkpoint( return criterion, state["step"] return criterion + class HifiDecoder(torch.nn.Module): def __init__( self, @@ -724,9 +715,7 @@ def inference(self, c, g): """ return self.forward(c, g=g) - def load_checkpoint( - self, checkpoint_path, eval=False - ): # pylint: disable=unused-argument, redefined-builtin + def load_checkpoint(self, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin state = load_fsspec(checkpoint_path, map_location=torch.device("cpu")) # remove unused keys state = state["model"] diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py index 8bdd2291ff..e12f8995cf 100644 --- a/TTS/tts/layers/xtts/stream_generator.py +++ b/TTS/tts/layers/xtts/stream_generator.py @@ -1,26 +1,27 @@ # Adapted from: https://github.com/LowinLi/transformers-stream-generator +import copy +import inspect +import random +import warnings +from typing import Callable, List, Optional, Union + +import numpy as np +import torch +import torch.distributed as dist +from torch import nn from transformers import ( + BeamSearchScorer, + ConstrainedBeamSearchScorer, + DisjunctiveConstraint, GenerationConfig, GenerationMixin, LogitsProcessorList, - StoppingCriteriaList, - DisjunctiveConstraint, - BeamSearchScorer, PhrasalConstraint, - ConstrainedBeamSearchScorer, PreTrainedModel, + StoppingCriteriaList, ) -import numpy as np -import random -import warnings -import inspect from transformers.generation.utils import GenerateOutput, SampleOutput, logger -import torch -from typing import Callable, List, Optional, Union -from torch import nn -import torch.distributed as dist -import copy def setup_seed(seed): @@ -48,9 +49,7 @@ def generate( generation_config: Optional[StreamGenerationConfig] = None, logits_processor: Optional[LogitsProcessorList] = None, stopping_criteria: 
Optional[StoppingCriteriaList] = None, - prefix_allowed_tokens_fn: Optional[ - Callable[[int, torch.Tensor], List[int]] - ] = None, + prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None, synced_gpus: Optional[bool] = False, seed=0, **kwargs, @@ -125,7 +124,7 @@ def generate( - [`~generation.BeamSearchEncoderDecoderOutput`], - [`~generation.BeamSampleEncoderDecoderOutput`] """ - #setup_seed(seed) + # setup_seed(seed) # 1. Handle `generation_config` and kwargs that might update it, and validate the `.generate()` call self._validate_model_class() @@ -134,9 +133,7 @@ def generate( # legacy: users may modify the model configuration to control generation -- update the generation config # model attribute accordingly, if it was created from the model config if self.generation_config._from_model_config: - new_generation_config = StreamGenerationConfig.from_model_config( - self.config - ) + new_generation_config = StreamGenerationConfig.from_model_config(self.config) if new_generation_config != self.generation_config: warnings.warn( "You have modified the pretrained model configuration to control generation. This is a" @@ -148,25 +145,14 @@ def generate( generation_config = self.generation_config generation_config = copy.deepcopy(generation_config) - model_kwargs = generation_config.update( - **kwargs - ) # All unused kwargs must be model kwargs + model_kwargs = generation_config.update(**kwargs) # All unused kwargs must be model kwargs # self._validate_model_kwargs(model_kwargs.copy()) # 2. 
Set generation parameters if not already defined - logits_processor = ( - logits_processor if logits_processor is not None else LogitsProcessorList() - ) - stopping_criteria = ( - stopping_criteria - if stopping_criteria is not None - else StoppingCriteriaList() - ) + logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() + stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() - if ( - generation_config.pad_token_id is None - and generation_config.eos_token_id is not None - ): + if generation_config.pad_token_id is None and generation_config.eos_token_id is not None: if model_kwargs.get("attention_mask", None) is None: logger.warning( "The attention mask and the pad token id were not set. As a consequence, you may observe " @@ -175,9 +161,7 @@ def generate( eos_token_id = generation_config.eos_token_id if isinstance(eos_token_id, list): eos_token_id = eos_token_id[0] - logger.warning( - f"Setting `pad_token_id` to `eos_token_id`:{eos_token_id} for open-end generation." - ) + logger.warning(f"Setting `pad_token_id` to `eos_token_id`:{eos_token_id} for open-end generation.") generation_config.pad_token_id = eos_token_id # 3. 
Define model inputs @@ -195,19 +179,11 @@ def generate( model_kwargs["output_hidden_states"] = generation_config.output_hidden_states model_kwargs["use_cache"] = generation_config.use_cache - accepts_attention_mask = "attention_mask" in set( - inspect.signature(self.forward).parameters.keys() - ) + accepts_attention_mask = "attention_mask" in set(inspect.signature(self.forward).parameters.keys()) requires_attention_mask = "encoder_outputs" not in model_kwargs - if ( - model_kwargs.get("attention_mask", None) is None - and requires_attention_mask - and accepts_attention_mask - ): - model_kwargs[ - "attention_mask" - ] = self._prepare_attention_mask_for_generation( + if model_kwargs.get("attention_mask", None) is None and requires_attention_mask and accepts_attention_mask: + model_kwargs["attention_mask"] = self._prepare_attention_mask_for_generation( inputs_tensor, generation_config.pad_token_id, generation_config.eos_token_id, @@ -217,8 +193,7 @@ def generate( if not self.config.is_encoder_decoder: if ( generation_config.pad_token_id is not None - and torch.sum(inputs_tensor[:, -1] == generation_config.pad_token_id) - > 0 + and torch.sum(inputs_tensor[:, -1] == generation_config.pad_token_id) > 0 ): logger.warning( "A decoder-only architecture is being used, but right-padding was detected! For correct " @@ -247,10 +222,7 @@ def generate( # 6. Prepare `max_length` depending on other stopping criteria. 
input_ids_seq_length = input_ids.shape[-1] - has_default_max_length = ( - kwargs.get("max_length") is None - and generation_config.max_length is not None - ) + has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None if has_default_max_length and generation_config.max_new_tokens is None: warnings.warn( "Neither `max_length` nor `max_new_tokens` has been set, `max_length` will default to" @@ -260,12 +232,8 @@ def generate( UserWarning, ) elif has_default_max_length and generation_config.max_new_tokens is not None: - generation_config.max_length = ( - generation_config.max_new_tokens + input_ids_seq_length - ) - elif ( - not has_default_max_length and generation_config.max_new_tokens is not None - ): + generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length + elif not has_default_max_length and generation_config.max_new_tokens is not None: raise ValueError( "Both `max_new_tokens` and `max_length` have been set but they serve the same purpose -- setting a" " limit to the generated output length. Remove one of those arguments. 
Please refer to the" @@ -273,18 +241,13 @@ def generate( "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)" ) - if ( - generation_config.min_length is not None - and generation_config.min_length > generation_config.max_length - ): + if generation_config.min_length is not None and generation_config.min_length > generation_config.max_length: raise ValueError( f"Unfeasible length constraints: the minimum length ({generation_config.min_length}) is larger than" f" the maximum length ({generation_config.max_length})" ) if input_ids_seq_length >= generation_config.max_length: - input_ids_string = ( - "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids" - ) + input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids" logger.warning( f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to" f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider" @@ -293,8 +256,7 @@ def generate( # 7. determine generation mode is_constraint_gen_mode = ( - generation_config.constraints is not None - or generation_config.force_words_ids is not None + generation_config.constraints is not None or generation_config.force_words_ids is not None ) is_contrastive_search_gen_mode = ( @@ -349,9 +311,7 @@ def generate( ) if generation_config.num_beam_groups > generation_config.num_beams: - raise ValueError( - "`num_beam_groups` has to be smaller or equal to `num_beams`" - ) + raise ValueError("`num_beam_groups` has to be smaller or equal to `num_beams`") if is_group_beam_gen_mode and generation_config.do_sample is True: raise ValueError( "Diverse beam search cannot be used in sampling mode. Make sure that `do_sample` is set to `False`." 
@@ -474,14 +434,10 @@ def generate( ) elif is_beam_gen_mode: if generation_config.num_return_sequences > generation_config.num_beams: - raise ValueError( - "`num_return_sequences` has to be smaller or equal to `num_beams`." - ) + raise ValueError("`num_return_sequences` has to be smaller or equal to `num_beams`.") if stopping_criteria.max_length is None: - raise ValueError( - "`max_length` needs to be a stopping_criteria for now." - ) + raise ValueError("`max_length` needs to be a stopping_criteria for now.") # 11. prepare beam search scorer beam_scorer = BeamSearchScorer( @@ -518,9 +474,7 @@ def generate( logits_warper = self._get_logits_warper(generation_config) if stopping_criteria.max_length is None: - raise ValueError( - "`max_length` needs to be a stopping_criteria for now." - ) + raise ValueError("`max_length` needs to be a stopping_criteria for now.") # 12. prepare beam search scorer beam_scorer = BeamSearchScorer( batch_size=batch_size * generation_config.num_return_sequences, @@ -533,8 +487,7 @@ def generate( # 13. interleave input_ids with `num_beams` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( input_ids=input_ids, - expand_size=generation_config.num_beams - * generation_config.num_return_sequences, + expand_size=generation_config.num_beams * generation_config.num_return_sequences, is_encoder_decoder=self.config.is_encoder_decoder, **model_kwargs, ) @@ -556,27 +509,17 @@ def generate( elif is_group_beam_gen_mode: if generation_config.num_return_sequences > generation_config.num_beams: - raise ValueError( - "`num_return_sequences` has to be smaller or equal to `num_beams`." - ) + raise ValueError("`num_return_sequences` has to be smaller or equal to `num_beams`.") if generation_config.num_beams % generation_config.num_beam_groups != 0: - raise ValueError( - "`num_beams` should be divisible by `num_beam_groups` for group beam search." 
- ) + raise ValueError("`num_beams` should be divisible by `num_beam_groups` for group beam search.") if stopping_criteria.max_length is None: - raise ValueError( - "`max_length` needs to be a stopping_criteria for now." - ) + raise ValueError("`max_length` needs to be a stopping_criteria for now.") - has_default_typical_p = ( - kwargs.get("typical_p") is None and generation_config.typical_p == 1.0 - ) + has_default_typical_p = kwargs.get("typical_p") is None and generation_config.typical_p == 1.0 if not has_default_typical_p: - raise ValueError( - "Decoder argument `typical_p` is not supported with beam groups." - ) + raise ValueError("Decoder argument `typical_p` is not supported with beam groups.") # 11. prepare beam search scorer beam_scorer = BeamSearchScorer( @@ -612,32 +555,19 @@ def generate( elif is_constraint_gen_mode: if generation_config.num_return_sequences > generation_config.num_beams: - raise ValueError( - "`num_return_sequences` has to be smaller or equal to `num_beams`." - ) + raise ValueError("`num_return_sequences` has to be smaller or equal to `num_beams`.") if stopping_criteria.max_length is None: - raise ValueError( - "`max_length` needs to be a stopping_criteria for now." - ) + raise ValueError("`max_length` needs to be a stopping_criteria for now.") if generation_config.num_beams <= 1: - raise ValueError( - "`num_beams` needs to be greater than 1 for constrained generation." - ) + raise ValueError("`num_beams` needs to be greater than 1 for constrained generation.") if generation_config.do_sample: - raise ValueError( - "`do_sample` needs to be false for constrained generation." - ) + raise ValueError("`do_sample` needs to be false for constrained generation.") - if ( - generation_config.num_beam_groups is not None - and generation_config.num_beam_groups > 1 - ): - raise ValueError( - "`num_beam_groups` not supported yet for constrained generation." 
- ) + if generation_config.num_beam_groups is not None and generation_config.num_beam_groups > 1: + raise ValueError("`num_beam_groups` not supported yet for constrained generation.") final_constraints = [] if generation_config.constraints is not None: @@ -661,15 +591,10 @@ def typeerror(): if isinstance(word_ids[0], list): if not isinstance(word_ids, list) or len(word_ids) == 0: typeerror() - if any( - not isinstance(token_ids, list) for token_ids in word_ids - ): + if any(not isinstance(token_ids, list) for token_ids in word_ids): typeerror() if any( - any( - (not isinstance(token_id, int) or token_id < 0) - for token_id in token_ids - ) + any((not isinstance(token_id, int) or token_id < 0) for token_id in token_ids) for token_ids in word_ids ): typeerror() @@ -678,10 +603,7 @@ def typeerror(): else: if not isinstance(word_ids, list) or len(word_ids) == 0: typeerror() - if any( - (not isinstance(token_id, int) or token_id < 0) - for token_id in word_ids - ): + if any((not isinstance(token_id, int) or token_id < 0) for token_id in word_ids): typeerror() constraint = PhrasalConstraint(word_ids) @@ -843,52 +765,26 @@ def sample_stream( ['Today is a beautiful day, and a wonderful day.\n\nI was lucky enough to meet the'] ```""" # init values - logits_processor = ( - logits_processor if logits_processor is not None else LogitsProcessorList() - ) - stopping_criteria = ( - stopping_criteria - if stopping_criteria is not None - else StoppingCriteriaList() - ) + logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() + stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() if max_length is not None: warnings.warn( "`max_length` is deprecated in this function, use" " `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", UserWarning, ) - stopping_criteria = validate_stopping_criteria( - stopping_criteria, max_length - ) - logits_warper = ( - 
logits_warper if logits_warper is not None else LogitsProcessorList() - ) - pad_token_id = ( - pad_token_id - if pad_token_id is not None - else self.generation_config.pad_token_id - ) - eos_token_id = ( - eos_token_id - if eos_token_id is not None - else self.generation_config.eos_token_id - ) + stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) + logits_warper = logits_warper if logits_warper is not None else LogitsProcessorList() + pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id + eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id if isinstance(eos_token_id, int): eos_token_id = [eos_token_id] - output_scores = ( - output_scores - if output_scores is not None - else self.generation_config.output_scores - ) + output_scores = output_scores if output_scores is not None else self.generation_config.output_scores output_attentions = ( - output_attentions - if output_attentions is not None - else self.generation_config.output_attentions + output_attentions if output_attentions is not None else self.generation_config.output_attentions ) output_hidden_states = ( - output_hidden_states - if output_hidden_states is not None - else self.generation_config.output_hidden_states + output_hidden_states if output_hidden_states is not None else self.generation_config.output_hidden_states ) return_dict_in_generate = ( return_dict_in_generate @@ -898,15 +794,9 @@ def sample_stream( # init attention / hidden states / scores tuples scores = () if (return_dict_in_generate and output_scores) else None - decoder_attentions = ( - () if (return_dict_in_generate and output_attentions) else None - ) - cross_attentions = ( - () if (return_dict_in_generate and output_attentions) else None - ) - decoder_hidden_states = ( - () if (return_dict_in_generate and output_hidden_states) else None - ) + decoder_attentions = () if (return_dict_in_generate and output_attentions) else 
None + cross_attentions = () if (return_dict_in_generate and output_attentions) else None + decoder_hidden_states = () if (return_dict_in_generate and output_hidden_states) else None # keep track of which sequences are already finished unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1) @@ -917,9 +807,7 @@ def sample_stream( if synced_gpus: # Under synced_gpus the `forward` call must continue until all gpus complete their sequence. # The following logic allows an early break if all peers finished generating their sequence - this_peer_finished_flag = torch.tensor( - 0.0 if this_peer_finished else 1.0 - ).to(input_ids.device) + this_peer_finished_flag = torch.tensor(0.0 if this_peer_finished else 1.0).to(input_ids.device) # send 0.0 if we finished, 1.0 otherwise dist.all_reduce(this_peer_finished_flag, op=dist.ReduceOp.SUM) # did all peers finish? the reduced sum will be 0.0 then @@ -952,18 +840,14 @@ def sample_stream( scores += (next_token_scores,) if output_attentions: decoder_attentions += ( - (outputs.decoder_attentions,) - if self.config.is_encoder_decoder - else (outputs.attentions,) + (outputs.decoder_attentions,) if self.config.is_encoder_decoder else (outputs.attentions,) ) if self.config.is_encoder_decoder: cross_attentions += (outputs.cross_attentions,) if output_hidden_states: decoder_hidden_states += ( - (outputs.decoder_hidden_states,) - if self.config.is_encoder_decoder - else (outputs.hidden_states,) + (outputs.decoder_hidden_states,) if self.config.is_encoder_decoder else (outputs.hidden_states,) ) # sample @@ -973,12 +857,8 @@ def sample_stream( # finished sentences should have their next token be a padding token if eos_token_id is not None: if pad_token_id is None: - raise ValueError( - "If `eos_token_id` is defined, make sure that `pad_token_id` is defined." 
- ) - next_tokens = next_tokens * unfinished_sequences + pad_token_id * ( - 1 - unfinished_sequences - ) + raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.") + next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences) yield next_tokens, self.final_norm(outputs.hidden_states[-1][:, -1]) # update generated ids, model inputs, and length for next step input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) @@ -988,9 +868,7 @@ def sample_stream( # if eos_token was found in one sentence, set sentence to finished if eos_token_id is not None: - unfinished_sequences = unfinished_sequences.mul( - (sum(next_tokens != i for i in eos_token_id)).long() - ) + unfinished_sequences = unfinished_sequences.mul((sum(next_tokens != i for i in eos_token_id)).long()) # stop when each sentence is finished, or if we exceed the maximum length if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores): @@ -1007,22 +885,17 @@ def init_stream_support(): if __name__ == "__main__": - from transformers import PreTrainedModel - from transformers import AutoTokenizer, AutoModelForCausalLM + from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel PreTrainedModel.generate = NewGenerationMixin.generate PreTrainedModel.sample_stream = NewGenerationMixin.sample_stream - model = AutoModelForCausalLM.from_pretrained( - "bigscience/bloom-560m", torch_dtype=torch.float16 - ) + model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m", torch_dtype=torch.float16) tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m") model = model.to("cuda:0") model = model.eval() prompt_text = "hello? 
\n" - input_ids = tokenizer( - prompt_text, return_tensors="pt", add_special_tokens=False - ).input_ids + input_ids = tokenizer(prompt_text, return_tensors="pt", add_special_tokens=False).input_ids input_ids = input_ids.to("cuda:0") with torch.no_grad(): diff --git a/TTS/tts/layers/xtts/trainer/dataset.py b/TTS/tts/layers/xtts/trainer/dataset.py index b122fc8aac..41401fd6f8 100644 --- a/TTS/tts/layers/xtts/trainer/dataset.py +++ b/TTS/tts/layers/xtts/trainer/dataset.py @@ -1,16 +1,18 @@ import os import random import sys -import numpy as np +import numpy as np import torch import torch.nn.functional as F import torch.utils.data import torchaudio -from torchaudio.backend.sox_io_backend import load as torchaudio_sox_load from torchaudio.backend.soundfile_backend import load as torchaudio_soundfile_load +from torchaudio.backend.sox_io_backend import load as torchaudio_sox_load + torch.set_num_threads(1) + def key_samples_by_col(samples, col): """Returns a dictionary of samples keyed by language.""" samples_by_col = {} @@ -23,11 +25,11 @@ def key_samples_by_col(samples, col): return samples_by_col -def get_prompt_slice(gt_path, max_sample_length, min_sample_length, sample_rate, is_eval=False): +def get_prompt_slice(gt_path, max_sample_length, min_sample_length, sample_rate, is_eval=False): rel_clip = load_audio(gt_path, sample_rate) # if eval uses a middle size sample when it is possible to be more reproducible if is_eval: - sample_length = int((min_sample_length + max_sample_length)/2) + sample_length = int((min_sample_length + max_sample_length) / 2) else: sample_length = random.randint(min_sample_length, max_sample_length) gap = rel_clip.shape[-1] - sample_length @@ -41,7 +43,7 @@ def get_prompt_slice(gt_path, max_sample_length, min_sample_length, sample_rate, else: rand_start = random.randint(0, gap) - rand_end = rand_start+sample_length + rand_end = rand_start + sample_length rel_clip = rel_clip[:, rand_start:rand_end] rel_clip = F.pad(rel_clip, pad=(0, 
max_sample_length - rel_clip.shape[-1])) cond_idxs = [rand_start, rand_end] @@ -50,7 +52,7 @@ def get_prompt_slice(gt_path, max_sample_length, min_sample_length, sample_rate, def load_audio(audiopath, sampling_rate): # better load setting following: https://github.com/faroit/python_audio_loading_benchmark - if audiopath[-4:] == '.mp3': + if audiopath[-4:] == ".mp3": # it uses torchaudio with sox backend to load mp3 audio, lsr = torchaudio_sox_load(audiopath) else: @@ -72,6 +74,7 @@ def load_audio(audiopath, sampling_rate): audio.clip_(-1, 1) return audio + class XTTSDataset(torch.utils.data.Dataset): def __init__(self, config, samples, tokenizer, sample_rate, is_eval=False): self.config = config @@ -103,16 +106,18 @@ def check_eval_samples(self): print(" > Filtering invalid eval samples!!") new_samples = [] for sample in self.samples: - try: - tseq, _, wav, _, _, _ = self.load_item(sample) - except: - pass - # Basically, this audio file is nonexistent or too long to be supported by the dataset. - if wav is None or \ - (self.max_wav_len is not None and wav.shape[-1] > self.max_wav_len) or \ - (self.max_text_len is not None and tseq.shape[0] > self.max_text_len): - continue - new_samples.append(sample) + try: + tseq, _, wav, _, _, _ = self.load_item(sample) + except: + pass + # Basically, this audio file is nonexistent or too long to be supported by the dataset. 
+ if ( + wav is None + or (self.max_wav_len is not None and wav.shape[-1] > self.max_wav_len) + or (self.max_text_len is not None and tseq.shape[0] > self.max_text_len) + ): + continue + new_samples.append(sample) self.samples = new_samples print(" > Total eval samples after filtering:", len(self.samples)) @@ -125,9 +130,9 @@ def get_text(self, text, lang): return tokens def load_item(self, sample): - text = str(sample['text']) + text = str(sample["text"]) tseq = self.get_text(text, sample["language"]) - audiopath = sample['audio_file'] + audiopath = sample["audio_file"] wav = load_audio(audiopath, self.sample_rate) if text is None or len(text.strip()) == 0: raise ValueError @@ -136,7 +141,9 @@ def load_item(self, sample): raise ValueError # get a slice from GT to condition the model - cond, cond_len, cond_idxs = get_prompt_slice(audiopath, self.max_conditioning_length, self.min_conditioning_length, self.sample_rate, self.is_eval) + cond, cond_len, cond_idxs = get_prompt_slice( + audiopath, self.max_conditioning_length, self.min_conditioning_length, self.sample_rate, self.is_eval + ) return tseq, audiopath, wav, cond, cond_len, cond_idxs @@ -151,7 +158,7 @@ def __getitem__(self, index): index = random.randint(0, len(self.samples[lang]) - 1) sample = self.samples[lang][index] # a unique id for each sampel to deal with fails - sample_id = lang+"_"+str(index) + sample_id = lang + "_" + str(index) # ignore samples that we already know that is not valid ones if sample_id in self.failed_samples: @@ -170,26 +177,30 @@ def __getitem__(self, index): return self[1] # check if the audio and text size limits and if it out of the limits, added it failed_samples - if wav is None or \ - (self.max_wav_len is not None and wav.shape[-1] > self.max_wav_len) or \ - (self.max_text_len is not None and tseq.shape[0] > self.max_text_len): + if ( + wav is None + or (self.max_wav_len is not None and wav.shape[-1] > self.max_wav_len) + or (self.max_text_len is not None and tseq.shape[0] > 
self.max_text_len) + ): # Basically, this audio file is nonexistent or too long to be supported by the dataset. # It's hard to handle this situation properly. Best bet is to return the a random valid token and skew the dataset somewhat as a result. if self.debug_failures and wav is not None and tseq is not None: - print(f"error loading {sample['audio_file']}: ranges are out of bounds; {wav.shape[-1]}, {tseq.shape[0]}") + print( + f"error loading {sample['audio_file']}: ranges are out of bounds; {wav.shape[-1]}, {tseq.shape[0]}" + ) self.failed_samples.add(sample_id) return self[1] res = { # 'real_text': text, - 'text': tseq, - 'text_lengths': torch.tensor(tseq.shape[0], dtype=torch.long), - 'wav': wav, - 'wav_lengths': torch.tensor(wav.shape[-1], dtype=torch.long), - 'filenames': audiopath, - 'conditioning': cond.unsqueeze(1), - 'cond_lens': torch.tensor(cond_len, dtype=torch.long), - 'cond_idxs': torch.tensor(cond_idxs), + "text": tseq, + "text_lengths": torch.tensor(tseq.shape[0], dtype=torch.long), + "wav": wav, + "wav_lengths": torch.tensor(wav.shape[-1], dtype=torch.long), + "filenames": audiopath, + "conditioning": cond.unsqueeze(1), + "cond_lens": torch.tensor(cond_len, dtype=torch.long), + "cond_idxs": torch.tensor(cond_idxs), } return res @@ -223,8 +234,8 @@ def collate_fn(self, batch): for i in range(B): text = batch["text"][i] text_padded[i, : batch["text_lengths"][i]] = torch.IntTensor(text) - wav = batch['wav'][i] - wav_padded[i, :, :batch["wav_lengths"][i]] = torch.FloatTensor(wav) + wav = batch["wav"][i] + wav_padded[i, :, : batch["wav_lengths"][i]] = torch.FloatTensor(wav) batch["wav"] = wav_padded batch["padded_text"] = text_padded diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 87b1228ec7..e4df2b90d5 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -1,37 +1,30 @@ import os +import sys from dataclasses import dataclass, field from 
typing import Callable, Dict, List, Optional, Tuple, Union import torch -import torchaudio import torch.nn as nn +import torchaudio +from coqpit import Coqpit from torch.nn import functional as F from torch.utils.data import DataLoader -import sys - - -from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer -from TTS.tts.layers.xtts.gpt import GPT -from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig, Xtts -from TTS.tts.configs.xtts_config import XttsConfig - -from TTS.tts.models.base_tts import BaseTTS -from coqpit import Coqpit - -from TTS.tts.configs.tortoise_config import TortoiseConfig -from TTS.tts.layers.tortoise.arch_utils import TorchMelSpectrogram - -from TTS.tts.datasets.dataset import TTSDataset - from trainer.torch import DistributedSampler from trainer.trainer_utils import get_optimizer, get_scheduler +from TTS.tts.configs.tortoise_config import TortoiseConfig +from TTS.tts.configs.xtts_config import XttsConfig +from TTS.tts.datasets.dataset import TTSDataset +from TTS.tts.layers.tortoise.arch_utils import TorchMelSpectrogram +from TTS.tts.layers.xtts.dvae import DiscreteVAE +from TTS.tts.layers.xtts.gpt import GPT +from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder +from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer from TTS.tts.layers.xtts.trainer.dataset import XTTSDataset +from TTS.tts.models.base_tts import BaseTTS +from TTS.tts.models.xtts import Xtts, XttsArgs, XttsAudioConfig from TTS.utils.io import load_fsspec -from TTS.tts.layers.xtts.dvae import DiscreteVAE - -from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder @dataclass class GPTTrainerConfig(XttsConfig): @@ -42,6 +35,7 @@ class GPTTrainerConfig(XttsConfig): weighted_loss_multipliers: dict = field(default_factory=lambda: {}) test_sentences: List[dict] = field(default_factory=lambda: []) + @dataclass class XttsAudioConfig(XttsAudioConfig): dvae_sample_rate: int = 22050 @@ -55,27 +49,28 @@ class GPTArgs(XttsArgs): gpt_loss_mel_ce_weight: float = 1.0 
gpt_num_audio_tokens: int = 8194 debug_loading_failures: bool = False - max_wav_length: int = 255995 # ~11.6 seconds + max_wav_length: int = 255995 # ~11.6 seconds max_text_length: int = 200 tokenizer_file: str = "" mel_norm_file: str = "https://coqui.gateway.scarf.sh/v0.14.0_models/mel_norms.pth" dvae_checkpoint: str = "" xtts_checkpoint: str = "" - gpt_checkpoint: str = "" # if defined it will replace the gpt weights on xtts model - vocoder: str = "" # overide vocoder key on the config to avoid json write issues + gpt_checkpoint: str = "" # if defined it will replace the gpt weights on xtts model + vocoder: str = "" # overide vocoder key on the config to avoid json write issues def callback_clearml_load_save(operation_type, model_info): # return None means skip the file upload/log, returning model_info will continue with the log/upload # you can also change the upload destination file name model_info.upload_filename or check the local file size with Path(model_info.local_model_path).stat().st_size - assert operation_type in ('load', 'save') + assert operation_type in ("load", "save") # print(operation_type, model_info.__dict__) - if "similarities.pth" in model_info.__dict__['local_model_path']: + if "similarities.pth" in model_info.__dict__["local_model_path"]: return None return model_info + class GPTTrainer(BaseTTS): def __init__(self, config: Coqpit): """ @@ -89,18 +84,17 @@ def __init__(self, config: Coqpit): self.xtts.tokenizer = VoiceBpeTokenizer(self.args.tokenizer_file) # init gpt encoder and hifigan decoder self.xtts.init_models() - # set mel stats - if self.args.mel_norm_file: - self.xtts.mel_stats = load_fsspec(self.args.mel_norm_file) if self.args.xtts_checkpoint: self.load_checkpoint(self.config, self.args.xtts_checkpoint, eval=False, strict=False) + # set mel stats + if self.args.mel_norm_file: + self.xtts.mel_stats = load_fsspec(self.args.mel_norm_file) + # load GPT if available if self.args.gpt_checkpoint: - gpt_checkpoint = torch.load( - 
self.args.gpt_checkpoint, map_location=torch.device("cpu") - ) + gpt_checkpoint = torch.load(self.args.gpt_checkpoint, map_location=torch.device("cpu")) # deal with coqui Trainer exported model if "model" in gpt_checkpoint.keys() and "config" in gpt_checkpoint.keys(): print("Coqui Trainer checkpoint detected! Converting it!") @@ -113,10 +107,15 @@ def __init__(self, config: Coqpit): del gpt_checkpoint[key] else: del gpt_checkpoint[key] - + # edit checkpoint if the number of tokens is changed to ensures the better transfer learning possible - if "text_embedding.weight" in gpt_checkpoint and gpt_checkpoint["text_embedding.weight"].shape != self.xtts.gpt.text_embedding.weight.shape: - num_new_tokens = self.xtts.gpt.text_embedding.weight.shape[0] - gpt_checkpoint["text_embedding.weight"].shape[0] + if ( + "text_embedding.weight" in gpt_checkpoint + and gpt_checkpoint["text_embedding.weight"].shape != self.xtts.gpt.text_embedding.weight.shape + ): + num_new_tokens = ( + self.xtts.gpt.text_embedding.weight.shape[0] - gpt_checkpoint["text_embedding.weight"].shape[0] + ) print(f" > Loading checkpoint with {num_new_tokens} additional tokens.") # add new tokens to a linear layer (text_head) @@ -156,7 +155,7 @@ def __init__(self, config: Coqpit): mel_fmin=0, mel_fmax=8000, n_mel_channels=80, - mel_norm_file=self.args.mel_norm_file + mel_norm_file=self.args.mel_norm_file, ) # Load DVAE @@ -175,17 +174,18 @@ def __init__(self, config: Coqpit): self.dvae.eval() if self.args.dvae_checkpoint: - dvae_checkpoint = torch.load( - self.args.dvae_checkpoint, map_location=torch.device("cpu") - ) + dvae_checkpoint = torch.load(self.args.dvae_checkpoint, map_location=torch.device("cpu")) self.dvae.load_state_dict(dvae_checkpoint, strict=False) print(">> DVAE weights restored from:", self.args.dvae_checkpoint) else: - raise RuntimeError("You need to specify config.model_args.dvae_checkpoint path to be able to train the GPT decoder!!") + raise RuntimeError( + "You need to specify 
config.model_args.dvae_checkpoint path to be able to train the GPT decoder!!" + ) # Mel spectrogram extractor for DVAE - self.torch_mel_spectrogram_dvae = TorchMelSpectrogram(mel_norm_file=self.args.mel_norm_file, sampling_rate=config.audio.dvae_sample_rate) - + self.torch_mel_spectrogram_dvae = TorchMelSpectrogram( + mel_norm_file=self.args.mel_norm_file, sampling_rate=config.audio.dvae_sample_rate + ) @property def device(self): @@ -203,7 +203,9 @@ def forward(self, text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels cond_mels: MEL float tensor, (b, num_samples, 80,t_m) cond_idxs: cond start and end indexs, (b, 2) """ - losses = self.xtts.gpt(text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels=cond_mels, cond_idxs=cond_idxs) + losses = self.xtts.gpt( + text_inputs, text_lengths, audio_codes, wav_lengths, cond_mels=cond_mels, cond_idxs=cond_idxs + ) return losses @torch.no_grad() @@ -215,7 +217,9 @@ def test_run(self, assets) -> Tuple[Dict, Dict]: # pylint: disable=W0613 test_audios = {} print(" | > Synthesizing test sentences.") for idx, s_info in enumerate(self.config.test_sentences): - wav = self.xtts.synthesize(s_info["text"], self.config, s_info["speaker_wav"], s_info["language"])["wav"] + wav = self.xtts.synthesize( + s_info["text"], self.config, s_info["speaker_wav"], s_info["language"], gpt_cond_len=3 + )["wav"] test_audios["{}-audio".format(idx)] = wav # delete inference layers @@ -231,7 +235,7 @@ def test_log( def format_batch(self, batch: Dict) -> Dict: return batch - @torch.no_grad() # torch no grad to avoid gradients from the pre-processing and DVAE codes extraction + @torch.no_grad() # torch no grad to avoid gradients from the pre-processing and DVAE codes extraction def format_batch_on_device(self, batch): """Compute spectrograms on the device.""" batch["text_lengths"] = batch["text_lengths"] @@ -241,10 +245,10 @@ def format_batch_on_device(self, batch): # compute conditioning mel specs # transform waves from torch.Size([B, 
num_cond_samples, 1, T] to torch.Size([B * num_cond_samples, 1, T] because if is faster than iterate the tensor B, num_cond_samples, C, T = batch["conditioning"].size() - conditioning_reshaped = batch["conditioning"].view(B*num_cond_samples, C, T) + conditioning_reshaped = batch["conditioning"].view(B * num_cond_samples, C, T) paired_conditioning_mel = self.torch_mel_spectrogram_style_encoder(conditioning_reshaped) # transform torch.Size([B * num_cond_samples, n_mel, T_mel]) in torch.Size([B, num_cond_samples, n_mel, T_mel]) - n_mel = self.torch_mel_spectrogram_style_encoder.n_mel_channels # paired_conditioning_mel.size(1) + n_mel = self.torch_mel_spectrogram_style_encoder.n_mel_channels # paired_conditioning_mel.size(1) T_mel = paired_conditioning_mel.size(2) paired_conditioning_mel = paired_conditioning_mel.view(B, num_cond_samples, n_mel, T_mel) # get the conditioning embeddings @@ -300,6 +304,7 @@ def on_init_end(self, trainer): # pylint: disable=W0613 # ignore similarities.pth on clearml save/upload if self.config.dashboard_logger.lower() == "clearml": from clearml.binding.frameworks import WeightsFileHandler + WeightsFileHandler.add_pre_callback(callback_clearml_load_save) @torch.no_grad() @@ -367,16 +372,23 @@ def get_data_loader( return loader def get_optimizer(self) -> List: - """Initiate and return the optimizer based on the config parameters. 
- """ + """Initiate and return the optimizer based on the config parameters.""" # ToDo: deal with multi GPU training if self.config.optimizer_wd_only_on_weights: - # parameters to only GPT model + # parameters to only GPT model net = self.xtts.gpt # normalizations - norm_modules = (nn.BatchNorm2d, nn.InstanceNorm2d, nn.BatchNorm1d, nn.InstanceNorm1d, - nn.BatchNorm3d, nn.InstanceNorm3d, nn.GroupNorm, nn.LayerNorm) + norm_modules = ( + nn.BatchNorm2d, + nn.InstanceNorm2d, + nn.BatchNorm1d, + nn.InstanceNorm1d, + nn.BatchNorm3d, + nn.InstanceNorm3d, + nn.GroupNorm, + nn.LayerNorm, + ) # nn.Embedding emb_modules = (nn.Embedding, nn.EmbeddingBag) @@ -390,7 +402,7 @@ def get_optimizer(self) -> List: v.is_norm = isinstance(m, norm_modules) v.is_emb = isinstance(m, emb_modules) - fpn = '%s.%s' % (mn, k) if mn else k # full param name + fpn = "%s.%s" % (mn, k) if mn else k # full param name all_param_names.add(fpn) param_map[fpn] = v if v.is_bias or v.is_norm or v.is_emb: @@ -402,26 +414,26 @@ def get_optimizer(self) -> List: params_weights = [param_map[k] for k in params_names_weights] groups = [ - { 'params': params_weights, 'weight_decay': self.config.optimizer_params["weight_decay"]}, - { 'params': params_notweights, 'weight_decay': 0} + {"params": params_weights, "weight_decay": self.config.optimizer_params["weight_decay"]}, + {"params": params_notweights, "weight_decay": 0}, ] # torch.optim.AdamW opt = get_optimizer( - self.config.optimizer, - self.config.optimizer_params, - self.config.lr, - parameters=groups, - ) - opt._group_names = [params_names_weights, params_names_notweights] - return opt - - return get_optimizer( self.config.optimizer, self.config.optimizer_params, self.config.lr, - # optimize only for the GPT model - parameters=self.xtts.gpt.parameters(), + parameters=groups, ) + opt._group_names = [params_names_weights, params_names_notweights] + return opt + + return get_optimizer( + self.config.optimizer, + self.config.optimizer_params, + self.config.lr, 
+ # optimize only for the GPT model + parameters=self.xtts.gpt.parameters(), + ) def get_scheduler(self, optimizer) -> List: """Set the scheduler for the optimizer. @@ -461,4 +473,3 @@ def init_from_config(config: "GPTTrainerConfig", samples: Union[List[List], List Defaults to None. """ return GPTTrainer(config) - \ No newline at end of file diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 3e6097997c..e2c8ca4c0f 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -11,15 +11,16 @@ from TTS.tts.layers.tortoise.diffusion_decoder import DiffusionTts from TTS.tts.layers.xtts.diffusion import SpacedDiffusion, get_named_beta_schedule, space_timesteps from TTS.tts.layers.xtts.gpt import GPT -from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer -from TTS.tts.layers.xtts.vocoder import UnivNetGenerator from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder from TTS.tts.layers.xtts.stream_generator import init_stream_support +from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer +from TTS.tts.layers.xtts.vocoder import UnivNetGenerator from TTS.tts.models.base_tts import BaseTTS from TTS.utils.io import load_fsspec init_stream_support() + def load_audio(audiopath, sr=22050): """ Load an audio file from disk and resample it to the specified sampling rate. 
@@ -332,7 +333,6 @@ def init_models(self): stop_audio_token=self.args.gpt_stop_audio_token, ) - if self.args.use_hifigan: self.hifigan_decoder = HifiDecoder( input_sample_rate=self.args.input_sample_rate, @@ -414,21 +414,20 @@ def get_diffusion_cond_latents( return diffusion_latent @torch.inference_mode() - def get_speaker_embedding( - self, - audio_path - ): + def get_speaker_embedding(self, audio_path): audio = load_audio(audio_path, self.hifigan_decoder.speaker_encoder_audio_config["sample_rate"]) - speaker_embedding = self.hifigan_decoder.speaker_encoder.forward( - audio.to(self.device), l2_norm=True - ).unsqueeze(-1).to(self.device) + speaker_embedding = ( + self.hifigan_decoder.speaker_encoder.forward(audio.to(self.device), l2_norm=True) + .unsqueeze(-1) + .to(self.device) + ) return speaker_embedding def get_conditioning_latents( self, audio_path, gpt_cond_len=3, - ): + ): speaker_embedding = None diffusion_cond_latents = None if self.args.use_hifigan: @@ -563,11 +562,9 @@ def full_inference( Generated audio clip(s) as a torch tensor. Shape 1,S if k=1 else, (k,1,S) where S is the sample length. Sample rate is 24kHz. 
""" - ( - gpt_cond_latent, - diffusion_conditioning, - speaker_embedding - ) = self.get_conditioning_latents(audio_path=ref_audio_path, gpt_cond_len=gpt_cond_len) + (gpt_cond_latent, diffusion_conditioning, speaker_embedding) = self.get_conditioning_latents( + audio_path=ref_audio_path, gpt_cond_len=gpt_cond_len + ) return self.inference( text, language, @@ -588,7 +585,7 @@ def full_inference( decoder=decoder, **hf_generate_kwargs, ) - + @torch.inference_mode() def inference( self, @@ -666,7 +663,7 @@ def inference( if ctokens > 8: gpt_latents = gpt_latents[:, :k] break - + if decoder == "hifigan": assert hasattr(self, "hifigan_decoder"), "You must enable hifigan decoder to use it by setting config `use_hifigan: true`" wav = self.hifigan_decoder(gpt_latents, g=speaker_embedding) @@ -721,7 +718,9 @@ def inference_stream( decoder="hifigan", **hf_generate_kwargs, ): - assert hasattr(self, "hifigan_decoder"), "`inference_stream` requires use_hifigan to be set to true in the config.model_args, diffusion is too slow to stream." + assert hasattr( + self, "hifigan_decoder" + ), "`inference_stream` requires use_hifigan to be set to true in the config.model_args, diffusion is too slow to stream." 
text = f"[{language}]{text.strip().lower()}" text_tokens = torch.IntTensor(self.tokenizer.encode(text, lang=language)).unsqueeze(0).to(self.device) @@ -793,7 +792,7 @@ def load_checkpoint( self, config, checkpoint_dir=None, - checkpoint_path=None, + checkpoint_path=None, vocab_path=None, eval=True, strict=True, @@ -827,6 +826,15 @@ def load_checkpoint( ignore_keys += [] if self.args.use_hifigan else ["hifigan_decoder"] ignore_keys += [] if self.args.use_ne_hifigan else ["ne_hifigan_decoder"] for key in list(checkpoint.keys()): + # check if it is from the coqui Trainer if so convert it + if key.startswith("xtts."): + coqui_trainer_checkpoint = True + new_key = key.replace("xtts.", "") + checkpoint[new_key] = checkpoint[key] + del checkpoint[key] + key = new_key + + # remove unused keys if key.split(".")[0] in ignore_keys: del checkpoint[key] diff --git a/recipes/ljspeech/xtts_v1/train_xtts.py b/recipes/ljspeech/xtts_v1/train_xtts.py new file mode 100644 index 0000000000..6c07053b31 --- /dev/null +++ b/recipes/ljspeech/xtts_v1/train_xtts.py @@ -0,0 +1,145 @@ +from trainer import Trainer, TrainerArgs + +from TTS.config.shared_configs import BaseDatasetConfig +from TTS.tts.datasets import load_tts_samples +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig + +# Define here the dataset used +config_ljspeech = BaseDatasetConfig( + formatter="ljspeech", + dataset_name="ljspeech", + path="/raid/datasets/LJSpeech-1.1_24khz/", + meta_file_train="/raid/datasets/LJSpeech-1.1_24khz/metadata.csv", + language="en", +) + +DATASETS_CONFIG_LIST = [config_ljspeech] + + +def freeze_layers(trainer): + pass + + +def main(): + # init args and config + model_args = GPTArgs( + max_conditioning_length=132300, # 6 secs + min_conditioning_length=66150, # 3 secs + debug_loading_failures=False, + max_wav_length=255995, # ~11.6 seconds + max_text_length=200, + mel_norm_file="/raid/datasets/xtts_models/mel_stats.pth", + 
dvae_checkpoint="/raid/datasets/xtts_models/dvae.pth", + # tokenizer_file="/raid/datasets/xtts_models/vocab.json", # vocab path of the model that you want to fine-tune + # xtts_checkpoint="https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth", + xtts_checkpoint="/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/132500_gpt_ema_coqui_tts_with_enhanced_hifigan.pth", # checkpoint path of the model that you want to fine-tune + tokenizer_file="/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/tokenizer_merged_5.json", + gpt_num_audio_tokens=8194, + gpt_start_audio_token=8192, + gpt_stop_audio_token=8193, + ) + audio_config = XttsAudioConfig( + sample_rate=22050, dvae_sample_rate=22050, diffusion_sample_rate=24000, output_sample_rate=24000 # GPT SR + ) + config = GPTTrainerConfig( + output_path=OUT_PATH, + model_args=model_args, + run_name=RUN_NAME, + project_name=PROJECT_NAME, + run_description=""" + GPT XTTS training + """, + dashboard_logger=DASHBOARD_LOGGER, + logger_uri=LOGGER_URI, + audio=audio_config, + batch_size=BATCH_SIZE, + batch_group_size=48, + eval_batch_size=BATCH_SIZE, + num_loader_workers=8, + eval_split_max_size=256, + print_step=50, + plot_step=100, + log_model_step=1000, + save_step=10000, + save_n_checkpoints=1, + save_checkpoints=True, + # target_loss="loss", + print_eval=False, + # Optimizer values like tortoise, pytorch implementation with modifications to not apply WD to non-weight parameters. 
+ optimizer="AdamW", + optimizer_wd_only_on_weights=True, # for multi-gpu training turn it off + optimizer_params={"betas": [0.9, 0.96], "eps": 1e-8, "weight_decay": 1e-2}, + lr=5e-06, # learning rate + lr_scheduler="MultiStepLR", + # it was adjusted accordly for the new step scheme + lr_scheduler_params={"milestones": [50000 * 18, 150000 * 18, 300000 * 18], "gamma": 0.5, "last_epoch": -1}, + test_sentences=[ + { + "text": "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", + "speaker_wav": "/raid/edresson/dev/ref-ljspeech.wav", + "language": "en", + }, + { + "text": "This cake is great. It's so delicious and moist.", + "speaker_wav": "/raid/edresson/dev/ref-ljspeech.wav", + "language": "en", + }, + { + "text": "Levei muito tempo para desenvolver uma voz e agora que a tenho não vou ficar calado .", + "speaker_wav": "/raid/edresson/dev/ref-ljspeech.wav", + "language": "pt", + }, + ], + ) + + # init the model from config + model = GPTTrainer.init_from_config(config) + + # load training samples + train_samples, eval_samples = load_tts_samples( + DATASETS_CONFIG_LIST, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, + ) + + # init the trainer and 🚀 + trainer = Trainer( + TrainerArgs( + restore_path=RESTORE_PATH, + skip_train_epoch=SKIP_TRAIN_EPOCH, + start_with_eval=START_WITH_EVAL, + grad_accum_steps=GRAD_ACUMM_STEPS, + ), + config, + output_path=OUT_PATH, + model=model, + train_samples=train_samples, + eval_samples=eval_samples, + callbacks={"on_epoch_start": freeze_layers}, + ) + trainer.fit() + + +if __name__ == "__main__": + RUN_NAME = "GPT_XTTS_LJSpeech_fixed" + PROJECT_NAME = "XTTS_trainer" + OUT_PATH = "/raid/edresson/dev/Checkpoints/XTTS_v1_FT/" + # DASHBOARD_LOGGER = "clearml" + # LOGGER_URI = "s3://coqui-ai-models/TTS/Checkpoints/XTTS_v1/" + DASHBOARD_LOGGER = "tensorboard" + LOGGER_URI = None + RESTORE_PATH = None + SKIP_TRAIN_EPOCH = False + 
START_WITH_EVAL = True + BATCH_SIZE = 3 + GRAD_ACUMM_STEPS = 28 * 3 + + # debug + # DASHBOARD_LOGGER = "tensorboard" + # LOGGER_URI = None + # RESTORE_PATH = None + # BATCH_SIZE = 2 + # GRAD_ACUMM_STEPS = 1 + + main() diff --git a/recipes/multilingual/xtts_v1/train_xtts.py b/recipes/multilingual/xtts_v1/train_xtts.py index f36bf1ae44..fa13d8d4b8 100644 --- a/recipes/multilingual/xtts_v1/train_xtts.py +++ b/recipes/multilingual/xtts_v1/train_xtts.py @@ -2,9 +2,7 @@ from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples - -from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTTrainer, GPTArgs, XttsAudioConfig, GPTTrainerConfig - +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig config_coqui_MLS_metadata_train_with_previous_audio_key_de = BaseDatasetConfig( formatter="coqui", @@ -252,32 +250,34 @@ # DATASETS_CONFIG_LIST = [config_coqui_mls_french_metadata_with_previous_audio_key_fr, config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_mls_spanish_metadata_with_previous_audio_key_es, config_coqui_mls_italian_metadata_with_previous_audio_key_it] -DATASETS_CONFIG_LIST = [config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_mls_italian_metadata_with_previous_audio_key_it] - +DATASETS_CONFIG_LIST = [ + config_coqui_MLS_metadata_test_with_previous_audio_key_de, + config_coqui_mls_italian_metadata_with_previous_audio_key_it, +] + + def freeze_layers(trainer): pass + def main(): # init args and config model_args = GPTArgs( - max_conditioning_length=132300, # 6 secs - min_conditioning_length=66150, # 3 secs + max_conditioning_length=132300, # 6 secs + min_conditioning_length=66150, # 3 secs debug_loading_failures=False, - max_wav_length=255995, # ~11.6 seconds + max_wav_length=255995, # ~11.6 seconds max_text_length=200, mel_norm_file="/raid/datasets/xtts_models/mel_stats.pth", dvae_checkpoint="/raid/datasets/xtts_models/dvae.pth", - 
tokenizer_file="/raid/datasets/xtts_models/vocab.json", # vocab path of the model that you want to fine-tune - xtts_checkpoint="https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth", # checkpoint path of the model that you want to fine-tune + tokenizer_file="/raid/datasets/xtts_models/vocab.json", # vocab path of the model that you want to fine-tune + xtts_checkpoint="https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth", # checkpoint path of the model that you want to fine-tune gpt_num_audio_tokens=8194, gpt_start_audio_token=8192, gpt_stop_audio_token=8193, ) audio_config = XttsAudioConfig( - sample_rate=22050, # GPT SR - dvae_sample_rate=22050, - diffusion_sample_rate=24000, - output_sample_rate=24000 + sample_rate=22050, dvae_sample_rate=22050, diffusion_sample_rate=24000, output_sample_rate=24000 # GPT SR ) config = GPTTrainerConfig( output_path=OUT_PATH, @@ -303,20 +303,26 @@ def main(): save_checkpoints=True, # target_loss="loss", print_eval=False, - # Optimizer values like tortoise. However, they used pytorch implementation with modifications to not apply WD to non-weight parameters. We are using default Pytorch + # Optimizer values like tortoise, pytorch implementation with modifications to not apply WD to non-weight parameters. 
optimizer="AdamW", - optimizer_wd_only_on_weights=True, - optimizer_params={"betas": [.9, .96], "eps": 1e-8, "weight_decay": 1e-2}, - lr=5e-06, # learning rate - # lr=1e-4, # learning rate - # ToDo: implement 500 step warmup like tortoise and EMA weights replaces LR decay with rate: .999 + optimizer_wd_only_on_weights=True, # for multi-gpu training turn it off + optimizer_params={"betas": [0.9, 0.96], "eps": 1e-8, "weight_decay": 1e-2}, + lr=5e-06, # learning rate lr_scheduler="MultiStepLR", # it was adjusted accordly for the new step scheme lr_scheduler_params={"milestones": [50000 * 18, 150000 * 18, 300000 * 18], "gamma": 0.5, "last_epoch": -1}, test_sentences=[ - {"text": "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", "speaker_wav": "/raid/edresson/dev/ref.wav", "language": "en"}, - {"text": "This cake is great. It's so delicious and moist.", "speaker_wav": "/raid/edresson/dev/ref.wav", "language": "en"}, - ] + { + "text": "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", + "speaker_wav": "/raid/edresson/dev/ref.wav", + "language": "en", + }, + { + "text": "This cake is great. 
It's so delicious and moist.", + "speaker_wav": "/raid/edresson/dev/ref.wav", + "language": "en", + }, + ], ) # init the model from config @@ -332,13 +338,18 @@ def main(): # init the trainer and 🚀 trainer = Trainer( - TrainerArgs(restore_path=RESTORE_PATH, skip_train_epoch=SKIP_TRAIN_EPOCH, start_with_eval=START_WITH_EVAL, grad_accum_steps=GRAD_ACUMM_STEPS), + TrainerArgs( + restore_path=RESTORE_PATH, + skip_train_epoch=SKIP_TRAIN_EPOCH, + start_with_eval=START_WITH_EVAL, + grad_accum_steps=GRAD_ACUMM_STEPS, + ), config, output_path=OUT_PATH, model=model, train_samples=train_samples, eval_samples=eval_samples, - callbacks={"on_epoch_start": freeze_layers} + callbacks={"on_epoch_start": freeze_layers}, ) trainer.fit() @@ -351,17 +362,15 @@ def main(): LOGGER_URI = "s3://coqui-ai-models/TTS/Checkpoints/XTTS_style_emb/" RESTORE_PATH = None SKIP_TRAIN_EPOCH = False - START_WITH_EVAL = True + START_WITH_EVAL = True BATCH_SIZE = 9 GRAD_ACUMM_STEPS = 28 # debug # DASHBOARD_LOGGER = "tensorboard" - # LOGGER_URI = None + # LOGGER_URI = None # RESTORE_PATH = None BATCH_SIZE = 2 GRAD_ACUMM_STEPS = 1 - - main() diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py index db144f1ccf..7194ed5c33 100644 --- a/tests/zoo_tests/test_models.py +++ b/tests/zoo_tests/test_models.py @@ -99,6 +99,7 @@ def test_xtts_streaming(): """Testing the new inference_stream method""" from TTS.tts.configs.xtts_config import XttsConfig from TTS.tts.models.xtts import Xtts + speaker_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav") model_path = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v1") config = XttsConfig() @@ -115,7 +116,7 @@ def test_xtts_streaming(): "It took me quite a long time to develop a voice and now that I have it I am not going to be silent.", "en", gpt_cond_latent, - speaker_embedding + speaker_embedding, ) wav_chuncks = [] for i, chunk in enumerate(chunks): From 
469d624615f32e38074ee70ebc8d5f841dcc0298 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Wed, 18 Oct 2023 10:16:14 -0300 Subject: [PATCH 08/24] Update LJspeech XTTS recipe --- recipes/ljspeech/xtts_v1/train_xtts.py | 93 ++--- recipes/multilingual/xtts_v1/train_xtts.py | 376 --------------------- 2 files changed, 50 insertions(+), 419 deletions(-) delete mode 100644 recipes/multilingual/xtts_v1/train_xtts.py diff --git a/recipes/ljspeech/xtts_v1/train_xtts.py b/recipes/ljspeech/xtts_v1/train_xtts.py index 6c07053b31..641d050cb9 100644 --- a/recipes/ljspeech/xtts_v1/train_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_xtts.py @@ -1,11 +1,29 @@ +import os + from trainer import Trainer, TrainerArgs from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig -# Define here the dataset used -config_ljspeech = BaseDatasetConfig( +# Logging parameters +RUN_NAME = "GPT_XTTS_LJSpeech_FT" +PROJECT_NAME = "XTTS_trainer" +DASHBOARD_LOGGER = "tensorboard" +LOGGER_URI = None + +# Set here the path that the checkpoints will be saved. Default: ./run/training/ +OUT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "run", "training") + +# Training Parameters +OPTIMIZER_WD_ONLY_ON_WEIGHTS = True # for multi-gpu training please make it False +START_WITH_EVAL = True # if True it will star with evaluation +BATCH_SIZE = 3 # set here the batch size +GRAD_ACUMM_STEPS = 84 # set here the grad accumulation steps +# Note: we recommend that BATCH_SIZE * GRAD_ACUMM_STEPS need to be at least 252 for more efficient training. You can increase/decrease BATCH_SIZE but then set GRAD_ACUMM_STEPS accordingly. 
+ +# Define here the dataset that you want to use for the fine tuning +config_dataset = BaseDatasetConfig( formatter="ljspeech", dataset_name="ljspeech", path="/raid/datasets/LJSpeech-1.1_24khz/", @@ -13,11 +31,26 @@ language="en", ) -DATASETS_CONFIG_LIST = [config_ljspeech] +DATASETS_CONFIG_LIST = [config_dataset] + +# ToDo: update with the latest released checkpoints + +# DVAE parameters: For the training we need the dvae to extract the dvae tokens, given that you must provide the paths for this model +DVAE_CHECKPOINT = "/raid/datasets/xtts_models/dvae.pth" # DVAE checkpoint +MEL_NORM_FILE = ( + "/raid/datasets/xtts_models/mel_stats.pth" # Mel spectrogram norms, required for dvae mel spectrogram extraction +) + +# XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. +TOKENIZER_FILE = "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/tokenizer_merged_5.json" # vocab.json file +XTTS_CHECKPOINT = "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/132500_gpt_ema_coqui_tts_with_enhanced_hifigan.pth" # model.pth file -def freeze_layers(trainer): - pass +# Training sentences generations +SPEAKER_REFERENCE = ( + "./tests/data/ljspeech/wavs/LJ001-0002.wav" # speaker reference to be used in training test sentences +) +LANGUAGE = config_dataset.language def main(): @@ -28,18 +61,18 @@ def main(): debug_loading_failures=False, max_wav_length=255995, # ~11.6 seconds max_text_length=200, - mel_norm_file="/raid/datasets/xtts_models/mel_stats.pth", - dvae_checkpoint="/raid/datasets/xtts_models/dvae.pth", + mel_norm_file=MEL_NORM_FILE, + dvae_checkpoint=DVAE_CHECKPOINT, # tokenizer_file="/raid/datasets/xtts_models/vocab.json", # vocab path of the model that you want to fine-tune # xtts_checkpoint="https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth", - 
xtts_checkpoint="/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/132500_gpt_ema_coqui_tts_with_enhanced_hifigan.pth", # checkpoint path of the model that you want to fine-tune - tokenizer_file="/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/tokenizer_merged_5.json", + xtts_checkpoint=XTTS_CHECKPOINT, # checkpoint path of the model that you want to fine-tune + tokenizer_file=TOKENIZER_FILE, gpt_num_audio_tokens=8194, gpt_start_audio_token=8192, gpt_stop_audio_token=8193, ) audio_config = XttsAudioConfig( - sample_rate=22050, dvae_sample_rate=22050, diffusion_sample_rate=24000, output_sample_rate=24000 # GPT SR + sample_rate=22050, dvae_sample_rate=22050, diffusion_sample_rate=24000, output_sample_rate=24000 ) config = GPTTrainerConfig( output_path=OUT_PATH, @@ -67,7 +100,7 @@ def main(): print_eval=False, # Optimizer values like tortoise, pytorch implementation with modifications to not apply WD to non-weight parameters. optimizer="AdamW", - optimizer_wd_only_on_weights=True, # for multi-gpu training turn it off + optimizer_wd_only_on_weights=OPTIMIZER_WD_ONLY_ON_WEIGHTS, optimizer_params={"betas": [0.9, 0.96], "eps": 1e-8, "weight_decay": 1e-2}, lr=5e-06, # learning rate lr_scheduler="MultiStepLR", @@ -76,18 +109,13 @@ def main(): test_sentences=[ { "text": "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", - "speaker_wav": "/raid/edresson/dev/ref-ljspeech.wav", - "language": "en", + "speaker_wav": SPEAKER_REFERENCE, + "language": LANGUAGE, }, { "text": "This cake is great. 
It's so delicious and moist.", - "speaker_wav": "/raid/edresson/dev/ref-ljspeech.wav", - "language": "en", - }, - { - "text": "Levei muito tempo para desenvolver uma voz e agora que a tenho não vou ficar calado .", - "speaker_wav": "/raid/edresson/dev/ref-ljspeech.wav", - "language": "pt", + "speaker_wav": SPEAKER_REFERENCE, + "language": LANGUAGE, }, ], ) @@ -106,8 +134,8 @@ def main(): # init the trainer and 🚀 trainer = Trainer( TrainerArgs( - restore_path=RESTORE_PATH, - skip_train_epoch=SKIP_TRAIN_EPOCH, + restore_path=None, # xtts checkpoint is restored via xtts_checkpoint key so no need of restore it using Trainer restore_path parameter + skip_train_epoch=False, start_with_eval=START_WITH_EVAL, grad_accum_steps=GRAD_ACUMM_STEPS, ), @@ -116,30 +144,9 @@ def main(): model=model, train_samples=train_samples, eval_samples=eval_samples, - callbacks={"on_epoch_start": freeze_layers}, ) trainer.fit() if __name__ == "__main__": - RUN_NAME = "GPT_XTTS_LJSpeech_fixed" - PROJECT_NAME = "XTTS_trainer" - OUT_PATH = "/raid/edresson/dev/Checkpoints/XTTS_v1_FT/" - # DASHBOARD_LOGGER = "clearml" - # LOGGER_URI = "s3://coqui-ai-models/TTS/Checkpoints/XTTS_v1/" - DASHBOARD_LOGGER = "tensorboard" - LOGGER_URI = None - RESTORE_PATH = None - SKIP_TRAIN_EPOCH = False - START_WITH_EVAL = True - BATCH_SIZE = 3 - GRAD_ACUMM_STEPS = 28 * 3 - - # debug - # DASHBOARD_LOGGER = "tensorboard" - # LOGGER_URI = None - # RESTORE_PATH = None - # BATCH_SIZE = 2 - # GRAD_ACUMM_STEPS = 1 - main() diff --git a/recipes/multilingual/xtts_v1/train_xtts.py b/recipes/multilingual/xtts_v1/train_xtts.py deleted file mode 100644 index fa13d8d4b8..0000000000 --- a/recipes/multilingual/xtts_v1/train_xtts.py +++ /dev/null @@ -1,376 +0,0 @@ -from trainer import Trainer, TrainerArgs - -from TTS.config.shared_configs import BaseDatasetConfig -from TTS.tts.datasets import load_tts_samples -from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig - 
-config_coqui_MLS_metadata_train_with_previous_audio_key_de = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/MLS/mls_german", - meta_file_train="metadata_train_with_previous_audio_key.csv", - language="de", -) - - -config_coqui_MLS_metadata_test_with_previous_audio_key_de = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/MLS/mls_german", - meta_file_train="metadata_test_with_previous_audio_key.csv", - language="de", -) - - -config_coqui_MLS_metadata_dev_with_previous_audio_key_de = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/MLS/mls_german", - meta_file_train="metadata_dev_with_previous_audio_key.csv", - language="de", -) - - -config_coqui_mls_french_metadata_with_previous_audio_key_fr = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/MLS/mls_french/", - meta_file_train="metadata_with_previous_audio_key.csv", - language="fr", -) - - -config_coqui_mls_spanish_metadata_with_previous_audio_key_es = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/MLS/mls_spanish/", - meta_file_train="/raid/datasets/MLS/mls_spanish/metadata_with_previous_audio_key.csv", - language="es", -) - - -config_coqui_mls_italian_metadata_with_previous_audio_key_it = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/MLS/mls_italian/", - meta_file_train="/raid/datasets/MLS/mls_italian/metadata_with_previous_audio_key.csv", - language="it", -) - - -config_coqui_mls_portuguese_metadata_with_previous_audio_key_pt = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/MLS/mls_portuguese/", - meta_file_train="/raid/datasets/MLS/mls_portuguese/metadata_with_previous_audio_key.csv", - language="pt", -) - - -config_coqui_mls_polish_metadata_with_previous_audio_key_pl = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - 
path="/raid/datasets/MLS/mls_polish/", - meta_file_train="/raid/datasets/MLS/mls_polish/metadata_with_previous_audio_key.csv", - language="pl", -) - - -config_coqui_common_voice_metafile_it_train_with_scores_it = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_it_train_with_scores.csv", - language="it", -) - - -config_coqui_common_voice_metafile_it_test_with_scores_it = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_it_test_with_scores.csv", - language="it", -) - - -config_coqui_common_voice_metafile_it_dev_with_scores_it = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_it_dev_with_scores.csv", - language="it", -) - - -config_coqui_common_voice_metafile_pt_train_with_scores_pt = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_pt_train_with_scores.csv", - language="pt", -) - - -config_coqui_common_voice_metafile_pt_test_with_scores_pt = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_pt_test_with_scores.csv", - language="pt", -) - - -config_coqui_common_voice_metafile_pt_dev_with_scores_pt = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_pt_dev_with_scores.csv", - language="pt", -) - - -config_coqui_common_voice_metafile_en_train_en = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_en_train.csv", - 
language="en", -) - - -config_coqui_common_voice_metafile_en_test_en = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_en_test.csv", - language="en", -) - - -config_coqui_common_voice_metafile_en_dev_en = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_en_dev.csv", - language="en", -) - - -config_coqui_common_voice_metafile_tr_validated_tr = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_tr_validated.csv", - language="tr", -) - - -config_coqui_common_voice_metafile_ru_validated_ru = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_ru_validated.csv", - language="ru", -) - - -config_coqui_common_voice_metafile_nl_validated_nl = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_nl_validated.csv", - language="nl", -) - - -config_coqui_common_voice_metafile_cs_validated_cs = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_cs_validated.csv", - language="cs", -) - - -config_coqui_common_voice_metafile_fr_validated_fr = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_fr_validated.csv", - language="fr", -) - - -config_coqui_common_voice_metafile_es_validated_es = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - 
meta_file_train="/raid/datasets/common_voice/metafile_es_validated.csv", - language="es", -) - - -config_coqui_common_voice_metafile_pl_validated_pl = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_pl_validated.csv", - language="pl", -) - - -config_coqui_common_voice_metafile_ar_validated_ar = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_ar_validated.csv", - language="ar", -) - - -config_coqui_common_voice_metafile_zh_CN_validated_zh_cn = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_zh-CN_validated.csv", - language="zh-cn", -) - - -config_coqui_common_voice_metafile_ja_validated_ja = BaseDatasetConfig( - formatter="coqui", - dataset_name="coqui", - path="/raid/datasets/common_voice/", - meta_file_train="/raid/datasets/common_voice/metafile_ja_validated.csv", - language="ja", -) - -# DATASETS_CONFIG_LIST=[config_coqui_MLS_metadata_train_with_previous_audio_key_de, config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_MLS_metadata_dev_with_previous_audio_key_de, config_coqui_mls_french_metadata_with_previous_audio_key_fr, config_coqui_mls_spanish_metadata_with_previous_audio_key_es, config_coqui_mls_italian_metadata_with_previous_audio_key_it, config_coqui_mls_portuguese_metadata_with_previous_audio_key_pt, config_coqui_mls_polish_metadata_with_previous_audio_key_pl, config_coqui_common_voice_metafile_it_train_with_scores_it, config_coqui_common_voice_metafile_it_test_with_scores_it, config_coqui_common_voice_metafile_it_dev_with_scores_it, config_coqui_common_voice_metafile_pt_train_with_scores_pt, config_coqui_common_voice_metafile_pt_test_with_scores_pt, config_coqui_common_voice_metafile_pt_dev_with_scores_pt, 
config_coqui_common_voice_metafile_en_train_en, config_coqui_common_voice_metafile_en_test_en, config_coqui_common_voice_metafile_en_dev_en, config_coqui_common_voice_metafile_tr_validated_tr, config_coqui_common_voice_metafile_ru_validated_ru, config_coqui_common_voice_metafile_nl_validated_nl, config_coqui_common_voice_metafile_cs_validated_cs, config_coqui_common_voice_metafile_fr_validated_fr, config_coqui_common_voice_metafile_es_validated_es, config_coqui_common_voice_metafile_pl_validated_pl, config_coqui_common_voice_metafile_ar_validated_ar, config_coqui_common_voice_metafile_zh_CN_validated_zh_cn, config_coqui_common_voice_metafile_ja_validated_ja] - -# DATASETS_CONFIG_LIST = [config_coqui_mls_french_metadata_with_previous_audio_key_fr, config_coqui_MLS_metadata_test_with_previous_audio_key_de, config_coqui_mls_spanish_metadata_with_previous_audio_key_es, config_coqui_mls_italian_metadata_with_previous_audio_key_it] - -DATASETS_CONFIG_LIST = [ - config_coqui_MLS_metadata_test_with_previous_audio_key_de, - config_coqui_mls_italian_metadata_with_previous_audio_key_it, -] - - -def freeze_layers(trainer): - pass - - -def main(): - # init args and config - model_args = GPTArgs( - max_conditioning_length=132300, # 6 secs - min_conditioning_length=66150, # 3 secs - debug_loading_failures=False, - max_wav_length=255995, # ~11.6 seconds - max_text_length=200, - mel_norm_file="/raid/datasets/xtts_models/mel_stats.pth", - dvae_checkpoint="/raid/datasets/xtts_models/dvae.pth", - tokenizer_file="/raid/datasets/xtts_models/vocab.json", # vocab path of the model that you want to fine-tune - xtts_checkpoint="https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth", # checkpoint path of the model that you want to fine-tune - gpt_num_audio_tokens=8194, - gpt_start_audio_token=8192, - gpt_stop_audio_token=8193, - ) - audio_config = XttsAudioConfig( - sample_rate=22050, dvae_sample_rate=22050, diffusion_sample_rate=24000, output_sample_rate=24000 # GPT SR - ) - config 
= GPTTrainerConfig( - output_path=OUT_PATH, - model_args=model_args, - run_name=RUN_NAME, - project_name=PROJECT_NAME, - run_description=""" - GPT XTTS training - """, - dashboard_logger=DASHBOARD_LOGGER, - logger_uri=LOGGER_URI, - audio=audio_config, - batch_size=BATCH_SIZE, - batch_group_size=48, - eval_batch_size=BATCH_SIZE, - num_loader_workers=8, - eval_split_max_size=256, - print_step=50, - plot_step=100, - log_model_step=1000, - save_step=10000, - save_n_checkpoints=1, - save_checkpoints=True, - # target_loss="loss", - print_eval=False, - # Optimizer values like tortoise, pytorch implementation with modifications to not apply WD to non-weight parameters. - optimizer="AdamW", - optimizer_wd_only_on_weights=True, # for multi-gpu training turn it off - optimizer_params={"betas": [0.9, 0.96], "eps": 1e-8, "weight_decay": 1e-2}, - lr=5e-06, # learning rate - lr_scheduler="MultiStepLR", - # it was adjusted accordly for the new step scheme - lr_scheduler_params={"milestones": [50000 * 18, 150000 * 18, 300000 * 18], "gamma": 0.5, "last_epoch": -1}, - test_sentences=[ - { - "text": "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", - "speaker_wav": "/raid/edresson/dev/ref.wav", - "language": "en", - }, - { - "text": "This cake is great. 
It's so delicious and moist.", - "speaker_wav": "/raid/edresson/dev/ref.wav", - "language": "en", - }, - ], - ) - - # init the model from config - model = GPTTrainer.init_from_config(config) - - # load training samples - train_samples, eval_samples = load_tts_samples( - DATASETS_CONFIG_LIST, - eval_split=True, - eval_split_max_size=config.eval_split_max_size, - eval_split_size=config.eval_split_size, - ) - - # init the trainer and 🚀 - trainer = Trainer( - TrainerArgs( - restore_path=RESTORE_PATH, - skip_train_epoch=SKIP_TRAIN_EPOCH, - start_with_eval=START_WITH_EVAL, - grad_accum_steps=GRAD_ACUMM_STEPS, - ), - config, - output_path=OUT_PATH, - model=model, - train_samples=train_samples, - eval_samples=eval_samples, - callbacks={"on_epoch_start": freeze_layers}, - ) - trainer.fit() - - -if __name__ == "__main__": - RUN_NAME = "GPT_XTTS" - PROJECT_NAME = "XTTS_trainer" - OUT_PATH = "/raid/edresson/dev/Checkpoints/XTTS_style_emb/" - DASHBOARD_LOGGER = "clearml" - LOGGER_URI = "s3://coqui-ai-models/TTS/Checkpoints/XTTS_style_emb/" - RESTORE_PATH = None - SKIP_TRAIN_EPOCH = False - START_WITH_EVAL = True - BATCH_SIZE = 9 - GRAD_ACUMM_STEPS = 28 - - # debug - # DASHBOARD_LOGGER = "tensorboard" - # LOGGER_URI = None - # RESTORE_PATH = None - BATCH_SIZE = 2 - GRAD_ACUMM_STEPS = 1 - - main() From 5f98dbeec91096e9f0869c4950fdd66db80d0e2f Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Wed, 18 Oct 2023 10:34:40 -0300 Subject: [PATCH 09/24] Update Ljspeech XTTS recipe --- TTS/tts/models/xtts.py | 1 + recipes/ljspeech/xtts_v1/train_xtts.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index e2c8ca4c0f..55a10751e5 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -855,4 +855,5 @@ def load_checkpoint( self.gpt.eval() def train_step(self): + # ToDo: Add here the link of documentation for XTTS FT raise NotImplementedError("XTTS Training is not implemented") diff --git 
a/recipes/ljspeech/xtts_v1/train_xtts.py b/recipes/ljspeech/xtts_v1/train_xtts.py index 641d050cb9..7e564bad9d 100644 --- a/recipes/ljspeech/xtts_v1/train_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_xtts.py @@ -52,7 +52,6 @@ ) LANGUAGE = config_dataset.language - def main(): # init args and config model_args = GPTArgs( From 94dcf849791ac9e42608dc695aba5cd6103f7f08 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Wed, 18 Oct 2023 10:53:12 -0300 Subject: [PATCH 10/24] Rename XTTS recipe --- recipes/ljspeech/xtts_v1/{train_xtts.py => train_gpt_xtts.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename recipes/ljspeech/xtts_v1/{train_xtts.py => train_gpt_xtts.py} (100%) diff --git a/recipes/ljspeech/xtts_v1/train_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py similarity index 100% rename from recipes/ljspeech/xtts_v1/train_xtts.py rename to recipes/ljspeech/xtts_v1/train_gpt_xtts.py From 1f92741d6af03e8350bc38f69c32a6d22fe6e16c Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Wed, 18 Oct 2023 15:14:26 -0300 Subject: [PATCH 11/24] Fix issue #2971 --- TTS/tts/layers/xtts/gpt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index 8f24ac0154..e7c0a41a77 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -559,7 +559,7 @@ def generate( bos_token_id=self.start_audio_token, pad_token_id=self.stop_audio_token, eos_token_id=self.stop_audio_token, - max_length=self.max_mel_tokens * 2 + self.max_prompt_tokens + self.max_text_tokens, + max_length=self.max_mel_tokens, **hf_generate_kwargs, ) if "return_dict_in_generate" in hf_generate_kwargs: @@ -572,7 +572,7 @@ def get_generator(self, fake_inputs, **hf_generate_kwargs): bos_token_id=self.start_audio_token, pad_token_id=self.stop_audio_token, eos_token_id=self.stop_audio_token, - max_length=self.max_mel_tokens * 2 + self.max_prompt_tokens + self.max_text_tokens, + max_length=self.max_mel_tokens, 
do_stream=True, **hf_generate_kwargs, ) From affaf11148babd54c5cc2371f990ca154209af9a Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Wed, 18 Oct 2023 15:33:05 -0300 Subject: [PATCH 12/24] Add XTTS training unit test --- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 5 +- TTS/tts/models/xtts.py | 37 +- recipes/ljspeech/xtts_v1/train_gpt_xtts.py | 1 + tests/inputs/xtts_vocab.json | 12669 +++++++++++++++++++ tests/tts_tests2/test_xtts_gpt_train.py | 163 + 5 files changed, 12858 insertions(+), 17 deletions(-) create mode 100644 tests/inputs/xtts_vocab.json create mode 100644 tests/tts_tests2/test_xtts_gpt_train.py diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index e4df2b90d5..22577ad495 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -268,6 +268,7 @@ def format_batch_on_device(self, batch): dvae_wav = batch["wav"] dvae_mel_spec = self.torch_mel_spectrogram_dvae(dvae_wav) codes = self.dvae.get_codebook_indices(dvae_mel_spec) + batch["audio_codes"] = codes # delete useless batch tensors del batch["padded_text"] @@ -454,7 +455,9 @@ def load_checkpoint( target_options={"anon": True}, ): # pylint: disable=unused-argument, disable=W0201, disable=W0102, redefined-builtin """Load the model checkpoint and setup for training or inference""" - state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"))["model"] + + state, _ = self.xtts.get_compatible_checkpoint_state(checkpoint_path) + # load the model weights self.xtts.load_state_dict(state, strict=strict) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 55a10751e5..1031e4bdd2 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -643,6 +643,7 @@ def inference( expected_output_len = torch.tensor( [gpt_codes.shape[-1] * self.gpt.code_stride_len], device=text_tokens.device ) + text_len = torch.tensor([text_tokens.shape[-1]], device=self.device) gpt_latents = self.gpt( 
text_tokens, @@ -788,6 +789,25 @@ def eval(self): # pylint: disable=redefined-builtin self.gpt.init_gpt_for_inference() super().eval() + def get_compatible_checkpoint_state_dict(self, model_path): + checkpoint = load_fsspec(model_path, map_location=torch.device("cpu"))["model"] + ignore_keys = ["diffusion_decoder", "vocoder"] if self.args.use_hifigan or self.args.use_ne_hifigan else [] + ignore_keys += [] if self.args.use_hifigan else ["hifigan_decoder"] + ignore_keys += [] if self.args.use_ne_hifigan else ["ne_hifigan_decoder"] + for key in list(checkpoint.keys()): + # check if it is from the coqui Trainer if so convert it + if key.startswith("xtts."): + new_key = key.replace("xtts.", "") + checkpoint[new_key] = checkpoint[key] + del checkpoint[key] + key = new_key + + # remove unused keys + if key.split(".")[0] in ignore_keys: + del checkpoint[key] + + return checkpoint + def load_checkpoint( self, config, @@ -821,22 +841,7 @@ def load_checkpoint( self.init_models() - checkpoint = load_fsspec(model_path, map_location=torch.device("cpu"))["model"] - ignore_keys = ["diffusion_decoder", "vocoder"] if self.args.use_hifigan or self.args.use_ne_hifigan else [] - ignore_keys += [] if self.args.use_hifigan else ["hifigan_decoder"] - ignore_keys += [] if self.args.use_ne_hifigan else ["ne_hifigan_decoder"] - for key in list(checkpoint.keys()): - # check if it is from the coqui Trainer if so convert it - if key.startswith("xtts."): - coqui_trainer_checkpoint = True - new_key = key.replace("xtts.", "") - checkpoint[new_key] = checkpoint[key] - del checkpoint[key] - key = new_key - - # remove unused keys - if key.split(".")[0] in ignore_keys: - del checkpoint[key] + checkpoint = self.get_compatible_checkpoint_state_dict(model_path) # deal with v1 and v1.1. 
V1 has the init_gpt_for_inference keys, v1.1 do not try: diff --git a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py index 7e564bad9d..641d050cb9 100644 --- a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -52,6 +52,7 @@ ) LANGUAGE = config_dataset.language + def main(): # init args and config model_args = GPTArgs( diff --git a/tests/inputs/xtts_vocab.json b/tests/inputs/xtts_vocab.json new file mode 100644 index 0000000000..a3c6dcec77 --- /dev/null +++ b/tests/inputs/xtts_vocab.json @@ -0,0 +1,12669 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 259, + "special": true, + "content": "[en]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 260, + "special": true, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 261, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 262, + "special": true, + "content": "[fr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 284, + "special": true, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 285, + "special": true, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, 
+ { + "id": 286, + "special": true, + "content": "[pt]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 294, + "special": true, + "content": "[pl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 295, + "special": true, + "content": "[tr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 267, + "special": true, + "content": "[ru]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 293, + "special": true, + "content": "[cs]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 297, + "special": true, + "content": "[nl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 5022, + "special": true, + "content": "[ar]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 5023, + "special": true, + "content": "[zh-cn]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 5412, + "special": true, + "content": "[ja]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 
34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, 
+ "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "ß": 255, + "ä": 256, + "ö": 257, + "ü": 258, + "[en]": 259, + "[de]": 260, + "[START]": 261, + "[fr]": 262, + "œ": 263, + "ï": 264, + "ê": 265, + "â": 266, + "[ru]": 267, + "ÿ": 268, + "è": 269, + "à": 270, + "ë": 271, + "ù": 272, + "î": 273, + "ç": 274, + "æ": 275, + "ô": 276, + "û": 277, + "á": 278, + "é": 279, + "í": 280, + "ó": 281, + "ú": 282, + "ñ": 283, + "[es]": 284, + "[it]": 285, + "[pt]": 286, + "ń": 287, + "ś": 288, + "ę": 289, + "ą": 290, + "ż": 291, + "ć": 292, + "[cs]": 293, + "[pl]": 294, + "[tr]": 295, + "ã": 296, + "[nl]": 297, + "ş": 298, + "ğ": 299, + "ı": 300, + "ò": 301, + "ì": 302, + "¿": 303, + "…": 304, + "i̇": 305, + "õ": 306, + "\"": 307, + "´": 308, + "ø": 309, + "č": 310, + "ō": 311, + "š": 312, + "ž": 313, + "̇": 314, + "ei": 315, + "ich": 316, + "ein": 317, + "au": 318, + "sch": 319, + "und": 320, + "die": 321, + "da": 322, + "den": 323, + "gen": 324, + "zu": 325, + "hr": 326, + "ten": 327, + "mi": 328, + "sie": 329, + "das": 330, + "eine": 331, + "icht": 332, + "ber": 333, + "ach": 334, + "auf": 335, 
+ "lich": 336, + "nicht": 337, + "mm": 338, + "ben": 339, + "war": 340, + "mit": 341, + "sich": 342, + "ig": 343, + "aus": 344, + "ist": 345, + "wie": 346, + "och": 347, + "ung": 348, + "ann": 349, + "ür": 350, + "hn": 351, + "ihr": 352, + "sen": 353, + "tz": 354, + "dem": 355, + "eit": 356, + "hat": 357, + "wir": 358, + "von": 359, + "wei": 360, + "ier": 361, + "ra": 362, + "einen": 363, + "vor": 364, + "als": 365, + "wo": 366, + "rei": 367, + "ste": 368, + "lie": 369, + "auch": 370, + "du": 371, + "des": 372, + "ko": 373, + "über": 374, + "bei": 375, + "hen": 376, + "hm": 377, + "lei": 378, + "aber": 379, + "wen": 380, + "hl": 381, + "ger": 382, + "nach": 383, + "ft": 384, + "imm": 385, + "je": 386, + "schen": 387, + "wer": 388, + "ser": 389, + "än": 390, + "sein": 391, + "ol": 392, + "cht": 393, + "für": 394, + "kl": 395, + "ff": 396, + "einem": 397, + "nen": 398, + "ja": 399, + "noch": 400, + "hatte": 401, + "pf": 402, + "hin": 403, + "di": 404, + "chen": 405, + "rü": 406, + "iel": 407, + "sel": 408, + "dass": 409, + "ihn": 410, + "mir": 411, + "schl": 412, + "ön": 413, + "gan": 414, + "gt": 415, + "einer": 416, + "sten": 417, + "mich": 418, + "wenn": 419, + "ell": 420, + "gte": 421, + "mal": 422, + "gel": 423, + "ken": 424, + "nur": 425, + "mmen": 426, + "fü": 427, + "ern": 428, + "ör": 429, + "unter": 430, + "ander": 431, + "dur": 432, + "uch": 433, + "ta": 434, + "men": 435, + "mach": 436, + "doch": 437, + "durch": 438, + "os": 439, + "gl": 440, + "hal": 441, + "ihre": 442, + "wä": 443, + "immer": 444, + "ihm": 445, + "kann": 446, + "ort": 447, + "dann": 448, + "lan": 449, + "tzt": 450, + "oder": 451, + "hren": 452, + "et": 453, + "kön": 454, + "ick": 455, + "fa": 456, + "wieder": 457, + "daß": 458, + "mein": 459, + "fen": 460, + "ganz": 461, + "diese": 462, + "ster": 463, + "dar": 464, + "wa": 465, + "ges": 466, + "na": 467, + "fl": 468, + "igen": 469, + "sche": 470, + "ungen": 471, + "mehr": 472, + "ßen": 473, + "ot": 474, + "kon": 475, + "gew": 476, + 
"haben": 477, + "geh": 478, + "ät": 479, + "sind": 480, + "dr": 481, + "wel": 482, + "uns": 483, + "vo": 484, + "ma": 485, + "ute": 486, + "schon": 487, + "bes": 488, + "gesch": 489, + "bt": 490, + "che": 491, + "son": 492, + "ob": 493, + "la": 494, + "rück": 495, + "seine": 496, + "kr": 497, + "fre": 498, + "eil": 499, + "zum": 500, + "hier": 501, + "kt": 502, + "ige": 503, + "spr": 504, + "leben": 505, + "bst": 506, + "zeit": 507, + "gro": 508, + "denn": 509, + "ho": 510, + "scha": 511, + "bar": 512, + "alle": 513, + "gegen": 514, + "wür": 515, + "mü": 516, + "ze": 517, + "werden": 518, + "jetzt": 519, + "kommen": 520, + "nie": 521, + "sei": 522, + "heit": 523, + "soll": 524, + "glei": 525, + "meine": 526, + "woll": 527, + "ner": 528, + "habe": 529, + "wur": 530, + "lichen": 531, + "assen": 532, + "nte": 533, + "sehen": 534, + "wird": 535, + "bis": 536, + "gar": 537, + "ien": 538, + "mus": 539, + "uß": 540, + "är": 541, + "stell": 542, + "keit": 543, + "zwei": 544, + "selbst": 545, + "sta": 546, + "pa": 547, + "sagte": 548, + "tet": 549, + "kam": 550, + "ssen": 551, + "viel": 552, + "ug": 553, + "zen": 554, + "hei": 555, + "mann": 556, + "will": 557, + "geb": 558, + "waren": 559, + "ück": 560, + "äch": 561, + "mer": 562, + "ru": 563, + "hau": 564, + "eigen": 565, + "ang": 566, + "weg": 567, + "blick": 568, + "fra": 569, + "alles": 570, + "ka": 571, + "augen": 572, + "fin": 573, + "liche": 574, + "unser": 575, + "dern": 576, + "herr": 577, + "nun": 578, + "vie": 579, + "chte": 580, + "wohl": 581, + "fall": 582, + "ht": 583, + "ün": 584, + "etwas": 585, + "stand": 586, + "äu": 587, + "mö": 588, + "tel": 589, + "rie": 590, + "dich": 591, + "dies": 592, + "hand": 593, + "bin": 594, + "ffen": 595, + "nichts": 596, + "dan": 597, + "hne": 598, + "ihnen": 599, + "esen": 600, + "dieser": 601, + "frau": 602, + "art": 603, + "dir": 604, + "isch": 605, + "erst": 606, + "gleich": 607, + "komm": 608, + "hör": 609, + "ße": 610, + "dig": 611, + "sehr": 612, + "zei": 613, + 
"sam": 614, + "aum": 615, + "hät": 616, + "ingen": 617, + "gut": 618, + "mut": 619, + "cken": 620, + "konnte": 621, + "stimm": 622, + "zur": 623, + "itz": 624, + "weil": 625, + "würde": 626, + "fä": 627, + "können": 628, + "keine": 629, + "fer": 630, + "ischen": 631, + "voll": 632, + "eines": 633, + "setz": 634, + "zie": 635, + "del": 636, + "tete": 637, + "seiner": 638, + "ieren": 639, + "gest": 640, + "zurück": 641, + "wurde": 642, + "schn": 643, + "pr": 644, + "ließ": 645, + "tra": 646, + "mä": 647, + "gend": 648, + "fol": 649, + "ik": 650, + "schla": 651, + "schaft": 652, + "ater": 653, + "weiß": 654, + "seinen": 655, + "lassen": 656, + "lu": 657, + "unden": 658, + "teil": 659, + "neu": 660, + "iert": 661, + "menschen": 662, + "hmen": 663, + "str": 664, + "gi": 665, + "sah": 666, + "ihren": 667, + "eln": 668, + "weiter": 669, + "gehen": 670, + "iger": 671, + "macht": 672, + "tag": 673, + "also": 674, + "halten": 675, + "nis": 676, + "acht": 677, + "geben": 678, + "og": 679, + "nat": 680, + "mar": 681, + "det": 682, + "ohne": 683, + "haus": 684, + "tro": 685, + "ange": 686, + "lau": 687, + "spiel": 688, + "tre": 689, + "schr": 690, + "inn": 691, + "los": 692, + "machen": 693, + "hätte": 694, + "beg": 695, + "wirk": 696, + "alt": 697, + "glich": 698, + "tes": 699, + "richt": 700, + "freund": 701, + "ihrer": 702, + "fel": 703, + "bel": 704, + "sol": 705, + "einmal": 706, + "eben": 707, + "hol": 708, + "hän": 709, + "tern": 710, + "hö": 711, + "schw": 712, + "recht": 713, + "wahr": 714, + "seinem": 715, + "stehen": 716, + "hlen": 717, + "ins": 718, + "ging": 719, + "wollte": 720, + "wissen": 721, + "ungs": 722, + "ald": 723, + "ass": 724, + "jahr": 725, + "mor": 726, + "welt": 727, + "under": 728, + "zusa": 729, + "kopf": 730, + "lang": 731, + "hinter": 732, + "atz": 733, + "stra": 734, + "angen": 735, + "ank": 736, + "ade": 737, + "glau": 738, + "fach": 739, + "hatten": 740, + "fort": 741, + "eicht": 742, + "iff": 743, + "ler": 744, + "mei": 745, + "diesem": 746, 
+ "kein": 747, + "frei": 748, + "führ": 749, + "vom": 750, + "β": 751, + "ai": 752, + "ait": 753, + "que": 754, + "les": 755, + "av": 756, + "ais": 757, + "oi": 758, + "eu": 759, + "lle": 760, + "par": 761, + "ans": 762, + "ment": 763, + "ét": 764, + "une": 765, + "pas": 766, + "qui": 767, + "elle": 768, + "dé": 769, + "pour": 770, + "dans": 771, + "ré": 772, + "tou": 773, + "vous": 774, + "vi": 775, + "ouv": 776, + "mon": 777, + "sur": 778, + "ci": 779, + "plu": 780, + "ère": 781, + "mais": 782, + "ois": 783, + "plus": 784, + "ée": 785, + "aient": 786, + "mp": 787, + "lui": 788, + "ave": 789, + "était": 790, + "ses": 791, + "tout": 792, + "oir": 793, + "avait": 794, + "és": 795, + "mes": 796, + "nous": 797, + "eux": 798, + "bi": 799, + "ons": 800, + "pu": 801, + "ces": 802, + "tu": 803, + "leur": 804, + "don": 805, + "eur": 806, + "ette": 807, + "aire": 808, + "avec": 809, + "dit": 810, + "té": 811, + "ille": 812, + "comme": 813, + "cr": 814, + "ux": 815, + "ès": 816, + "aux": 817, + "jour": 818, + "ils": 819, + "bien": 820, + "cou": 821, + "quel": 822, + "peu": 823, + "cette": 824, + "cu": 825, + "mê": 826, + "fait": 827, + "gu": 828, + "être": 829, + "ité": 830, + "ens": 831, + "ni": 832, + "lé": 833, + "dis": 834, + "ble": 835, + "né": 836, + "puis": 837, + "même": 838, + "ques": 839, + "fi": 840, + "age": 841, + "moi": 842, + "ence": 843, + "ont": 844, + "main": 845, + "ors": 846, + "aut": 847, + "ance": 848, + "mé": 849, + "sans": 850, + "sé": 851, + "lon": 852, + "hom": 853, + "car": 854, + "able": 855, + "cher": 856, + "deux": 857, + "enf": 858, + "où": 859, + "ph": 860, + "ure": 861, + "temp": 862, + "pos": 863, + "rent": 864, + "pé": 865, + "faire": 866, + "pi": 867, + "tres": 868, + "ça": 869, + "endre": 870, + "bon": 871, + "sou": 872, + "int": 873, + "pré": 874, + "sent": 875, + "tant": 876, + "cer": 877, + "là": 878, + "lais": 879, + "près": 880, + "bre": 881, + "cour": 882, + "pet": 883, + "comp": 884, + "lait": 885, + "trouv": 886, + "entre": 887, + 
"sont": 888, + "dev": 889, + "nu": 890, + "temps": 891, + "dou": 892, + "rait": 893, + "bou": 894, + "quand": 895, + "jours": 896, + "avoir": 897, + "été": 898, + "ale": 899, + "pre": 900, + "fois": 901, + "orte": 902, + "vé": 903, + "non": 904, + "tous": 905, + "jus": 906, + "coup": 907, + "homme": 908, + "ête": 909, + "aussi": 910, + "urs": 911, + "seu": 912, + "ord": 913, + "min": 914, + "gé": 915, + "core": 916, + "va": 917, + "vre": 918, + "encore": 919, + "sem": 920, + "ite": 921, + "autre": 922, + "pris": 923, + "peut": 924, + "ue": 925, + "ante": 926, + "gn": 927, + "rép": 928, + "hu": 929, + "sion": 930, + "votre": 931, + "dire": 932, + "ez": 933, + "fem": 934, + "leurs": 935, + "met": 936, + "cri": 937, + "mis": 938, + "tour": 939, + "rai": 940, + "jam": 941, + "regar": 942, + "rien": 943, + "vers": 944, + "suis": 945, + "pouv": 946, + "vis": 947, + "grand": 948, + "ants": 949, + "cor": 950, + "rer": 951, + "cé": 952, + "tent": 953, + "pres": 954, + "vou": 955, + "alors": 956, + "sieur": 957, + "aine": 958, + "quoi": 959, + "fon": 960, + "endant": 961, + "arri": 962, + "eure": 963, + "après": 964, + "donc": 965, + "itu": 966, + "lè": 967, + "sait": 968, + "toi": 969, + "cha": 970, + "ail": 971, + "asse": 972, + "imp": 973, + "voy": 974, + "conn": 975, + "pla": 976, + "petit": 977, + "avant": 978, + "nom": 979, + "tin": 980, + "dont": 981, + "sous": 982, + "emp": 983, + "person": 984, + "elles": 985, + "beau": 986, + "parti": 987, + "cho": 988, + "prit": 989, + "toujours": 990, + "rais": 991, + "jamais": 992, + "trav": 993, + "tions": 994, + "très": 995, + "voi": 996, + "ren": 997, + "yeux": 998, + "voir": 999, + "premi": 1000, + "gne": 1001, + "heure": 1002, + "rou": 1003, + "eff": 1004, + "notre": 1005, + "ments": 1006, + "ton": 1007, + "fais": 1008, + "cela": 1009, + "répon": 1010, + "cons": 1011, + "air": 1012, + "ôt": 1013, + "pendant": 1014, + "ici": 1015, + "toute": 1016, + "jet": 1017, + "port": 1018, + "étaient": 1019, + "pen": 1020, + "hé": 1021, 
+ "autres": 1022, + "père": 1023, + "oc": 1024, + "quelques": 1025, + "ique": 1026, + "lis": 1027, + "femme": 1028, + "jou": 1029, + "teur": 1030, + "monde": 1031, + "nes": 1032, + "dre": 1033, + "aff": 1034, + "rap": 1035, + "part": 1036, + "lement": 1037, + "cla": 1038, + "fut": 1039, + "quelque": 1040, + "prendre": 1041, + "rê": 1042, + "aille": 1043, + "sais": 1044, + "ches": 1045, + "let": 1046, + "char": 1047, + "ères": 1048, + "ents": 1049, + "moins": 1050, + "eau": 1051, + "aî": 1052, + "jeu": 1053, + "heur": 1054, + "ées": 1055, + "tri": 1056, + "point": 1057, + "mom": 1058, + "vent": 1059, + "nouv": 1060, + "gran": 1061, + "trois": 1062, + "sant": 1063, + "toutes": 1064, + "contre": 1065, + "èrent": 1066, + "chez": 1067, + "avez": 1068, + "ût": 1069, + "att": 1070, + "pau": 1071, + "porte": 1072, + "ouver": 1073, + "lit": 1074, + "prés": 1075, + "chose": 1076, + "vit": 1077, + "monsieur": 1078, + "hab": 1079, + "tête": 1080, + "ju": 1081, + "tement": 1082, + "ction": 1083, + "vrai": 1084, + "lar": 1085, + "cet": 1086, + "regard": 1087, + "lant": 1088, + "som": 1089, + "moment": 1090, + "illes": 1091, + "ple": 1092, + "ps": 1093, + "mère": 1094, + "cl": 1095, + "sour": 1096, + "ys": 1097, + "trop": 1098, + "enne": 1099, + "jusqu": 1100, + "avaient": 1101, + "avais": 1102, + "jeune": 1103, + "depuis": 1104, + "personne": 1105, + "fit": 1106, + "cert": 1107, + "jo": 1108, + "oui": 1109, + "rest": 1110, + "semb": 1111, + "cap": 1112, + "mat": 1113, + "mu": 1114, + "long": 1115, + "fran": 1116, + "faut": 1117, + "iti": 1118, + "bli": 1119, + "chev": 1120, + "pri": 1121, + "ente": 1122, + "ainsi": 1123, + "cham": 1124, + "lors": 1125, + "cas": 1126, + "ili": 1127, + "bé": 1128, + "nos": 1129, + "sui": 1130, + "rit": 1131, + "cro": 1132, + "gue": 1133, + "ía": 1134, + "por": 1135, + "las": 1136, + "ón": 1137, + "una": 1138, + "aba": 1139, + "dos": 1140, + "era": 1141, + "mb": 1142, + "para": 1143, + "ás": 1144, + "mos": 1145, + "ando": 1146, + "como": 1147, + 
"más": 1148, + "ción": 1149, + "tan": 1150, + "dad": 1151, + "ado": 1152, + "fu": 1153, + "cia": 1154, + "mente": 1155, + "sus": 1156, + "tar": 1157, + "za": 1158, + "ba": 1159, + "pero": 1160, + "sin": 1161, + "lla": 1162, + "án": 1163, + "ia": 1164, + "ran": 1165, + "ga": 1166, + "yo": 1167, + "tos": 1168, + "cos": 1169, + "ya": 1170, + "ones": 1171, + "había": 1172, + "hi": 1173, + "esta": 1174, + "mas": 1175, + "tor": 1176, + "aban": 1177, + "dor": 1178, + "ían": 1179, + "tas": 1180, + "én": 1181, + "endo": 1182, + "aque": 1183, + "ero": 1184, + "io": 1185, + "qué": 1186, + "cab": 1187, + "tal": 1188, + "señ": 1189, + "ora": 1190, + "todo": 1191, + "sal": 1192, + "cuando": 1193, + "gun": 1194, + "bu": 1195, + "ras": 1196, + "esto": 1197, + "pare": 1198, + "él": 1199, + "tras": 1200, + "jos": 1201, + "mien": 1202, + "pue": 1203, + "cre": 1204, + "pon": 1205, + "día": 1206, + "tros": 1207, + "sab": 1208, + "sobre": 1209, + "ese": 1210, + "mbre": 1211, + "eron": 1212, + "añ": 1213, + "ido": 1214, + "porque": 1215, + "ella": 1216, + "cen": 1217, + "muy": 1218, + "cal": 1219, + "este": 1220, + "has": 1221, + "có": 1222, + "gra": 1223, + "ros": 1224, + "aquel": 1225, + "dijo": 1226, + "cía": 1227, + "zo": 1228, + "ciones": 1229, + "mbi": 1230, + "elo": 1231, + "tó": 1232, + "ina": 1233, + "todos": 1234, + "tien": 1235, + "estaba": 1236, + "deci": 1237, + "cio": 1238, + "ño": 1239, + "lor": 1240, + "nues": 1241, + "medi": 1242, + "len": 1243, + "vida": 1244, + "ali": 1245, + "pues": 1246, + "ales": 1247, + "vol": 1248, + "mí": 1249, + "rar": 1250, + "cion": 1251, + "hasta": 1252, + "señor": 1253, + "cono": 1254, + "ah": 1255, + "dios": 1256, + "esa": 1257, + "ún": 1258, + "var": 1259, + "san": 1260, + "gui": 1261, + "otros": 1262, + "tado": 1263, + "buen": 1264, + "ña": 1265, + "tiemp": 1266, + "hacer": 1267, + "jer": 1268, + "vu": 1269, + "ana": 1270, + "así": 1271, + "antes": 1272, + "vez": 1273, + "miento": 1274, + "jar": 1275, + "lab": 1276, + "casa": 1277, + 
"eso": 1278, + "ego": 1279, + "dió": 1280, + "está": 1281, + "encia": 1282, + "eli": 1283, + "ías": 1284, + "tiempo": 1285, + "zar": 1286, + "van": 1287, + "mun": 1288, + "erta": 1289, + "tambi": 1290, + "sí": 1291, + "aun": 1292, + "mismo": 1293, + "entes": 1294, + "mano": 1295, + "ele": 1296, + "nada": 1297, + "segu": 1298, + "mej": 1299, + "erra": 1300, + "tir": 1301, + "uno": 1302, + "donde": 1303, + "toda": 1304, + "desde": 1305, + "también": 1306, + "cuer": 1307, + "hombre": 1308, + "otro": 1309, + "lib": 1310, + "trar": 1311, + "cual": 1312, + "hay": 1313, + "cada": 1314, + "taba": 1315, + "mento": 1316, + "tenía": 1317, + "quer": 1318, + "eran": 1319, + "siemp": 1320, + "siempre": 1321, + "erto": 1322, + "quí": 1323, + "gos": 1324, + "pués": 1325, + "ellos": 1326, + "después": 1327, + "nue": 1328, + "llo": 1329, + "inter": 1330, + "cómo": 1331, + "ahora": 1332, + "uste": 1333, + "traba": 1334, + "lado": 1335, + "ino": 1336, + "poco": 1337, + "erte": 1338, + "mujer": 1339, + "quier": 1340, + "algun": 1341, + "fue": 1342, + "ojos": 1343, + "enton": 1344, + "vos": 1345, + "esper": 1346, + "much": 1347, + "otra": 1348, + "az": 1349, + "eza": 1350, + "aquí": 1351, + "cias": 1352, + "gua": 1353, + "mucho": 1354, + "decir": 1355, + "esti": 1356, + "idad": 1357, + "algo": 1358, + "ocu": 1359, + "entonces": 1360, + "dido": 1361, + "entos": 1362, + "gri": 1363, + "dado": 1364, + "ios": 1365, + "dose": 1366, + "usted": 1367, + "quien": 1368, + "ami": 1369, + "unto": 1370, + "mejor": 1371, + "bas": 1372, + "solo": 1373, + "pregun": 1374, + "tur": 1375, + "alg": 1376, + "todas": 1377, + "parte": 1378, + "emb": 1379, + "cto": 1380, + "mundo": 1381, + "tiene": 1382, + "tante": 1383, + "palab": 1384, + "tran": 1385, + "aquella": 1386, + "cios": 1387, + "aunque": 1388, + "cuen": 1389, + "tener": 1390, + "fun": 1391, + "respon": 1392, + "allí": 1393, + "xi": 1394, + "han": 1395, + "pens": 1396, + "contra": 1397, + "tura": 1398, + "val": 1399, + "dio": 1400, + "tanto": 1401, 
+ "camin": 1402, + "mó": 1403, + "esp": 1404, + "ada": 1405, + "ío": 1406, + "hacia": 1407, + "dej": 1408, + "estar": 1409, + "ión": 1410, + "gas": 1411, + "vas": 1412, + "noche": 1413, + "ér": 1414, + "años": 1415, + "padre": 1416, + "gus": 1417, + "ár": 1418, + "sino": 1419, + "manos": 1420, + "cido": 1421, + "estu": 1422, + "hubi": 1423, + "vir": 1424, + "bri": 1425, + "raz": 1426, + "chi": 1427, + "puede": 1428, + "menos": 1429, + "habi": 1430, + "homb": 1431, + "neces": 1432, + "may": 1433, + "eros": 1434, + "ría": 1435, + "hecho": 1436, + "escu": 1437, + "lti": 1438, + "ándo": 1439, + "bus": 1440, + "cosas": 1441, + "tú": 1442, + "espa": 1443, + "reci": 1444, + "ctor": 1445, + "prim": 1446, + "dia": 1447, + "dese": 1448, + "mientras": 1449, + "hor": 1450, + "fuer": 1451, + "ida": 1452, + "posi": 1453, + "lante": 1454, + "ano": 1455, + "estas": 1456, + "pli": 1457, + "luego": 1458, + "sión": 1459, + "cin": 1460, + "tierra": 1461, + "guar": 1462, + "cado": 1463, + "encon": 1464, + "pren": 1465, + "mayor": 1466, + "fal": 1467, + "ð": 1468, + "ħ": 1469, + "ň": 1470, + "ə": 1471, + "θ": 1472, + "’": 1473, + "“": 1474, + "”": 1475, + "zi": 1476, + "gli": 1477, + "tto": 1478, + "ono": 1479, + "nel": 1480, + "tti": 1481, + "della": 1482, + "zione": 1483, + "tta": 1484, + "tà": 1485, + "uo": 1486, + "come": 1487, + "alla": 1488, + "oni": 1489, + "ggi": 1490, + "ssi": 1491, + "più": 1492, + "ini": 1493, + "bb": 1494, + "sto": 1495, + "sono": 1496, + "eri": 1497, + "sse": 1498, + "sc": 1499, + "sul": 1500, + "vano": 1501, + "sti": 1502, + "suo": 1503, + "cchi": 1504, + "zza": 1505, + "anche": 1506, + "tte": 1507, + "sci": 1508, + "col": 1509, + "sso": 1510, + "ssa": 1511, + "dei": 1512, + "aveva": 1513, + "zz": 1514, + "amo": 1515, + "gno": 1516, + "sua": 1517, + "ria": 1518, + "sì": 1519, + "ché": 1520, + "dal": 1521, + "ona": 1522, + "spe": 1523, + "gni": 1524, + "tt": 1525, + "delle": 1526, + "questo": 1527, + "nella": 1528, + "dere": 1529, + "anno": 1530, + "dell": 
1531, + "uni": 1532, + "bbe": 1533, + "anti": 1534, + "ene": 1535, + "gio": 1536, + "uto": 1537, + "qual": 1538, + "glia": 1539, + "quando": 1540, + "tutto": 1541, + "glio": 1542, + "zioni": 1543, + "cam": 1544, + "esso": 1545, + "ss": 1546, + "mol": 1547, + "loro": 1548, + "perché": 1549, + "cosa": 1550, + "due": 1551, + "poi": 1552, + "sco": 1553, + "cco": 1554, + "gna": 1555, + "tem": 1556, + "prima": 1557, + "così": 1558, + "essere": 1559, + "ani": 1560, + "bra": 1561, + "rio": 1562, + "anco": 1563, + "cui": 1564, + "spi": 1565, + "via": 1566, + "gior": 1567, + "bile": 1568, + "ggio": 1569, + "mai": 1570, + "tare": 1571, + "indi": 1572, + "rebbe": 1573, + "senza": 1574, + "zio": 1575, + "tutti": 1576, + "stato": 1577, + "zia": 1578, + "dalla": 1579, + "mia": 1580, + "vita": 1581, + "quella": 1582, + "qua": 1583, + "dove": 1584, + "allo": 1585, + "sempre": 1586, + "zzo": 1587, + "sia": 1588, + "dopo": 1589, + "porta": 1590, + "ccia": 1591, + "erano": 1592, + "anni": 1593, + "chia": 1594, + "enza": 1595, + "propri": 1596, + "anda": 1597, + "cca": 1598, + "occhi": 1599, + "questa": 1600, + "ffi": 1601, + "ron": 1602, + "mio": 1603, + "ris": 1604, + "ogni": 1605, + "rin": 1606, + "far": 1607, + "menti": 1608, + "ancora": 1609, + "fatto": 1610, + "mani": 1611, + "senti": 1612, + "pra": 1613, + "tempo": 1614, + "essi": 1615, + "bbi": 1616, + "lare": 1617, + "pers": 1618, + "sor": 1619, + "anza": 1620, + "pie": 1621, + "verso": 1622, + "altro": 1623, + "tato": 1624, + "cato": 1625, + "ato": 1626, + "volta": 1627, + "cc": 1628, + "fare": 1629, + "ciò": 1630, + "bili": 1631, + "nuo": 1632, + "quello": 1633, + "colo": 1634, + "ppo": 1635, + "trova": 1636, + "ore": 1637, + "rono": 1638, + "molto": 1639, + "almente": 1640, + "sca": 1641, + "vole": 1642, + "tali": 1643, + "sulla": 1644, + "sce": 1645, + "meno": 1646, + "anto": 1647, + "pun": 1648, + "stu": 1649, + "capi": 1650, + "giu": 1651, + "mini": 1652, + "pia": 1653, + "lavo": 1654, + "vero": 1655, + "rsi": 1656, + 
"altri": 1657, + "scia": 1658, + "suoi": 1659, + "glie": 1660, + "sotto": 1661, + "bene": 1662, + "scri": 1663, + "tale": 1664, + "degli": 1665, + "alc": 1666, + "uomo": 1667, + "pel": 1668, + "pote": 1669, + "essa": 1670, + "scu": 1671, + "signo": 1672, + "stro": 1673, + "uti": 1674, + "sione": 1675, + "gre": 1676, + "fini": 1677, + "lun": 1678, + "esi": 1679, + "passa": 1680, + "rà": 1681, + "mentre": 1682, + "hanno": 1683, + "usci": 1684, + "gia": 1685, + "già": 1686, + "mina": 1687, + "tica": 1688, + "giorno": 1689, + "esse": 1690, + "modo": 1691, + "spa": 1692, + "proprio": 1693, + "ori": 1694, + "contro": 1695, + "stru": 1696, + "diven": 1697, + "disse": 1698, + "rato": 1699, + "noi": 1700, + "vere": 1701, + "può": 1702, + "dice": 1703, + "cci": 1704, + "secon": 1705, + "ccio": 1706, + "qualche": 1707, + "tutta": 1708, + "gg": 1709, + "mondo": 1710, + "forma": 1711, + "mma": 1712, + "pensa": 1713, + "deva": 1714, + "fosse": 1715, + "sopra": 1716, + "tamente": 1717, + "ness": 1718, + "quanto": 1719, + "raga": 1720, + "unque": 1721, + "care": 1722, + "stre": 1723, + "grande": 1724, + "picco": 1725, + "guarda": 1726, + "nell": 1727, + "possi": 1728, + "presen": 1729, + "rò": 1730, + "paro": 1731, + "tua": 1732, + "vin": 1733, + "ane": 1734, + "stesso": 1735, + "dav": 1736, + "nei": 1737, + "nelle": 1738, + "ghi": 1739, + "pio": 1740, + "lato": 1741, + "sid": 1742, + "fine": 1743, + "fuo": 1744, + "quasi": 1745, + "ulti": 1746, + "ito": 1747, + "sue": 1748, + "fil": 1749, + "allora": 1750, + "veni": 1751, + "tano": 1752, + "ello": 1753, + "ão": 1754, + "não": 1755, + "uma": 1756, + "ela": 1757, + "lh": 1758, + "ção": 1759, + "cê": 1760, + "inha": 1761, + "você": 1762, + "ec": 1763, + "dade": 1764, + "ao": 1765, + "ram": 1766, + "vel": 1767, + "ém": 1768, + "pode": 1769, + "estava": 1770, + "isso": 1771, + "mui": 1772, + "faz": 1773, + "ões": 1774, + "pes": 1775, + "ix": 1776, + "sim": 1777, + "olh": 1778, + "isa": 1779, + "ên": 1780, + "tinha": 1781, + "meu": 
1782, + "são": 1783, + "minha": 1784, + "muito": 1785, + "foi": 1786, + "bem": 1787, + "diz": 1788, + "parec": 1789, + "ço": 1790, + "pesso": 1791, + "pois": 1792, + "mesmo": 1793, + "ções": 1794, + "seus": 1795, + "até": 1796, + "ência": 1797, + "lhe": 1798, + "tiv": 1799, + "mã": 1800, + "só": 1801, + "tão": 1802, + "tudo": 1803, + "então": 1804, + "inda": 1805, + "bal": 1806, + "indo": 1807, + "ndo": 1808, + "já": 1809, + "vam": 1810, + "eito": 1811, + "depois": 1812, + "mel": 1813, + "lha": 1814, + "ainda": 1815, + "fazer": 1816, + "pou": 1817, + "pergun": 1818, + "deix": 1819, + "tamb": 1820, + "ala": 1821, + "pelo": 1822, + "também": 1823, + "fica": 1824, + "prec": 1825, + "eles": 1826, + "havia": 1827, + "lá": 1828, + "nas": 1829, + "gem": 1830, + "mem": 1831, + "ós": 1832, + "deu": 1833, + "eiro": 1834, + "..": 1835, + "assim": 1836, + "ior": 1837, + "har": 1838, + "aqui": 1839, + "cul": 1840, + "sar": 1841, + "outra": 1842, + "olhos": 1843, + "ima": 1844, + "mim": 1845, + "ago": 1846, + "pessoas": 1847, + "eram": 1848, + "eira": 1849, + "pela": 1850, + "coisa": 1851, + "mão": 1852, + "conh": 1853, + "agora": 1854, + "iam": 1855, + "há": 1856, + "suas": 1857, + "guém": 1858, + "cabe": 1859, + "nem": 1860, + "ível": 1861, + "consegu": 1862, + "trabal": 1863, + "lev": 1864, + "lem": 1865, + "vai": 1866, + "tei": 1867, + "pró": 1868, + "quem": 1869, + "onde": 1870, + "cabeça": 1871, + "nunca": 1872, + "mentos": 1873, + "hum": 1874, + "dele": 1875, + "verdade": 1876, + "tá": 1877, + "hos": 1878, + "algum": 1879, + "dizer": 1880, + "penas": 1881, + "nós": 1882, + "enquanto": 1883, + "outro": 1884, + "lho": 1885, + "melhor": 1886, + "primei": 1887, + "iu": 1888, + "apenas": 1889, + "estou": 1890, + "conte": 1891, + "homem": 1892, + "dois": 1893, + "ças": 1894, + "pouco": 1895, + "senhor": 1896, + "tando": 1897, + "espera": 1898, + "pai": 1899, + "rios": 1900, + "baix": 1901, + "ase": 1902, + "isas": 1903, + "hora": 1904, + "ficar": 1905, + "seja": 1906, + "ân": 
1907, + "clar": 1908, + "inc": 1909, + "fos": 1910, + "ouvi": 1911, + "vem": 1912, + "tava": 1913, + "ário": 1914, + "sos": 1915, + "inho": 1916, + "rando": 1917, + "ês": 1918, + "coisas": 1919, + "aconte": 1920, + "lher": 1921, + "anos": 1922, + "talvez": 1923, + "estão": 1924, + "liv": 1925, + "outros": 1926, + "qualquer": 1927, + "gou": 1928, + "lí": 1929, + "tivesse": 1930, + "rado": 1931, + "precisa": 1932, + "mãe": 1933, + "dela": 1934, + "entra": 1935, + "maior": 1936, + "noite": 1937, + "tiva": 1938, + "pala": 1939, + "ração": 1940, + "deus": 1941, + "sas": 1942, + "inte": 1943, + "fei": 1944, + "palav": 1945, + "trás": 1946, + "cidade": 1947, + "lugar": 1948, + "vezes": 1949, + "encontra": 1950, + "tru": 1951, + "eci": 1952, + "ın": 1953, + "bir": 1954, + "yor": 1955, + "ek": 1956, + "dı": 1957, + "ey": 1958, + "tı": 1959, + "mı": 1960, + "iz": 1961, + "ır": 1962, + "gö": 1963, + "sı": 1964, + "bil": 1965, + "lı": 1966, + "üz": 1967, + "iç": 1968, + "iy": 1969, + "ım": 1970, + "uz": 1971, + "cak": 1972, + "iş": 1973, + "ını": 1974, + "iyor": 1975, + "baş": 1976, + "dü": 1977, + "değ": 1978, + "kar": 1979, + "ev": 1980, + "öy": 1981, + "bun": 1982, + "yap": 1983, + "sun": 1984, + "gör": 1985, + "yı": 1986, + "ki": 1987, + "ara": 1988, + "alı": 1989, + "onu": 1990, + "çı": 1991, + "şey": 1992, + "sın": 1993, + "kı": 1994, + "kad": 1995, + "ağ": 1996, + "değil": 1997, + "ük": 1998, + "çok": 1999, + "şı": 2000, + "ül": 2001, + "için": 2002, + "eye": 2003, + "oldu": 2004, + "mış": 2005, + "kal": 2006, + "mek": 2007, + "öyle": 2008, + "yordu": 2009, + "yüz": 2010, + "miş": 2011, + "mak": 2012, + "ola": 2013, + "yan": 2014, + "cek": 2015, + "yorum": 2016, + "bak": 2017, + "üm": 2018, + "ları": 2019, + "oğ": 2020, + "kadar": 2021, + "arı": 2022, + "ında": 2023, + "gün": 2024, + "yok": 2025, + "yer": 2026, + "dım": 2027, + "daha": 2028, + "ına": 2029, + "dim": 2030, + "bilir": 2031, + "iki": 2032, + "siz": 2033, + "diğ": 2034, + "bü": 2035, + "düş": 2036, + "üç": 
2037, + "unu": 2038, + "aman": 2039, + "fak": 2040, + "ede": 2041, + "sonra": 2042, + "hiç": 2043, + "aki": 2044, + "ğı": 2045, + "bul": 2046, + "maz": 2047, + "anla": 2048, + "bura": 2049, + "geç": 2050, + "maya": 2051, + "konu": 2052, + "din": 2053, + "tek": 2054, + "zaman": 2055, + "eler": 2056, + "öz": 2057, + "dır": 2058, + "gibi": 2059, + "şa": 2060, + "leri": 2061, + "kim": 2062, + "ku": 2063, + "fakat": 2064, + "yar": 2065, + "göz": 2066, + "cı": 2067, + "yorsun": 2068, + "bek": 2069, + "inde": 2070, + "pek": 2071, + "bunu": 2072, + "lik": 2073, + "iler": 2074, + "edi": 2075, + "öl": 2076, + "sür": 2077, + "sır": 2078, + "çık": 2079, + "sıl": 2080, + "alar": 2081, + "kes": 2082, + "yak": 2083, + "çek": 2084, + "yıl": 2085, + "ecek": 2086, + "ız": 2087, + "git": 2088, + "kap": 2089, + "ama": 2090, + "ıl": 2091, + "ların": 2092, + "biz": 2093, + "tır": 2094, + "oy": 2095, + "ancak": 2096, + "doğ": 2097, + "bana": 2098, + "şim": 2099, + "başla": 2100, + "lü": 2101, + "madı": 2102, + "beni": 2103, + "yük": 2104, + "lık": 2105, + "beş": 2106, + "nasıl": 2107, + "tık": 2108, + "tür": 2109, + "daki": 2110, + "ceğ": 2111, + "zı": 2112, + "iyi": 2113, + "dok": 2114, + "benim": 2115, + "cağ": 2116, + "yen": 2117, + "şu": 2118, + "mez": 2119, + "düşün": 2120, + "kendi": 2121, + "şimdi": 2122, + "yol": 2123, + "yu": 2124, + "iste": 2125, + "sek": 2126, + "mam": 2127, + "söyle": 2128, + "dik": 2129, + "kur": 2130, + "olduğ": 2131, + "sını": 2132, + "biliyor": 2133, + "kan": 2134, + "yal": 2135, + "meye": 2136, + "muş": 2137, + "kaç": 2138, + "iye": 2139, + "tü": 2140, + "ef": 2141, + "tım": 2142, + "evet": 2143, + "yet": 2144, + "burada": 2145, + "tim": 2146, + "biraz": 2147, + "kor": 2148, + "doğru": 2149, + "inin": 2150, + "kız": 2151, + "diye": 2152, + "dör": 2153, + "etti": 2154, + "onun": 2155, + "isti": 2156, + "ği": 2157, + "sana": 2158, + "üş": 2159, + "arka": 2160, + "hayır": 2161, + "karşı": 2162, + "ile": 2163, + "hak": 2164, + "ıyor": 2165, + "neden": 2166, 
+ "sev": 2167, + "sız": 2168, + "çocu": 2169, + "çalı": 2170, + "olur": 2171, + "bır": 2172, + "gir": 2173, + "ise": 2174, + "ih": 2175, + "kır": 2176, + "dön": 2177, + "böyle": 2178, + "seni": 2179, + "!\"": 2180, + "dört": 2181, + "söy": 2182, + "oş": 2183, + "musun": 2184, + "laş": 2185, + "ip": 2186, + "kay": 2187, + "hem": 2188, + "büyük": 2189, + "aç": 2190, + "bırak": 2191, + "misin": 2192, + "söz": 2193, + "değiş": 2194, + "ünü": 2195, + "gül": 2196, + "kö": 2197, + "karı": 2198, + "tamam": 2199, + "olu": 2200, + "yeni": 2201, + "lam": 2202, + "mıştı": 2203, + "yaş": 2204, + "iniz": 2205, + "kadın": 2206, + "bunun": 2207, + "mey": 2208, + "altı": 2209, + "yi": 2210, + "inden": 2211, + "senin": 2212, + "yat": 2213, + "top": 2214, + "isi": 2215, + "dün": 2216, + "hiçbir": 2217, + "yon": 2218, + "dın": 2219, + "tün": 2220, + "başka": 2221, + "hep": 2222, + "irmi": 2223, + "devam": 2224, + "olacak": 2225, + "artık": 2226, + "durum": 2227, + "imiz": 2228, + "üzel": 2229, + "lerini": 2230, + "sağ": 2231, + "gerek": 2232, + "yirmi": 2233, + "şek": 2234, + "bağ": 2235, + "lara": 2236, + "yür": 2237, + "ması": 2238, + "katı": 2239, + "dedi": 2240, + "gü": 2241, + "sorun": 2242, + "üne": 2243, + "mız": 2244, + "yapı": 2245, + "mil": 2246, + "ğını": 2247, + "tara": 2248, + "vardı": 2249, + "konuş": 2250, + "arak": 2251, + "larak": 2252, + "çocuk": 2253, + "bütün": 2254, + "ley": 2255, + "dür": 2256, + "güzel": 2257, + "ayı": 2258, + "yapa": 2259, + "nı": 2260, + "ayr": 2261, + "öne": 2262, + "yordum": 2263, + "ban": 2264, + "i̇ş": 2265, + "dum": 2266, + "yorlar": 2267, + "larını": 2268, + "çıkar": 2269, + "zan": 2270, + "seç": 2271, + "liyor": 2272, + "tak": 2273, + "şık": 2274, + "tekrar": 2275, + "aş": 2276, + "eş": 2277, + "mişti": 2278, + "kin": 2279, + "imi": 2280, + "eğ": 2281, + "gidi": 2282, + "leş": 2283, + "başladı": 2284, + "gide": 2285, + "otur": 2286, + "dde": 2287, + "ından": 2288, + "üzer": 2289, + "ının": 2290, + "nız": 2291, + "uy": 2292, + "yedi": 
2293, + "kat": 2294, + "olarak": 2295, + "ladı": 2296, + "yalnız": 2297, + "bah": 2298, + "iyet": 2299, + "sak": 2300, + "açık": 2301, + "sında": 2302, + "...": 2303, + "insan": 2304, + "aynı": 2305, + "eder": 2306, + "istan": 2307, + "uzun": 2308, + "geri": 2309, + "erek": 2310, + "olan": 2311, + "gerçek": 2312, + "alan": 2313, + "dış": 2314, + "alık": 2315, + "fark": 2316, + "üst": 2317, + "sade": 2318, + "kiş": 2319, + "ldı": 2320, + "zor": 2321, + "etir": 2322, + "herkes": 2323, + "ömer": 2324, + "unda": 2325, + "haf": 2326, + "buna": 2327, + "ydı": 2328, + "peki": 2329, + "adam": 2330, + "haz": 2331, + "sına": 2332, + "kapı": 2333, + "görüş": 2334, + "sadece": 2335, + "aldı": 2336, + "geldi": 2337, + "rz": 2338, + "sz": 2339, + "cz": 2340, + "ię": 2341, + "dz": 2342, + "ał": 2343, + "się": 2344, + "rze": 2345, + "że": 2346, + "wy": 2347, + "rzy": 2348, + "ła": 2349, + "ło": 2350, + "ny": 2351, + "dzie": 2352, + "dzi": 2353, + "czy": 2354, + "cie": 2355, + "prze": 2356, + "dy": 2357, + "kie": 2358, + "ry": 2359, + "ją": 2360, + "ów": 2361, + "przy": 2362, + "mie": 2363, + "szy": 2364, + "cze": 2365, + "bie": 2366, + "cy": 2367, + "nia": 2368, + "ści": 2369, + "sze": 2370, + "jest": 2371, + "ży": 2372, + "ną": 2373, + "któ": 2374, + "ała": 2375, + "mnie": 2376, + "ły": 2377, + "cza": 2378, + "jak": 2379, + "roz": 2380, + "ró": 2381, + "zna": 2382, + "łu": 2383, + "ść": 2384, + "wia": 2385, + "wszy": 2386, + "spo": 2387, + "gdy": 2388, + "wał": 2389, + "wię": 2390, + "łem": 2391, + "ję": 2392, + "sk": 2393, + "rę": 2394, + "dob": 2395, + "już": 2396, + "bę": 2397, + "ałem": 2398, + "sza": 2399, + "pod": 2400, + "dla": 2401, + "pan": 2402, + "nę": 2403, + "może": 2404, + "śli": 2405, + "ało": 2406, + "lko": 2407, + "nych": 2408, + "powie": 2409, + "cię": 2410, + "tylko": 2411, + "naj": 2412, + "tego": 2413, + "ski": 2414, + "nego": 2415, + "wszyst": 2416, + "szcze": 2417, + "jed": 2418, + "jej": 2419, + "two": 2420, + "ąd": 2421, + "śmy": 2422, + "czę": 2423, + 
"wać": 2424, + "jego": 2425, + "ża": 2426, + "sy": 2427, + "praw": 2428, + "tym": 2429, + "który": 2430, + "ały": 2431, + "trze": 2432, + "niej": 2433, + "nym": 2434, + "gło": 2435, + "jąc": 2436, + "mówi": 2437, + "ska": 2438, + "nej": 2439, + "słu": 2440, + "wła": 2441, + "będzie": 2442, + "dę": 2443, + "pó": 2444, + "bez": 2445, + "nic": 2446, + "pła": 2447, + "ście": 2448, + "są": 2449, + "trzy": 2450, + "kiem": 2451, + "był": 2452, + "mog": 2453, + "robi": 2454, + "tam": 2455, + "mię": 2456, + "zy": 2457, + "pew": 2458, + "myś": 2459, + "przed": 2460, + "sko": 2461, + "które": 2462, + "lę": 2463, + "wsze": 2464, + "ąc": 2465, + "było": 2466, + "sobie": 2467, + "py": 2468, + "cią": 2469, + "jeszcze": 2470, + "tę": 2471, + "czas": 2472, + "szę": 2473, + "gł": 2474, + "kę": 2475, + "czu": 2476, + "przez": 2477, + "sło": 2478, + "wz": 2479, + "kto": 2480, + "ków": 2481, + "czo": 2482, + "liśmy": 2483, + "więc": 2484, + "rą": 2485, + "wó": 2486, + "rza": 2487, + "ności": 2488, + "wet": 2489, + "nął": 2490, + "śmie": 2491, + "nawet": 2492, + "musi": 2493, + "swo": 2494, + "tej": 2495, + "wą": 2496, + "wu": 2497, + "wią": 2498, + "niu": 2499, + "czą": 2500, + "dzo": 2501, + "skie": 2502, + "jeśli": 2503, + "czego": 2504, + "chy": 2505, + "dł": 2506, + "tych": 2507, + "bym": 2508, + "żo": 2509, + "eś": 2510, + "sią": 2511, + "kiedy": 2512, + "wró": 2513, + "dze": 2514, + "dro": 2515, + "rów": 2516, + "pani": 2517, + "kul": 2518, + "nad": 2519, + "chwi": 2520, + "nim": 2521, + "być": 2522, + "chodzi": 2523, + "nio": 2524, + "dobrze": 2525, + "teraz": 2526, + "wokul": 2527, + "coś": 2528, + "kł": 2529, + "pier": 2530, + "gdzie": 2531, + "dzy": 2532, + "pię": 2533, + "dź": 2534, + "ką": 2535, + "gó": 2536, + "zda": 2537, + "chce": 2538, + "stę": 2539, + "świa": 2540, + "wszystko": 2541, + "peł": 2542, + "wiem": 2543, + "wiel": 2544, + "każ": 2545, + "rzu": 2546, + "sły": 2547, + "jedna": 2548, + "myśl": 2549, + "mój": 2550, + "jestem": 2551, + "óż": 2552, + "miej": 2553, 
+ "moż": 2554, + "kła": 2555, + "resz": 2556, + "dłu": 2557, + "stwo": 2558, + "nię": 2559, + "masz": 2560, + "żeby": 2561, + "niem": 2562, + "jakie": 2563, + "sty": 2564, + "nią": 2565, + "wej": 2566, + "oj": 2567, + "sła": 2568, + "ność": 2569, + "zło": 2570, + "szczę": 2571, + "lej": 2572, + "wego": 2573, + "cał": 2574, + "dział": 2575, + "kich": 2576, + "dza": 2577, + "dzię": 2578, + "oczy": 2579, + "zosta": 2580, + "czło": 2581, + "nam": 2582, + "kil": 2583, + "szu": 2584, + "wę": 2585, + "miał": 2586, + "strze": 2587, + "cej": 2588, + "ej": 2589, + "znaj": 2590, + "dać": 2591, + "miejs": 2592, + "kró": 2593, + "kry": 2594, + "bardzo": 2595, + "śnie": 2596, + "lą": 2597, + "gie": 2598, + "ciebie": 2599, + "dni": 2600, + "potrze": 2601, + "wokulski": 2602, + "uwa": 2603, + "umie": 2604, + "jednak": 2605, + "kra": 2606, + "wróci": 2607, + "człowie": 2608, + "czyć": 2609, + "była": 2610, + "żeli": 2611, + "mę": 2612, + "cę": 2613, + "zrobi": 2614, + "mogę": 2615, + "prowa": 2616, + "rem": 2617, + "niech": 2618, + "cznie": 2619, + "kro": 2620, + "tą": 2621, + "chci": 2622, + "bro": 2623, + "dzieć": 2624, + "szą": 2625, + "pad": 2626, + "trz": 2627, + "jem": 2628, + "tów": 2629, + "dru": 2630, + "taj": 2631, + "rzekł": 2632, + "niego": 2633, + "takie": 2634, + "wała": 2635, + "towa": 2636, + "kapła": 2637, + "widzi": 2638, + "podob": 2639, + "dzę": 2640, + "tał": 2641, + "stęp": 2642, + "bą": 2643, + "poko": 2644, + "wem": 2645, + "gę": 2646, + "aby": 2647, + "albo": 2648, + "spra": 2649, + "zno": 2650, + "smo": 2651, + "jesz": 2652, + "księ": 2653, + "jesteś": 2654, + "poz": 2655, + "nigdy": 2656, + "ksią": 2657, + "cóż": 2658, + "ws": 2659, + "pow": 2660, + "tka": 2661, + "świe": 2662, + "szka": 2663, + "samo": 2664, + "sł": 2665, + "rzę": 2666, + "nale": 2667, + "chcesz": 2668, + "nik": 2669, + "pę": 2670, + "chyba": 2671, + "ciąg": 2672, + "jący": 2673, + "woj": 2674, + "nasze": 2675, + "mniej": 2676, + "więcej": 2677, + "zwy": 2678, + "osta": 2679, + "waż": 
2680, + "śmier": 2681, + "wier": 2682, + "dzą": 2683, + "zaś": 2684, + "gdyby": 2685, + "jaki": 2686, + "wol": 2687, + "win": 2688, + "dą": 2689, + "ścia": 2690, + "rozma": 2691, + "wal": 2692, + "panie": 2693, + "star": 2694, + "kaz": 2695, + "jeżeli": 2696, + "wra": 2697, + "koń": 2698, + "siebie": 2699, + "znowu": 2700, + "czem": 2701, + "stwa": 2702, + "isto": 2703, + "pół": 2704, + "dał": 2705, + "kobie": 2706, + "ałam": 2707, + "wych": 2708, + "cesa": 2709, + "nich": 2710, + "zawsze": 2711, + "dzić": 2712, + "też": 2713, + "lepie": 2714, + "proszę": 2715, + "kre": 2716, + "twa": 2717, + "łą": 2718, + "chu": 2719, + "cą": 2720, + "prz": 2721, + "łe": 2722, + "szedł": 2723, + "odpowie": 2724, + "myśli": 2725, + "świą": 2726, + "ź": 2727, + "ł": 2728, + "&": 2729, + "=": 2730, + "ă": 2731, + "đ": 2732, + "ţ": 2733, + "–": 2734, + "‘": 2735, + "ij": 2736, + "aa": 2737, + "een": 2738, + "het": 2739, + "aar": 2740, + "oor": 2741, + "ijn": 2742, + "dat": 2743, + "oe": 2744, + "ijk": 2745, + "aan": 2746, + "voor": 2747, + "iet": 2748, + "zijn": 2749, + "niet": 2750, + "oo": 2751, + "moet": 2752, + "heb": 2753, + "uit": 2754, + "wij": 2755, + "aat": 2756, + "lijk": 2757, + "sl": 2758, + "daar": 2759, + "deze": 2760, + "worden": 2761, + "moeten": 2762, + "onder": 2763, + "hebben": 2764, + "ook": 2765, + "ct": 2766, + "nog": 2767, + "aal": 2768, + "eer": 2769, + "bij": 2770, + "mijn": 2771, + "kom": 2772, + "atie": 2773, + "eft": 2774, + "kel": 2775, + "rij": 2776, + "heid": 2777, + "af": 2778, + "stel": 2779, + "maar": 2780, + "wee": 2781, + "heeft": 2782, + "waar": 2783, + "eren": 2784, + "wat": 2785, + "wil": 2786, + "aag": 2787, + "bet": 2788, + "hij": 2789, + "kun": 2790, + "uw": 2791, + "dt": 2792, + "door": 2793, + "tij": 2794, + "ond": 2795, + "geen": 2796, + "gev": 2797, + "veel": 2798, + "naar": 2799, + "aten": 2800, + "kunnen": 2801, + "echt": 2802, + "goe": 2803, + "twee": 2804, + "delijk": 2805, + "uur": 2806, + "toe": 2807, + "meer": 2808, + "onze": 2809, 
+ "tijd": 2810, + "hoe": 2811, + "tot": 2812, + "zou": 2813, + "aak": 2814, + "amen": 2815, + "woor": 2816, + "wordt": 2817, + "gelijk": 2818, + "gaan": 2819, + "ker": 2820, + "eld": 2821, + "hou": 2822, + "zel": 2823, + "tegen": 2824, + "komen": 2825, + "werk": 2826, + "goed": 2827, + "zal": 2828, + "zij": 2829, + "slag": 2830, + "zien": 2831, + "echter": 2832, + "itie": 2833, + "tie": 2834, + "elijk": 2835, + "ische": 2836, + "belan": 2837, + "haar": 2838, + "vr": 2839, + "grijk": 2840, + "doen": 2841, + "land": 2842, + "belangrijk": 2843, + "open": 2844, + "ctie": 2845, + "zelf": 2846, + "mij": 2847, + "iteit": 2848, + "stem": 2849, + "mee": 2850, + "aren": 2851, + "dien": 2852, + "gaat": 2853, + "prob": 2854, + "moe": 2855, + "ullen": 2856, + "zich": 2857, + "daarom": 2858, + "orm": 2859, + "staat": 2860, + "zit": 2861, + "dui": 2862, + "dus": 2863, + "ds": 2864, + "verslag": 2865, + "kelijk": 2866, + "proble": 2867, + "schap": 2868, + "gd": 2869, + "hun": 2870, + "erd": 2871, + "zet": 2872, + "staan": 2873, + "maal": 2874, + "inder": 2875, + "eid": 2876, + "kken": 2877, + "ged": 2878, + "zullen": 2879, + "mensen": 2880, + "jaar": 2881, + "regel": 2882, + "ieder": 2883, + "volgen": 2884, + "geven": 2885, + "even": 2886, + "blij": 2887, + "ië": 2888, + "uwe": 2889, + "maken": 2890, + "oek": 2891, + "nieuwe": 2892, + "baar": 2893, + "andere": 2894, + "ruik": 2895, + "agen": 2896, + "ouw": 2897, + "willen": 2898, + "aakt": 2899, + "hoo": 2900, + "anden": 2901, + "lig": 2902, + "samen": 2903, + "zeer": 2904, + "duidelijk": 2905, + "antwoor": 2906, + "heel": 2907, + "punt": 2908, + "houden": 2909, + "vraag": 2910, + "gele": 2911, + "eens": 2912, + "besch": 2913, + "omen": 2914, + "erg": 2915, + "doel": 2916, + "dag": 2917, + "uren": 2918, + "ings": 2919, + "oren": 2920, + "delen": 2921, + "steun": 2922, + "innen": 2923, + "pol": 2924, + "oon": 2925, + "sn": 2926, + "zonder": 2927, + "nodig": 2928, + "alleen": 2929, + "mid": 2930, + "ragen": 2931, + "iets": 2932, + 
"versch": 2933, + "gebruik": 2934, + "rouw": 2935, + "stellen": 2936, + "menten": 2937, + "eerste": 2938, + "laat": 2939, + "groot": 2940, + "ood": 2941, + "toch": 2942, + "laten": 2943, + "aard": 2944, + "sle": 2945, + "deel": 2946, + "plaat": 2947, + "ree": 2948, + "betre": 2949, + "lid": 2950, + "uiten": 2951, + "racht": 2952, + "beleid": 2953, + "stie": 2954, + "staten": 2955, + "ggen": 2956, + "reken": 2957, + "alen": 2958, + "ming": 2959, + "mogelijk": 2960, + "grote": 2961, + "altijd": 2962, + "enkel": 2963, + "wik": 2964, + "politie": 2965, + "elk": 2966, + "handel": 2967, + "kwe": 2968, + "maat": 2969, + "elen": 2970, + "vrij": 2971, + "jes": 2972, + "aam": 2973, + "huis": 2974, + "weer": 2975, + "lidstaten": 2976, + "king": 2977, + "kle": 2978, + "bed": 2979, + "geval": 2980, + "wikkel": 2981, + "kwestie": 2982, + "stee": 2983, + "hel": 2984, + "komst": 2985, + "iden": 2986, + "eerd": 2987, + "tweede": 2988, + "probleem": 2989, + "ussen": 2990, + "snel": 2991, + "tig": 2992, + "ult": 2993, + "nemen": 2994, + "commis": 2995, + "verschil": 2996, + "zoek": 2997, + "krij": 2998, + "graag": 2999, + "denk": 3000, + "landen": 3001, + "reden": 3002, + "besl": 3003, + "oeg": 3004, + "beter": 3005, + "heden": 3006, + "mag": 3007, + "boven": 3008, + "cont": 3009, + "fd": 3010, + "hele": 3011, + "vier": 3012, + "gez": 3013, + "kw": 3014, + "aas": 3015, + "ontwikkel": 3016, + "drie": 3017, + "vaak": 3018, + "plaats": 3019, + "gang": 3020, + "ijf": 3021, + "natuur": 3022, + "tussen": 3023, + "bat": 3024, + "komt": 3025, + "wacht": 3026, + "aad": 3027, + "achter": 3028, + "gebie": 3029, + "verk": 3030, + "ligt": 3031, + "nieuw": 3032, + "vand": 3033, + "ý": 3034, + "ď": 3035, + "ě": 3036, + "ř": 3037, + "ť": 3038, + "ů": 3039, + "„": 3040, + "ní": 3041, + "ně": 3042, + "ře": 3043, + "ná": 3044, + "vě": 3045, + "vá": 3046, + "rá": 3047, + "vy": 3048, + "mě": 3049, + "ři": 3050, + "ří": 3051, + "že": 3052, + "jí": 3053, + "vý": 3054, + "ji": 3055, + "dě": 3056, + "če": 
3057, + "tě": 3058, + "ky": 3059, + "še": 3060, + "ké": 3061, + "ší": 3062, + "pře": 3063, + "ví": 3064, + "ný": 3065, + "ži": 3066, + "má": 3067, + "cí": 3068, + "zá": 3069, + "ské": 3070, + "dá": 3071, + "byl": 3072, + "tí": 3073, + "pří": 3074, + "při": 3075, + "či": 3076, + "vní": 3077, + "ča": 3078, + "dí": 3079, + "dní": 3080, + "ká": 3081, + "nou": 3082, + "vět": 3083, + "pě": 3084, + "kou": 3085, + "ých": 3086, + "bě": 3087, + "prá": 3088, + "jako": 3089, + "ží": 3090, + "zí": 3091, + "jsou": 3092, + "jsem": 3093, + "lní": 3094, + "cké": 3095, + "vat": 3096, + "před": 3097, + "hla": 3098, + "stá": 3099, + "čí": 3100, + "ši": 3101, + "kla": 3102, + "ště": 3103, + "lou": 3104, + "mů": 3105, + "chá": 3106, + "pů": 3107, + "také": 3108, + "dů": 3109, + "nost": 3110, + "tře": 3111, + "sku": 3112, + "vše": 3113, + "tní": 3114, + "byla": 3115, + "ční": 3116, + "jeho": 3117, + "bý": 3118, + "vání": 3119, + "ných": 3120, + "tři": 3121, + "vz": 3122, + "stře": 3123, + "dva": 3124, + "hle": 3125, + "čá": 3126, + "nosti": 3127, + "vš": 3128, + "hra": 3129, + "jen": 3130, + "slo": 3131, + "však": 3132, + "kdy": 3133, + "bylo": 3134, + "bude": 3135, + "jší": 3136, + "vých": 3137, + "ním": 3138, + "sm": 3139, + "koli": 3140, + "rů": 3141, + "může": 3142, + "není": 3143, + "hod": 3144, + "bí": 3145, + "tý": 3146, + "stě": 3147, + "uje": 3148, + "sá": 3149, + "pět": 3150, + "krá": 3151, + "tom": 3152, + "ství": 3153, + "vně": 3154, + "sed": 3155, + "své": 3156, + "pí": 3157, + "musí": 3158, + "už": 3159, + "tím": 3160, + "jící": 3161, + "jedno": 3162, + "čas": 3163, + "čty": 3164, + "ský": 3165, + "evro": 3166, + "toho": 3167, + "hy": 3168, + "kter": 3169, + "rní": 3170, + "stí": 3171, + "svě": 3172, + "pak": 3173, + "všech": 3174, + "ků": 3175, + "ng": 3176, + "ád": 3177, + "chází": 3178, + "být": 3179, + "první": 3180, + "mno": 3181, + "ského": 3182, + "pá": 3183, + "nebo": 3184, + "kem": 3185, + "sla": 3186, + "ného": 3187, + "zde": 3188, + "další": 3189, + "řa": 3190, + 
"čtyři": 3191, + "hrá": 3192, + "druh": 3193, + "lně": 3194, + "vla": 3195, + "ských": 3196, + "ško": 3197, + "půso": 3198, + "proto": 3199, + "vů": 3200, + "ská": 3201, + "šest": 3202, + "dně": 3203, + "ještě": 3204, + "mezi": 3205, + "několi": 3206, + "již": 3207, + "čně": 3208, + "slu": 3209, + "zná": 3210, + "sedm": 3211, + "vlá": 3212, + "osm": 3213, + "byly": 3214, + "vám": 3215, + "cký": 3216, + "tech": 3217, + "ději": 3218, + "velmi": 3219, + "leži": 3220, + "vala": 3221, + "lý": 3222, + "tvo": 3223, + "spole": 3224, + "stup": 3225, + "mož": 3226, + "evrop": 3227, + "stal": 3228, + "jde": 3229, + "rodi": 3230, + "její": 3231, + "poli": 3232, + "devět": 3233, + "sme": 3234, + "až": 3235, + "této": 3236, + "tento": 3237, + "kaž": 3238, + "nula": 3239, + "bych": 3240, + "moc": 3241, + "stou": 3242, + "kdo": 3243, + "zd": 3244, + "praco": 3245, + "tomu": 3246, + "ným": 3247, + "živo": 3248, + "zem": 3249, + "násle": 3250, + "sky": 3251, + "jich": 3252, + "měl": 3253, + "děla": 3254, + "jsme": 3255, + "nice": 3256, + "stej": 3257, + "stní": 3258, + "náro": 3259, + "nit": 3260, + "později": 3261, + "tako": 3262, + "nce": 3263, + "čer": 3264, + "ším": 3265, + "něco": 3266, + "vál": 3267, + "řej": 3268, + "krát": 3269, + "ální": 3270, + "asi": 3271, + "které": 3272, + "stav": 3273, + "mají": 3274, + "mys": 3275, + "době": 3276, + "sně": 3277, + "zku": 3278, + "tů": 3279, + "chod": 3280, + "spě": 3281, + "jejich": 3282, + "součas": 3283, + "vali": 3284, + "kte": 3285, + "prů": 3286, + "zení": 3287, + "pat": 3288, + "potře": 3289, + "dnes": 3290, + "zemí": 3291, + "znam": 3292, + "mám": 3293, + "tedy": 3294, + "hlavní": 3295, + "použí": 3296, + "bní": 3297, + "vede": 3298, + "lep": 3299, + "jek": 3300, + "prav": 3301, + "politi": 3302, + "dne": 3303, + "čení": 3304, + "než": 3305, + "děl": 3306, + "čo": 3307, + "cích": 3308, + "sté": 3309, + "dlou": 3310, + "několik": 3311, + "vyu": 3312, + "ckých": 3313, + "nové": 3314, + "čin": 3315, + "dělá": 3316, + "ký": 3317, + 
"obla": 3318, + "podle": 3319, + "důleži": 3320, + "poku": 3321, + "kone": 3322, + "dý": 3323, + "dvě": 3324, + "žád": 3325, + "nout": 3326, + "tku": 3327, + "tvr": 3328, + "ckého": 3329, + "rov": 3330, + "tele": 3331, + "psa": 3332, + "svět": 3333, + "tivní": 3334, + "dosta": 3335, + "šel": 3336, + "druhé": 3337, + "skou": 3338, + "žo": 3339, + "jedná": 3340, + "význam": 3341, + "problé": 3342, + "publi": 3343, + "ván": 3344, + "odpo": 3345, + "podpo": 3346, + "dle": 3347, + "jaké": 3348, + "šení": 3349, + "vím": 3350, + "během": 3351, + "nachází": 3352, + "slou": 3353, + "pouze": 3354, + "otá": 3355, + "plo": 3356, + "tové": 3357, + "větši": 3358, + "komi": 3359, + "vají": 3360, + "tyto": 3361, + "zápa": 3362, + "změ": 3363, + "moh": 3364, + "více": 3365, + "společ": 3366, + "auto": 3367, + "proti": 3368, + "dět": 3369, + "cháze": 3370, + "žel": 3371, + "«": 3372, + "»": 3373, + "а": 3374, + "б": 3375, + "в": 3376, + "г": 3377, + "д": 3378, + "е": 3379, + "ж": 3380, + "з": 3381, + "и": 3382, + "й": 3383, + "к": 3384, + "л": 3385, + "м": 3386, + "н": 3387, + "о": 3388, + "п": 3389, + "р": 3390, + "с": 3391, + "т": 3392, + "у": 3393, + "ф": 3394, + "х": 3395, + "ц": 3396, + "ч": 3397, + "ш": 3398, + "щ": 3399, + "ъ": 3400, + "ы": 3401, + "ь": 3402, + "э": 3403, + "ю": 3404, + "я": 3405, + "ё": 3406, + "‑": 3407, + "−": 3408, + "ст": 3409, + "ен": 3410, + "но": 3411, + "на": 3412, + "пр": 3413, + "то": 3414, + "по": 3415, + "ра": 3416, + "го": 3417, + "ко": 3418, + "не": 3419, + "во": 3420, + "ва": 3421, + "ет": 3422, + "ер": 3423, + "ни": 3424, + "ел": 3425, + "ит": 3426, + "ны": 3427, + "за": 3428, + "ро": 3429, + "ени": 3430, + "ка": 3431, + "ли": 3432, + "ем": 3433, + "да": 3434, + "об": 3435, + "ла": 3436, + "до": 3437, + "ся": 3438, + "ть": 3439, + "от": 3440, + "ло": 3441, + "ль": 3442, + "ед": 3443, + "со": 3444, + "ми": 3445, + "ре": 3446, + "мо": 3447, + "ци": 3448, + "про": 3449, + "та": 3450, + "это": 3451, + "ки": 3452, + "ру": 3453, + "при": 3454, + 
"ти": 3455, + "се": 3456, + "ста": 3457, + "вы": 3458, + "мы": 3459, + "ви": 3460, + "бы": 3461, + "ма": 3462, + "ес": 3463, + "ля": 3464, + "сти": 3465, + "ле": 3466, + "что": 3467, + "ме": 3468, + "ри": 3469, + "ча": 3470, + "од": 3471, + "ей": 3472, + "ель": 3473, + "ения": 3474, + "га": 3475, + "ну": 3476, + "си": 3477, + "па": 3478, + "раз": 3479, + "бо": 3480, + "сто": 3481, + "су": 3482, + "са": 3483, + "ду": 3484, + "его": 3485, + "ест": 3486, + "ин": 3487, + "ить": 3488, + "из": 3489, + "же": 3490, + "му": 3491, + "пер": 3492, + "под": 3493, + "ение": 3494, + "сь": 3495, + "ку": 3496, + "пред": 3497, + "ного": 3498, + "ных": 3499, + "вер": 3500, + "те": 3501, + "ной": 3502, + "ции": 3503, + "де": 3504, + "ры": 3505, + "дел": 3506, + "лю": 3507, + "ве": 3508, + "он": 3509, + "мен": 3510, + "ги": 3511, + "ня": 3512, + "бу": 3513, + "пра": 3514, + "все": 3515, + "ется": 3516, + "сть": 3517, + "жа": 3518, + "дол": 3519, + "жи": 3520, + "бе": 3521, + "кон": 3522, + "сл": 3523, + "ши": 3524, + "ди": 3525, + "ств": 3526, + "ско": 3527, + "ные": 3528, + "чи": 3529, + "ют": 3530, + "дер": 3531, + "стра": 3532, + "ты": 3533, + "ход": 3534, + "щи": 3535, + "зо": 3536, + "зна": 3537, + "ности": 3538, + "чес": 3539, + "вля": 3540, + "вать": 3541, + "ор": 3542, + "пол": 3543, + "вет": 3544, + "так": 3545, + "ша": 3546, + "ту": 3547, + "сво": 3548, + "пре": 3549, + "она": 3550, + "итель": 3551, + "ный": 3552, + "сло": 3553, + "как": 3554, + "вл": 3555, + "ность": 3556, + "хо": 3557, + "мож": 3558, + "пе": 3559, + "для": 3560, + "ния": 3561, + "ное": 3562, + "рас": 3563, + "долж": 3564, + "дар": 3565, + "тель": 3566, + "ска": 3567, + "пу": 3568, + "ство": 3569, + "кото": 3570, + "раб": 3571, + "ее": 3572, + "род": 3573, + "эти": 3574, + "соб": 3575, + "ору": 3576, + "жен": 3577, + "ным": 3578, + "ити": 3579, + "ние": 3580, + "ком": 3581, + "дет": 3582, + "сту": 3583, + "гу": 3584, + "пи": 3585, + "меж": 3586, + "ению": 3587, + "тер": 3588, + "работ": 3589, + "воз": 3590, 
+ "ция": 3591, + "кой": 3592, + "щест": 3593, + "гра": 3594, + "зи": 3595, + "ря": 3596, + "между": 3597, + "ства": 3598, + "вс": 3599, + "ело": 3600, + "ше": 3601, + "мер": 3602, + "ба": 3603, + "зы": 3604, + "лу": 3605, + "аль": 3606, + "дей": 3607, + "гла": 3608, + "народ": 3609, + "кти": 3610, + "предста": 3611, + "лся": 3612, + "явля": 3613, + "ски": 3614, + "нов": 3615, + "един": 3616, + "ров": 3617, + "ис": 3618, + "нима": 3619, + "рем": 3620, + "ходи": 3621, + "также": 3622, + "дру": 3623, + "ать": 3624, + "след": 3625, + "гово": 3626, + "ная": 3627, + "ющи": 3628, + "ень": 3629, + "которы": 3630, + "хот": 3631, + "ву": 3632, + "их": 3633, + "ему": 3634, + "чит": 3635, + "важ": 3636, + "орга": 3637, + "чески": 3638, + "ще": 3639, + "ке": 3640, + "ха": 3641, + "пос": 3642, + "том": 3643, + "боль": 3644, + "мне": 3645, + "пас": 3646, + "объ": 3647, + "прав": 3648, + "конф": 3649, + "слу": 3650, + "поддер": 3651, + "стви": 3652, + "наш": 3653, + "лько": 3654, + "стоя": 3655, + "ную": 3656, + "лем": 3657, + "енных": 3658, + "кра": 3659, + "ды": 3660, + "международ": 3661, + "гда": 3662, + "необ": 3663, + "госу": 3664, + "ству": 3665, + "ении": 3666, + "государ": 3667, + "кто": 3668, + "им": 3669, + "чест": 3670, + "рет": 3671, + "вопро": 3672, + "лен": 3673, + "ели": 3674, + "рова": 3675, + "ций": 3676, + "нам": 3677, + "этой": 3678, + "жения": 3679, + "необходи": 3680, + "меня": 3681, + "было": 3682, + "сили": 3683, + "фи": 3684, + "вя": 3685, + "шь": 3686, + "этого": 3687, + "они": 3688, + "органи": 3689, + "безо": 3690, + "проб": 3691, + "име": 3692, + "реш": 3693, + "би": 3694, + "безопас": 3695, + "ются": 3696, + "оста": 3697, + "енно": 3698, + "год": 3699, + "ела": 3700, + "представ": 3701, + "ться": 3702, + "слово": 3703, + "организа": 3704, + "должны": 3705, + "этом": 3706, + "бла": 3707, + "че": 3708, + "чу": 3709, + "благо": 3710, + "этому": 3711, + "врем": 3712, + "спе": 3713, + "ном": 3714, + "ений": 3715, + "спо": 3716, + "нас": 3717, + "нет": 
3718, + "зу": 3719, + "вед": 3720, + "еще": 3721, + "сказа": 3722, + "сей": 3723, + "ерен": 3724, + "дан": 3725, + "сам": 3726, + "еля": 3727, + "ран": 3728, + "зыва": 3729, + "является": 3730, + "будет": 3731, + "ктив": 3732, + "тре": 3733, + "деле": 3734, + "мот": 3735, + "конферен": 3736, + "лась": 3737, + "час": 3738, + "сторо": 3739, + "кого": 3740, + "ез": 3741, + "ней": 3742, + "ос": 3743, + "лись": 3744, + "разору": 3745, + "пере": 3746, + "сси": 3747, + "ными": 3748, + "проц": 3749, + "голо": 3750, + "чело": 3751, + "боле": 3752, + "челове": 3753, + "сер": 3754, + "пл": 3755, + "чет": 3756, + "стран": 3757, + "пя": 3758, + "был": 3759, + "кла": 3760, + "тов": 3761, + "жд": 3762, + "дела": 3763, + "ера": 3764, + "уже": 3765, + "совет": 3766, + "ген": 3767, + "безопасности": 3768, + "ца": 3769, + "седа": 3770, + "поз": 3771, + "ответ": 3772, + "проблем": 3773, + "нако": 3774, + "тем": 3775, + "доста": 3776, + "пы": 3777, + "ща": 3778, + "вой": 3779, + "сущест": 3780, + "необходимо": 3781, + "быть": 3782, + "может": 3783, + "дем": 3784, + "чтобы": 3785, + "ек": 3786, + "чер": 3787, + "усили": 3788, + "рес": 3789, + "руд": 3790, + "единенных": 3791, + "доб": 3792, + "дости": 3793, + "ствен": 3794, + "ядер": 3795, + "годня": 3796, + "каза": 3797, + "сегодня": 3798, + "сейчас": 3799, + "только": 3800, + "вод": 3801, + "есь": 3802, + "много": 3803, + "буду": 3804, + "ев": 3805, + "есть": 3806, + "три": 3807, + "общест": 3808, + "явл": 3809, + "высту": 3810, + "ред": 3811, + "счит": 3812, + "сит": 3813, + "делега": 3814, + "лож": 3815, + "этот": 3816, + "фор": 3817, + "клю": 3818, + "возмож": 3819, + "вания": 3820, + "бли": 3821, + "или": 3822, + "вз": 3823, + "наций": 3824, + "ского": 3825, + "приня": 3826, + "пла": 3827, + "оч": 3828, + "иться": 3829, + "сте": 3830, + "наши": 3831, + "которые": 3832, + "ар": 3833, + "имеет": 3834, + "сот": 3835, + "знач": 3836, + "перь": 3837, + "следу": 3838, + "ены": 3839, + "таки": 3840, + "объединенных": 3841, + "стро": 
3842, + "теперь": 3843, + "бле": 3844, + "благодар": 3845, + "разв": 3846, + "ан": 3847, + "жива": 3848, + "очень": 3849, + "ят": 3850, + "без": 3851, + "обес": 3852, + "гро": 3853, + "лось": 3854, + "сы": 3855, + "организации": 3856, + "член": 3857, + "того": 3858, + "ональ": 3859, + "жда": 3860, + "всех": 3861, + "свя": 3862, + "более": 3863, + "сов": 3864, + "когда": 3865, + "вот": 3866, + "кре": 3867, + "кры": 3868, + "поэтому": 3869, + "воль": 3870, + "ой": 3871, + "генера": 3872, + "чем": 3873, + "лы": 3874, + "полити": 3875, + "вен": 3876, + "конференции": 3877, + "процес": 3878, + "бя": 3879, + "ите": 3880, + "отно": 3881, + "развити": 3882, + "аф": 3883, + "ющ": 3884, + "вно": 3885, + "мир": 3886, + "нии": 3887, + "кая": 3888, + "ас": 3889, + "ительно": 3890, + "вто": 3891, + "ением": 3892, + "генераль": 3893, + "прот": 3894, + "всем": 3895, + "самбле": 3896, + "ассамбле": 3897, + "ом": 3898, + "зд": 3899, + "смот": 3900, + "реги": 3901, + "чего": 3902, + "однако": 3903, + "усилия": 3904, + "действи": 3905, + "чно": 3906, + "уча": 3907, + "образ": 3908, + "вос": 3909, + "эта": 3910, + "перего": 3911, + "говор": 3912, + "вам": 3913, + "моло": 3914, + "время": 3915, + "дь": 3916, + "хотел": 3917, + "гру": 3918, + "заявл": 3919, + "предоста": 3920, + "поль": 3921, + "нее": 3922, + "резо": 3923, + "перегово": 3924, + "резолю": 3925, + "крет": 3926, + "поддерж": 3927, + "обеспе": 3928, + "него": 3929, + "представит": 3930, + "наде": 3931, + "кри": 3932, + "чь": 3933, + "проек": 3934, + "лет": 3935, + "други": 3936, + "_": 3937, + "،": 3938, + "؛": 3939, + "؟": 3940, + "ء": 3941, + "آ": 3942, + "أ": 3943, + "ؤ": 3944, + "إ": 3945, + "ئ": 3946, + "ا": 3947, + "ب": 3948, + "ة": 3949, + "ت": 3950, + "ث": 3951, + "ج": 3952, + "ح": 3953, + "خ": 3954, + "د": 3955, + "ذ": 3956, + "ر": 3957, + "ز": 3958, + "س": 3959, + "ش": 3960, + "ص": 3961, + "ض": 3962, + "ط": 3963, + "ظ": 3964, + "ع": 3965, + "غ": 3966, + "ـ": 3967, + "ف": 3968, + "ق": 3969, + "ك": 3970, + "ل": 3971, 
+ "م": 3972, + "ن": 3973, + "ه": 3974, + "و": 3975, + "ى": 3976, + "ي": 3977, + "ً": 3978, + "ٌ": 3979, + "ٍ": 3980, + "َ": 3981, + "ُ": 3982, + "ِ": 3983, + "ّ": 3984, + "ْ": 3985, + "ٰ": 3986, + "چ": 3987, + "ڨ": 3988, + "ک": 3989, + "ھ": 3990, + "ی": 3991, + "ۖ": 3992, + "ۗ": 3993, + "ۘ": 3994, + "ۚ": 3995, + "ۛ": 3996, + "—": 3997, + "☭": 3998, + "ﺃ": 3999, + "ﻻ": 4000, + "ال": 4001, + "َا": 4002, + "وَ": 4003, + "َّ": 4004, + "ِي": 4005, + "أَ": 4006, + "لَ": 4007, + "نَ": 4008, + "الْ": 4009, + "هُ": 4010, + "ُو": 4011, + "ما": 4012, + "نْ": 4013, + "من": 4014, + "عَ": 4015, + "نا": 4016, + "لا": 4017, + "مَ": 4018, + "تَ": 4019, + "فَ": 4020, + "أن": 4021, + "لي": 4022, + "مِ": 4023, + "ان": 4024, + "في": 4025, + "رَ": 4026, + "يَ": 4027, + "هِ": 4028, + "مْ": 4029, + "قَ": 4030, + "بِ": 4031, + "لى": 4032, + "ين": 4033, + "إِ": 4034, + "لِ": 4035, + "وا": 4036, + "كَ": 4037, + "ها": 4038, + "ًا": 4039, + "مُ": 4040, + "ون": 4041, + "الم": 4042, + "بَ": 4043, + "يا": 4044, + "ذا": 4045, + "سا": 4046, + "الل": 4047, + "مي": 4048, + "يْ": 4049, + "را": 4050, + "ري": 4051, + "لك": 4052, + "مَا": 4053, + "نَّ": 4054, + "لم": 4055, + "إن": 4056, + "ست": 4057, + "وم": 4058, + "َّا": 4059, + "لَا": 4060, + "هم": 4061, + "ِّ": 4062, + "كُ": 4063, + "كان": 4064, + "سَ": 4065, + "با": 4066, + "دي": 4067, + "حَ": 4068, + "عْ": 4069, + "بي": 4070, + "الأ": 4071, + "ول": 4072, + "فِي": 4073, + "رِ": 4074, + "دا": 4075, + "مِنْ": 4076, + "ُونَ": 4077, + "وْ": 4078, + "هَا": 4079, + "ُّ": 4080, + "الس": 4081, + "الَ": 4082, + "ني": 4083, + "لْ": 4084, + "تُ": 4085, + "هل": 4086, + "رة": 4087, + "دَ": 4088, + "سْ": 4089, + "تِ": 4090, + "نَا": 4091, + "رْ": 4092, + "اللَّ": 4093, + "سامي": 4094, + "كن": 4095, + "كل": 4096, + "هَ": 4097, + "عَلَ": 4098, + "على": 4099, + "مع": 4100, + "إلى": 4101, + "قد": 4102, + "الر": 4103, + "ُوا": 4104, + "ير": 4105, + "عن": 4106, + "يُ": 4107, + "نِ": 4108, + "بْ": 4109, + "الح": 4110, + "هُمْ": 4111, + "قا": 4112, + "ذه": 4113, + "الت": 
4114, + "ِينَ": 4115, + "جَ": 4116, + "هذا": 4117, + "عد": 4118, + "الع": 4119, + "دْ": 4120, + "قَالَ": 4121, + "رُ": 4122, + "يم": 4123, + "ية": 4124, + "نُ": 4125, + "خَ": 4126, + "رب": 4127, + "الك": 4128, + "وَا": 4129, + "أنا": 4130, + "ةِ": 4131, + "الن": 4132, + "حد": 4133, + "عِ": 4134, + "تا": 4135, + "هو": 4136, + "فا": 4137, + "عا": 4138, + "الش": 4139, + "لُ": 4140, + "يت": 4141, + "ذَا": 4142, + "يع": 4143, + "الذ": 4144, + "حْ": 4145, + "الص": 4146, + "إِنَّ": 4147, + "جا": 4148, + "علي": 4149, + "كَا": 4150, + "بُ": 4151, + "تع": 4152, + "وق": 4153, + "مل": 4154, + "لَّ": 4155, + "يد": 4156, + "أخ": 4157, + "رف": 4158, + "تي": 4159, + "الِ": 4160, + "ّا": 4161, + "ذلك": 4162, + "أَنْ": 4163, + "سِ": 4164, + "توم": 4165, + "مر": 4166, + "مَنْ": 4167, + "بل": 4168, + "الق": 4169, + "الله": 4170, + "ِيَ": 4171, + "كم": 4172, + "ذَ": 4173, + "عل": 4174, + "حب": 4175, + "سي": 4176, + "عُ": 4177, + "الج": 4178, + "الد": 4179, + "شَ": 4180, + "تك": 4181, + "فْ": 4182, + "صَ": 4183, + "لل": 4184, + "دِ": 4185, + "بر": 4186, + "فِ": 4187, + "ته": 4188, + "أع": 4189, + "تْ": 4190, + "قْ": 4191, + "الْأَ": 4192, + "ئِ": 4193, + "عَنْ": 4194, + "ور": 4195, + "حا": 4196, + "الَّ": 4197, + "مت": 4198, + "فر": 4199, + "دُ": 4200, + "هنا": 4201, + "وَأَ": 4202, + "تب": 4203, + "ةُ": 4204, + "أي": 4205, + "سب": 4206, + "ريد": 4207, + "وج": 4208, + "كُمْ": 4209, + "حِ": 4210, + "كْ": 4211, + "در": 4212, + "َاء": 4213, + "هذه": 4214, + "الط": 4215, + "الْمُ": 4216, + "دة": 4217, + "قل": 4218, + "غَ": 4219, + "يوم": 4220, + "الَّذ": 4221, + "كر": 4222, + "تر": 4223, + "كِ": 4224, + "كي": 4225, + "عَلَى": 4226, + "رَب": 4227, + "عة": 4228, + "قُ": 4229, + "جْ": 4230, + "فض": 4231, + "لة": 4232, + "هْ": 4233, + "رَا": 4234, + "وَلَ": 4235, + "الْمَ": 4236, + "أَنَّ": 4237, + "يَا": 4238, + "أُ": 4239, + "شي": 4240, + "اللَّهُ": 4241, + "لَى": 4242, + "قِ": 4243, + "أت": 4244, + "عَلَيْ": 4245, + "اللَّهِ": 4246, + "الب": 4247, + "ضَ": 4248, + "ةً": 4249, + "قي": 4250, + 
"ار": 4251, + "بد": 4252, + "خْ": 4253, + "سْتَ": 4254, + "طَ": 4255, + "قَدْ": 4256, + "ذهب": 4257, + "أم": 4258, + "ماذا": 4259, + "وَإِ": 4260, + "ةٌ": 4261, + "ونَ": 4262, + "ليلى": 4263, + "ولا": 4264, + "حُ": 4265, + "هي": 4266, + "صل": 4267, + "الخ": 4268, + "ود": 4269, + "ليس": 4270, + "لدي": 4271, + "قال": 4272, + "كَانَ": 4273, + "مَّ": 4274, + "حي": 4275, + "تم": 4276, + "لن": 4277, + "وَلَا": 4278, + "بع": 4279, + "يمكن": 4280, + "سُ": 4281, + "ةَ": 4282, + "حت": 4283, + "رًا": 4284, + "كا": 4285, + "شا": 4286, + "هِمْ": 4287, + "لَهُ": 4288, + "زَ": 4289, + "داً": 4290, + "مس": 4291, + "كث": 4292, + "الْعَ": 4293, + "جِ": 4294, + "صْ": 4295, + "فَا": 4296, + "له": 4297, + "وي": 4298, + "عَا": 4299, + "هُوَ": 4300, + "بِي": 4301, + "بَا": 4302, + "أس": 4303, + "ثَ": 4304, + "لِي": 4305, + "رض": 4306, + "الرَّ": 4307, + "لِكَ": 4308, + "تَّ": 4309, + "فُ": 4310, + "قة": 4311, + "فعل": 4312, + "مِن": 4313, + "الآ": 4314, + "ثُ": 4315, + "سم": 4316, + "مَّا": 4317, + "بِهِ": 4318, + "تق": 4319, + "خر": 4320, + "لقد": 4321, + "خل": 4322, + "شر": 4323, + "أنت": 4324, + "لَّا": 4325, + "سن": 4326, + "السَّ": 4327, + "الذي": 4328, + "سَا": 4329, + "وما": 4330, + "زل": 4331, + "وب": 4332, + "أْ": 4333, + "إذا": 4334, + "رِي": 4335, + "حة": 4336, + "نِي": 4337, + "الْحَ": 4338, + "وَقَالَ": 4339, + "به": 4340, + "ةٍ": 4341, + "سأ": 4342, + "رٌ": 4343, + "بال": 4344, + "مة": 4345, + "شْ": 4346, + "وت": 4347, + "عند": 4348, + "فس": 4349, + "بَعْ": 4350, + "هر": 4351, + "قط": 4352, + "أح": 4353, + "إنه": 4354, + "وع": 4355, + "فت": 4356, + "غا": 4357, + "هناك": 4358, + "بت": 4359, + "مِنَ": 4360, + "سر": 4361, + "ذَلِكَ": 4362, + "رس": 4363, + "حدث": 4364, + "غْ": 4365, + "ِّي": 4366, + "الإ": 4367, + "وَيَ": 4368, + "جل": 4369, + "است": 4370, + "قِي": 4371, + "عب": 4372, + "وس": 4373, + "يش": 4374, + "الَّذِينَ": 4375, + "تاب": 4376, + "دِي": 4377, + "جب": 4378, + "كون": 4379, + "بن": 4380, + "الث": 4381, + "لَيْ": 4382, + "بعد": 4383, + "وَالْ": 4384, + "فَأَ": 
4385, + "عم": 4386, + "هُم": 4387, + "تن": 4388, + "ذْ": 4389, + "أص": 4390, + "أين": 4391, + "رَبِّ": 4392, + "الذين": 4393, + "إِن": 4394, + "بين": 4395, + "جُ": 4396, + "عَلَيْهِ": 4397, + "حَا": 4398, + "لو": 4399, + "ستط": 4400, + "ظر": 4401, + "لَمْ": 4402, + "ءِ": 4403, + "كُل": 4404, + "طل": 4405, + "تَا": 4406, + "ضُ": 4407, + "كنت": 4408, + "لًا": 4409, + "مٌ": 4410, + "قبل": 4411, + "ــ": 4412, + "ذِ": 4413, + "قَوْ": 4414, + "صِ": 4415, + "مًا": 4416, + "كانت": 4417, + "صا": 4418, + "يق": 4419, + "الف": 4420, + "النا": 4421, + "مٍ": 4422, + "إِنْ": 4423, + "النَّ": 4424, + "جد": 4425, + "وَمَا": 4426, + "تت": 4427, + "بح": 4428, + "مكان": 4429, + "كيف": 4430, + "ّة": 4431, + "الا": 4432, + "جَا": 4433, + "أو": 4434, + "ساعد": 4435, + "ضِ": 4436, + "إلا": 4437, + "راً": 4438, + "قَا": 4439, + "رأ": 4440, + "عت": 4441, + "أحد": 4442, + "هد": 4443, + "ضا": 4444, + "طر": 4445, + "أق": 4446, + "ماء": 4447, + "دَّ": 4448, + "البا": 4449, + "مُو": 4450, + "أَوْ": 4451, + "طا": 4452, + "قُو": 4453, + "خِ": 4454, + "تل": 4455, + "ستطيع": 4456, + "دَا": 4457, + "النَّا": 4458, + "إلَى": 4459, + "وَتَ": 4460, + "هَذَا": 4461, + "بة": 4462, + "عليك": 4463, + "جر": 4464, + "المن": 4465, + "زا": 4466, + "رٍ": 4467, + "دع": 4468, + "ًّا": 4469, + "سة": 4470, + "ثُمَّ": 4471, + "شيء": 4472, + "الغ": 4473, + "تح": 4474, + "رُونَ": 4475, + "اليوم": 4476, + "مِي": 4477, + "نُوا": 4478, + "أر": 4479, + "تُمْ": 4480, + "عر": 4481, + "يف": 4482, + "أب": 4483, + "دًا": 4484, + "صَا": 4485, + "التَّ": 4486, + "أريد": 4487, + "الز": 4488, + "يَوْ": 4489, + "إلي": 4490, + "جي": 4491, + "يَعْ": 4492, + "فضل": 4493, + "الإن": 4494, + "أنه": 4495, + "1": 4496, + "2": 4497, + "3": 4498, + "4": 4499, + "5": 4500, + "·": 4501, + "×": 4502, + "̃": 4503, + "̌": 4504, + "ε": 4505, + "λ": 4506, + "μ": 4507, + "•": 4508, + "‧": 4509, + "─": 4510, + "□": 4511, + "、": 4512, + "。": 4513, + "〈": 4514, + "〉": 4515, + "《": 4516, + "》": 4517, + "「": 4518, + "」": 4519, + "『": 4520, + "』": 4521, + 
"ア": 4522, + "オ": 4523, + "カ": 4524, + "チ": 4525, + "ド": 4526, + "ベ": 4527, + "ャ": 4528, + "ヤ": 4529, + "ン": 4530, + "・": 4531, + "ー": 4532, + "ㄟ": 4533, + "!": 4534, + "(": 4535, + ")": 4536, + ",": 4537, + "-": 4538, + "/": 4539, + ":": 4540, + ";": 4541, + "?": 4542, + "p": 4543, + "i4": 4544, + "zh": 4545, + "i2": 4546, + "ng1": 4547, + "u4": 4548, + "i1": 4549, + "ng2": 4550, + "u3": 4551, + "de5": 4552, + "e4": 4553, + "i3": 4554, + "ng4": 4555, + "an4": 4556, + "shi4": 4557, + "an2": 4558, + "u2": 4559, + "u1": 4560, + "ng3": 4561, + "a1": 4562, + "an1": 4563, + "e2": 4564, + "a4": 4565, + "ei4": 4566, + "ong1": 4567, + "ai4": 4568, + "ao4": 4569, + "ang1": 4570, + "an3": 4571, + "wei4": 4572, + "uo2": 4573, + "n1": 4574, + "en2": 4575, + "ao3": 4576, + "e1": 4577, + "qi": 4578, + "eng2": 4579, + "zho": 4580, + "ang3": 4581, + "ang4": 4582, + "ang2": 4583, + "uo4": 4584, + "ge4": 4585, + "yi1": 4586, + "guo2": 4587, + "a3": 4588, + "he2": 4589, + "e3": 4590, + "yi2": 4591, + "di4": 4592, + "zhong1": 4593, + "bu4": 4594, + "ai2": 4595, + "n2": 4596, + "zai4": 4597, + "shi2": 4598, + "eng1": 4599, + "ren2": 4600, + "ong2": 4601, + "xian4": 4602, + "xu": 4603, + "n4": 4604, + "li4": 4605, + "en4": 4606, + "yu2": 4607, + "ei2": 4608, + "yi2ge4": 4609, + "ou4": 4610, + "ei3": 4611, + "ui4": 4612, + "a2": 4613, + "you3": 4614, + "ao1": 4615, + "da4": 4616, + "cheng2": 4617, + "en1": 4618, + "eng4": 4619, + "yi4": 4620, + "si1": 4621, + "zhi4": 4622, + "jia1": 4623, + "yuan2": 4624, + "ta1": 4625, + "de5yi2ge4": 4626, + "ke1": 4627, + "shu3": 4628, + "xi1": 4629, + "ji2": 4630, + "ao2": 4631, + "ou3": 4632, + "ong4": 4633, + "xia4": 4634, + "ai1": 4635, + "gong1": 4636, + "zhi1": 4637, + "en3": 4638, + "wei2": 4639, + "xue2": 4640, + "qu1": 4641, + "zhou1": 4642, + "er3": 4643, + "ming2": 4644, + "zhong3": 4645, + "li3": 4646, + "wu4": 4647, + "yi3": 4648, + "uo1": 4649, + "e5": 4650, + "ji4": 4651, + "xing2": 4652, + "jian4": 4653, + "hua4": 4654, + "yu3": 4655, + 
"uo3": 4656, + "ji1": 4657, + "ai3": 4658, + "zuo4": 4659, + "hou4": 4660, + "hui4": 4661, + "ei1": 4662, + "nian2": 4663, + "qi2": 4664, + "dao4": 4665, + "sheng1": 4666, + "de2": 4667, + "dai4": 4668, + "uan2": 4669, + "zhe4": 4670, + "zheng4": 4671, + "ben3": 4672, + "shang4": 4673, + "zhu3": 4674, + "bei4": 4675, + "ye4": 4676, + "chu1": 4677, + "zhan4": 4678, + "le5": 4679, + "lai2": 4680, + "shi3": 4681, + "nan2": 4682, + "ren4": 4683, + "you2": 4684, + "ke4": 4685, + "ba1": 4686, + "fu4": 4687, + "dui4": 4688, + "ya4": 4689, + "mei3": 4690, + "zi4": 4691, + "xin1": 4692, + "jing1": 4693, + "zhu": 4694, + "n3": 4695, + "yong4": 4696, + "mu4": 4697, + "jiao4": 4698, + "ye3": 4699, + "jin4": 4700, + "bian4": 4701, + "lu4": 4702, + "qi1": 4703, + "she4": 4704, + "xiang1": 4705, + "ong3": 4706, + "shu4": 4707, + "dong4": 4708, + "suo3": 4709, + "guan1": 4710, + "san1": 4711, + "te4": 4712, + "duo1": 4713, + "fu2": 4714, + "min2": 4715, + "la1": 4716, + "zhi2": 4717, + "zhen4": 4718, + "ou1": 4719, + "wu3": 4720, + "ma3": 4721, + "i5": 4722, + "zi5": 4723, + "ju4": 4724, + "er4": 4725, + "yao4": 4726, + "xia4de5yi2ge4": 4727, + "si4": 4728, + "tu2": 4729, + "shan1": 4730, + "zui4": 4731, + "yin1": 4732, + "er2": 4733, + "tong2": 4734, + "dong1": 4735, + "yu4": 4736, + "yan2": 4737, + "qian2": 4738, + "shu3xia4de5yi2ge4": 4739, + "jun1": 4740, + "ke3": 4741, + "wen2": 4742, + "fa3": 4743, + "luo2": 4744, + "zhu4": 4745, + "xi4": 4746, + "kou3": 4747, + "bei3": 4748, + "jian1": 4749, + "fa1": 4750, + "dian4": 4751, + "jiang1": 4752, + "wei4yu2": 4753, + "xiang4": 4754, + "zhi3": 4755, + "eng3": 4756, + "fang1": 4757, + "lan2": 4758, + "shu": 4759, + "ri4": 4760, + "lian2": 4761, + "shou3": 4762, + "qiu2": 4763, + "jin1": 4764, + "huo4": 4765, + "shu3xia4de5yi2ge4zhong3": 4766, + "fen1": 4767, + "nei4": 4768, + "gai1": 4769, + "mei3guo2": 4770, + "un2": 4771, + "ge2": 4772, + "bao3": 4773, + "qing1": 4774, + "gao1": 4775, + "tai2": 4776, + "xiao3": 4777, + "jie2": 
4778, + "tian1": 4779, + "chang2": 4780, + "quan2": 4781, + "lie4": 4782, + "hai3": 4783, + "fei1": 4784, + "ti3": 4785, + "jue2": 4786, + "ou2": 4787, + "ci3": 4788, + "zu2": 4789, + "ni2": 4790, + "biao3": 4791, + "zhong1guo2": 4792, + "du4": 4793, + "yue4": 4794, + "xing4": 4795, + "sheng4": 4796, + "che1": 4797, + "dan1": 4798, + "jie1": 4799, + "lin2": 4800, + "ping2": 4801, + "fu3": 4802, + "gu3": 4803, + "jie4": 4804, + "v3": 4805, + "sheng3": 4806, + "na4": 4807, + "yuan4": 4808, + "zhang3": 4809, + "guan3": 4810, + "dao3": 4811, + "zu3": 4812, + "ding4": 4813, + "dian3": 4814, + "ceng2": 4815, + "ren2kou3": 4816, + "tai4": 4817, + "tong1": 4818, + "guo4": 4819, + "neng2": 4820, + "chang3": 4821, + "hua2": 4822, + "liu2": 4823, + "ying1": 4824, + "xiao4": 4825, + "ci4": 4826, + "bian4hua4": 4827, + "liang3": 4828, + "gong4": 4829, + "zhong4": 4830, + "de5yi1": 4831, + "se4": 4832, + "kai1": 4833, + "wang2": 4834, + "jiu4": 4835, + "shi1": 4836, + "shou4": 4837, + "mei2": 4838, + "feng1": 4839, + "ze2": 4840, + "tu2shi4": 4841, + "ti2": 4842, + "qi4": 4843, + "jiu3": 4844, + "shen1": 4845, + "zhe3": 4846, + "ren2kou3bian4hua4": 4847, + "ren2kou3bian4hua4tu2shi4": 4848, + "di4qu1": 4849, + "yang2": 4850, + "men5": 4851, + "long2": 4852, + "bing4": 4853, + "chan3": 4854, + "zhu1": 4855, + "wei3": 4856, + "wai4": 4857, + "xing1": 4858, + "bo1": 4859, + "bi3": 4860, + "tang2": 4861, + "hua1": 4862, + "bo2": 4863, + "shui3": 4864, + "shu1": 4865, + "dou1": 4866, + "sai4": 4867, + "chao2": 4868, + "bi4": 4869, + "ling2": 4870, + "lei4": 4871, + "da4xue2": 4872, + "fen4": 4873, + "shu3de5": 4874, + "mu3": 4875, + "jiao1": 4876, + "dang1": 4877, + "cheng1": 4878, + "tong3": 4879, + "nv3": 4880, + "qi3": 4881, + "yan3": 4882, + "mian4": 4883, + "luo4": 4884, + "jing4": 4885, + "ge1": 4886, + "ru4": 4887, + "dan4": 4888, + "ri4ben3": 4889, + "pu3": 4890, + "yun4": 4891, + "huang2": 4892, + "wo3": 4893, + "lv": 4894, + "hai2": 4895, + "shi4yi1": 4896, + "xie1": 4897, + 
"ying3": 4898, + "wu2": 4899, + "shen2": 4900, + "wang3": 4901, + "guang3": 4902, + "liu4": 4903, + "su4": 4904, + "shi4zhen4": 4905, + "can1": 4906, + "cao3": 4907, + "xia2": 4908, + "ka3": 4909, + "da2": 4910, + "hu4": 4911, + "ban4": 4912, + "dang3": 4913, + "hu2": 4914, + "zong3": 4915, + "deng3": 4916, + "de5yi2ge4shi4zhen4": 4917, + "chuan2": 4918, + "mo4": 4919, + "zhang1": 4920, + "ban1": 4921, + "mo2": 4922, + "cha2": 4923, + "ce4": 4924, + "zhu3yao4": 4925, + "tou2": 4926, + "ju2": 4927, + "shi4wei4yu2": 4928, + "sa4": 4929, + "un1": 4930, + "ke3yi3": 4931, + "du1": 4932, + "han4": 4933, + "liang4": 4934, + "sha1": 4935, + "jia3": 4936, + "zi1": 4937, + "lv4": 4938, + "fu1": 4939, + "xian1": 4940, + "xu4": 4941, + "guang1": 4942, + "meng2": 4943, + "bao4": 4944, + "you4": 4945, + "rong2": 4946, + "zhi1yi1": 4947, + "wei1": 4948, + "mao2": 4949, + "guo2jia1": 4950, + "cong2": 4951, + "gou4": 4952, + "tie3": 4953, + "zhen1": 4954, + "du2": 4955, + "bian1": 4956, + "ci2": 4957, + "qu3": 4958, + "fan4": 4959, + "xiang3": 4960, + "men2": 4961, + "ju1": 4962, + "hong2": 4963, + "zi3": 4964, + "ta1men5": 4965, + "ji3": 4966, + "zong1": 4967, + "zhou1de5yi2ge4shi4zhen4": 4968, + "tuan2": 4969, + "jing3": 4970, + "gong1si1": 4971, + "xie4": 4972, + "li2": 4973, + "li4shi3": 4974, + "bao1": 4975, + "gang3": 4976, + "gui1": 4977, + "zheng1": 4978, + "zhi2wu4": 4979, + "ta1de5": 4980, + "pin3": 4981, + "zhuan1": 4982, + "chong2": 4983, + "shi3yong4": 4984, + "wa3": 4985, + "shuo1": 4986, + "chuan1": 4987, + "lei2": 4988, + "wan1": 4989, + "huo2": 4990, + "su1": 4991, + "zao3": 4992, + "gai3": 4993, + "qu4": 4994, + "gu4": 4995, + "xi2": 4996, + "hang2": 4997, + "ying4": 4998, + "cun1": 4999, + "gen1": 5000, + "ying2": 5001, + "ting2": 5002, + "cheng2shi4": 5003, + "jiang3": 5004, + "ling3": 5005, + "lun2": 5006, + "bu4fen4": 5007, + "deng1": 5008, + "xuan3": 5009, + "dong4wu4": 5010, + "de2guo2": 5011, + "xian3": 5012, + "fan3": 5013, + "zhe5": 5014, + "han2": 5015, 
+ "hao4": 5016, + "mi4": 5017, + "ran2": 5018, + "qin1": 5019, + "tiao2": 5020, + "zhan3": 5021, + "[ar]": 5022, + "[zh-cn]": 5023, + "shi": 5026, + "tsu": 5027, + "teki": 5028, + "nai": 5029, + "aru": 5030, + "uu": 5031, + "kai": 5032, + "shite": 5033, + "mono": 5034, + "koto": 5035, + "kara": 5036, + "shita": 5037, + "suru": 5038, + "masu": 5039, + "tai": 5040, + "ware": 5041, + "shin": 5042, + "oku": 5043, + "yuu": 5044, + "iru": 5045, + "jiko": 5046, + "desu": 5047, + "rare": 5048, + "shou": 5049, + "sha": 5050, + "sekai": 5051, + "kyou": 5052, + "mashita": 5053, + "nara": 5054, + "kei": 5055, + "ita": 5056, + "ari": 5057, + "itsu": 5058, + "kono": 5059, + "naka": 5060, + "chou": 5061, + "sore": 5062, + "naru": 5063, + "gaku": 5064, + "reba": 5065, + "hito": 5066, + "sai": 5067, + "nan": 5068, + "dai": 5069, + "tsuku": 5070, + "shiki": 5071, + "sare": 5072, + "naku": 5073, + "jun": 5074, + "kaku": 5075, + "zai": 5076, + "wata": 5077, + "shuu": 5078, + "ii": 5079, + "kare": 5080, + "shii": 5081, + "made": 5082, + "sho": 5083, + "kereba": 5084, + "shika": 5085, + "ichi": 5086, + "deki": 5087, + "nin": 5088, + "wareware": 5089, + "nakereba": 5090, + "oite": 5091, + "yaku": 5092, + "mujun": 5093, + "yoku": 5094, + "butsu": 5095, + "omo": 5096, + "gae": 5097, + "naranai": 5098, + "tachi": 5099, + "chuu": 5100, + "kangae": 5101, + "toki": 5102, + "koro": 5103, + "mujunteki": 5104, + "naga": 5105, + "jin": 5106, + "shima": 5107, + "iku": 5108, + "imasu": 5109, + "hon": 5110, + "kae": 5111, + "kore": 5112, + "kita": 5113, + "datta": 5114, + "jitsu": 5115, + "mae": 5116, + "toku": 5117, + "douitsu": 5118, + "ritsu": 5119, + "kyuu": 5120, + "hyou": 5121, + "rareta": 5122, + "keisei": 5123, + "kkan": 5124, + "rareru": 5125, + "mou": 5126, + "doko": 5127, + "ryou": 5128, + "dake": 5129, + "nakatta": 5130, + "soko": 5131, + "tabe": 5132, + "hana": 5133, + "fuku": 5134, + "yasu": 5135, + "wataku": 5136, + "yama": 5137, + "kyo": 5138, + "genzai": 5139, + "boku": 5140, + 
"ata": 5141, + "kawa": 5142, + "masen": 5143, + "juu": 5144, + "natte": 5145, + "watakushi": 5146, + "yotte": 5147, + "hai": 5148, + "jishin": 5149, + "rete": 5150, + "oka": 5151, + "kagaku": 5152, + "natta": 5153, + "karu": 5154, + "nari": 5155, + "mata": 5156, + "kuru": 5157, + "gai": 5158, + "kari": 5159, + "shakai": 5160, + "koui": 5161, + "yori": 5162, + "setsu": 5163, + "reru": 5164, + "tokoro": 5165, + "jutsu": 5166, + "saku": 5167, + "ttai": 5168, + "ningen": 5169, + "tame": 5170, + "kankyou": 5171, + "ooku": 5172, + "watashi": 5173, + "tsukuru": 5174, + "sugi": 5175, + "jibun": 5176, + "shitsu": 5177, + "keru": 5178, + "kishi": 5179, + "shikashi": 5180, + "moto": 5181, + "mari": 5182, + "itte": 5183, + "deshita": 5184, + "nde": 5185, + "arimasu": 5186, + "koe": 5187, + "zettai": 5188, + "kkanteki": 5189, + "rekishi": 5190, + "dekiru": 5191, + "tsuka": 5192, + "itta": 5193, + "kobutsu": 5194, + "miru": 5195, + "shoku": 5196, + "shimasu": 5197, + "gijutsu": 5198, + "gyou": 5199, + "joushiki": 5200, + "atta": 5201, + "hodo": 5202, + "koko": 5203, + "tsukurareta": 5204, + "zoku": 5205, + "hitei": 5206, + "koku": 5207, + "rekishiteki": 5208, + "kete": 5209, + "kako": 5210, + "nagara": 5211, + "kakaru": 5212, + "shutai": 5213, + "haji": 5214, + "taku": 5215, + "douitsuteki": 5216, + "mete": 5217, + "tsuu": 5218, + "sarete": 5219, + "genjitsu": 5220, + "bai": 5221, + "nawa": 5222, + "jikan": 5223, + "waru": 5224, + "rt": 5225, + "atsu": 5226, + "soku": 5227, + "kouiteki": 5228, + "kata": 5229, + "tetsu": 5230, + "gawa": 5231, + "kedo": 5232, + "reta": 5233, + "sayou": 5234, + "tteru": 5235, + "tori": 5236, + "kimi": 5237, + "mura": 5238, + "sareru": 5239, + "machi": 5240, + "kya": 5241, + "osa": 5242, + "konna": 5243, + "aku": 5244, + "sareta": 5245, + "ipp": 5246, + "shiku": 5247, + "uchi": 5248, + "hitotsu": 5249, + "hatara": 5250, + "tachiba": 5251, + "shiro": 5252, + "katachi": 5253, + "tomo": 5254, + "ete": 5255, + "meru": 5256, + "nichi": 5257, + "dare": 
5258, + "katta": 5259, + "eru": 5260, + "suki": 5261, + "ooki": 5262, + "maru": 5263, + "moku": 5264, + "oko": 5265, + "kangaerareru": 5266, + "oto": 5267, + "tanni": 5268, + "tada": 5269, + "taiteki": 5270, + "motte": 5271, + "kinou": 5272, + "shinai": 5273, + "kki": 5274, + "tari": 5275, + "ranai": 5276, + "kkou": 5277, + "mirai": 5278, + "ppon": 5279, + "goto": 5280, + "hitsu": 5281, + "teru": 5282, + "mochi": 5283, + "katsu": 5284, + "nyuu": 5285, + "zuka": 5286, + "tsuite": 5287, + "nomi": 5288, + "sugu": 5289, + "kuda": 5290, + "tetsugaku": 5291, + "ika": 5292, + "ronri": 5293, + "oki": 5294, + "nippon": 5295, + "shimashita": 5296, + "chishiki": 5297, + "chokkanteki": 5298, + "suko": 5299, + "kuu": 5300, + "arou": 5301, + "katte": 5302, + "kuri": 5303, + "inai": 5304, + "hyougen": 5305, + "ishiki": 5306, + "doku": 5307, + "atte": 5308, + "atara": 5309, + "wari": 5310, + "kao": 5311, + "seisan": 5312, + "hanashi": 5313, + "kake": 5314, + "naji": 5315, + "sunawa": 5316, + "sunawachi": 5317, + "ugo": 5318, + "suu": 5319, + "bara": 5320, + "hiro": 5321, + "iwa": 5322, + "betsu": 5323, + "yoi": 5324, + "seru": 5325, + "shiteru": 5326, + "rarete": 5327, + "toshi": 5328, + "seki": 5329, + "tairitsu": 5330, + "wakara": 5331, + "tokyo": 5332, + "kka": 5333, + "kyoku": 5334, + "iro": 5335, + "mite": 5336, + "saki": 5337, + "kanji": 5338, + "mita": 5339, + "sube": 5340, + "ryoku": 5341, + "matta": 5342, + "kudasai": 5343, + "omoi": 5344, + "wareru": 5345, + "hitsuyou": 5346, + "kashi": 5347, + "renai": 5348, + "kankei": 5349, + "gatte": 5350, + "ochi": 5351, + "motsu": 5352, + "sonzai": 5353, + "taishite": 5354, + "ame": 5355, + "seimei": 5356, + "kano": 5357, + "giri": 5358, + "kangaeru": 5359, + "yue": 5360, + "asa": 5361, + "onaji": 5362, + "yoru": 5363, + "niku": 5364, + "osaka": 5365, + "sukoshi": 5366, + "tama": 5367, + "kanojo": 5368, + "kite": 5369, + "mondai": 5370, + "amari": 5371, + "eki": 5372, + "kojin": 5373, + "haya": 5374, + "dete": 5375, + "atarashii": 
5376, + "awa": 5377, + "gakkou": 5378, + "tsuzu": 5379, + "shukan": 5380, + "imashita": 5381, + "atae": 5382, + "darou": 5383, + "hataraku": 5384, + "gata": 5385, + "dachi": 5386, + "matsu": 5387, + "arimasen": 5388, + "seibutsu": 5389, + "mitsu": 5390, + "heya": 5391, + "yasui": 5392, + "deni": 5393, + "noko": 5394, + "haha": 5395, + "domo": 5396, + "kami": 5397, + "sudeni": 5398, + "nao": 5399, + "raku": 5400, + "ike": 5401, + "meta": 5402, + "kodomo": 5403, + "soshite": 5404, + "game": 5405, + "bakari": 5406, + "tote": 5407, + "hatsu": 5408, + "mise": 5409, + "mokuteki": 5410, + "dakara": 5411, + "[ja]": 5412 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + 
"c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "e n", + "e r", + "c h", + "e i", + "i e", + "u n", + "i ch", + "ei n", + "s t", + "a n", + "t e", + "g e", + "a u", + "i n", + "s ch", + "d er", + "un d", + "d ie", + "d a", + "e s", + "a l", + "d en", + "a r", + "g en", + "z u", + "d e", + "h r", + "o n", + "t en", + "e l", + "o r", + "m i", + "s ie", + "da s", + "a t", + "b e", + "ein e", + "ich t", + "b er", + "l e", + "a ch", + "v er", + "s e", + "au f", + "w i", + "s o", + "t er", + "l ich", + "c k", + "u r", + "n icht", + "m m", + "b en", + "a s", + "w ar", + "r e", + "mi t", + "s ich", + "i g", + "l l", + "au s", + "i st", + "w ie", + "o ch", + "un g", + "an n", + "ü r", + "h n", + "i hr", + "s a", + "s en", + "t z", + "de m", + "ei t", + "u m", + "h at", + "wi r", + "v on", + "h a", + "s p", + "w ei", + "i er", + "r o", + "h er", + "r a", + "ein en", + "n e", + "v or", + "al s", + "an d", + "al l", + "w as", + "w o", + "r ei", + "st e", + "l ie", + "au ch", + "d u", + "d es", + "k o", + "ü ber", + "a m", + "b ei", + "h en", + "h m", + "l ei", + "a ber", + "w en", + "h l", + "g er", + "i m", + "u t", + "n ach", + "h e", + "i s", + "b r", + "f t", + "en t", + "i mm", + "j e", + "sch en", + "w er", + "s er", + "a b", + "ä n", + "m e", + "s ein", + "i t", + "o l", + "ch t", + "f ür", + "k l", + 
"f f", + "eine m", + "n en", + "w e", + "j a", + "u s", + "n och", + "hat te", + "t r", + "p f", + "h in", + "d i", + "ch en", + "b l", + "m an", + "r ü", + "ie l", + "s el", + "das s", + "i hn", + "mi r", + "sch l", + "ö n", + "g an", + "g t", + "ein er", + "st en", + "m ich", + "wen n", + "el l", + "g te", + "in d", + "m al", + "ge l", + "k en", + "n ur", + "mm en", + "f ü", + "er n", + "ö r", + "un ter", + "f r", + "an der", + "g r", + "i l", + "d ur", + "u ch", + "f e", + "t a", + "m en", + "m ach", + "d och", + "t i", + "dur ch", + "o s", + "g l", + "h al", + "ihr e", + "w ä", + "imm er", + "i hm", + "k ann", + "or t", + "d ann", + "l an", + "tz t", + "o der", + "hr en", + "e t", + "k ön", + "i ck", + "f a", + "in g", + "i r", + "wie der", + "da ß", + "m ein", + "f en", + "gan z", + "die se", + "st er", + "da r", + "w a", + "ge s", + "n a", + "f l", + "i gen", + "sch e", + "un gen", + "me hr", + "ß en", + "o t", + "k on", + "ge w", + "ha ben", + "ge h", + "ä t", + "s ind", + "d r", + "w el", + "un s", + "v o", + "m a", + "u te", + "sch on", + "b es", + "ge sch", + "b t", + "ch e", + "s on", + "o b", + "l a", + "p p", + "rü ck", + "s eine", + "k r", + "f re", + "ei l", + "zu m", + "u l", + "h ier", + "k t", + "i ge", + "sp r", + "k e", + "le ben", + "b st", + "z eit", + "i on", + "g ro", + "den n", + "h o", + "sch a", + "b ar", + "al le", + "ge gen", + "w ür", + "m ü", + "z e", + "wer den", + "je tzt", + "ko mmen", + "n ie", + "s ei", + "h eit", + "so ll", + "g lei", + "m eine", + "wo ll", + "n er", + "ha be", + "w ur", + "lich en", + "p er", + "as sen", + "n te", + "se hen", + "wir d", + "b is", + "g ar", + "i en", + "m us", + "u ß", + "ä r", + "st ell", + "k eit", + "z wei", + "sel bst", + "st a", + "p a", + "sa gte", + "te t", + "k am", + "s sen", + "v iel", + "u g", + "z en", + "h ei", + "m ann", + "wi ll", + "ge b", + "war en", + "ü ck", + "ä ch", + "m er", + "r u", + "w or", + "h au", + "ei gen", + "an g", + "we g", + "bl ick", + "f ra", + "all es", + "k 
a", + "au gen", + "f in", + "lich e", + "t o", + "un ser", + "der n", + "her r", + "n un", + "v ie", + "ch te", + "wo hl", + "f all", + "h t", + "ü n", + "et was", + "st and", + "en d", + "ä u", + "e m", + "m ö", + "te l", + "r ie", + "d ich", + "die s", + "h and", + "b in", + "ff en", + "nicht s", + "d an", + "p l", + "hn e", + "ihn en", + "es en", + "die ser", + "fr au", + "an t", + "ar t", + "di r", + "i sch", + "er st", + "glei ch", + "ko mm", + "h ör", + "ß e", + "d ig", + "se hr", + "z ei", + "sa m", + "au m", + "h ät", + "in gen", + "g ut", + "b o", + "m ut", + "ck en", + "kon nte", + "st imm", + "p ro", + "zu r", + "i tz", + "wei l", + "wür de", + "f ä", + "kön nen", + "k eine", + "f er", + "i schen", + "vo ll", + "ein es", + "se tz", + "z ie", + "de l", + "te te", + "sein er", + "ier en", + "ge st", + "zu rück", + "wur de", + "sch n", + "p r", + "lie ß", + "t ra", + "m ä", + "gen d", + "f ol", + "i k", + "schl a", + "scha ft", + "at er", + "wei ß", + "s einen", + "l assen", + "l u", + "und en", + "t eil", + "ne u", + "ier t", + "men schen", + "hm en", + "st r", + "g i", + "sa h", + "ihr en", + "el n", + "wei ter", + "ge hen", + "ig er", + "mach t", + "ta g", + "al so", + "hal ten", + "n is", + "ach t", + "ge ben", + "f or", + "o g", + "n at", + "m ar", + "de t", + "o hne", + "h aus", + "t ro", + "an ge", + "l au", + "sp iel", + "t re", + "sch r", + "in n", + "s u", + "l os", + "mach en", + "hät te", + "be g", + "wir k", + "al t", + "g lich", + "te s", + "r icht", + "fre und", + "m o", + "ihr er", + "f el", + "b el", + "so l", + "ein mal", + "e ben", + "h ol", + "h än", + "q u", + "ter n", + "h ö", + "sch w", + "re cht", + "wa hr", + "s einem", + "ste hen", + "hl en", + "in s", + "g ing", + "woll te", + "wi ssen", + "ung s", + "al d", + "as s", + "ja hr", + "m or", + "wel t", + "un der", + "zu sa", + "at ion", + "ko pf", + "lan g", + "hin ter", + "at z", + "st ra", + "an gen", + "an k", + "a de", + "gl au", + "f ach", + "hat ten", + "l o", + "f ort", + "ei 
cht", + "i ff", + "l er", + "m ei", + "diese m", + "k ein", + "f rei", + "fü hr", + "vo m", + "e s", + "e n", + "a i", + "o u", + "o n", + "l e", + "d e", + "r e", + "q u", + "a n", + "e r", + "en t", + "e t", + "l a", + "n e", + "i l", + "a r", + "i s", + "ai t", + "t e", + "a u", + "i n", + "qu e", + "i t", + "u r", + "s e", + "l es", + "c h", + "c e", + "m e", + "o r", + "ou r", + "a s", + "p r", + "a v", + "o m", + "ai s", + "u n", + "an t", + "ou s", + "t r", + "t i", + "l u", + "o i", + "e u", + "l le", + "s i", + "p ar", + "d es", + "an s", + "m ent", + "é t", + "es t", + "j e", + "u ne", + "a l", + "p as", + "t re", + "qu i", + "d u", + "r i", + "c on", + "s on", + "c om", + "e lle", + "d é", + "p our", + "d ans", + "l i", + "s a", + "r é", + "t ou", + "v ous", + "d i", + "v i", + "a g", + "a m", + "a t", + "ou v", + "a p", + "ti on", + "m on", + "s ur", + "c i", + "o s", + "p lu", + "s u", + "en d", + "a b", + "è re", + "ai n", + "m ais", + "o is", + "r es", + "plu s", + "é e", + "ai ent", + "m p", + "ch e", + "lu i", + "av e", + "ét ait", + "m a", + "s es", + "tou t", + "i r", + "v o", + "a c", + "s er", + "an d", + "f f", + "oi r", + "g r", + "av ait", + "é s", + "m es", + "n ous", + "eu x", + "b i", + "t er", + "c o", + "on s", + "p u", + "c es", + "g e", + "t u", + "le ur", + "pr o", + "d on", + "e ur", + "et te", + "ai re", + "ave c", + "d it", + "t é", + "i e", + "u s", + "il le", + "p er", + "com me", + "c r", + "or t", + "m i", + "e x", + "u x", + "v er", + "m o", + "è s", + "v e", + "au x", + "r a", + "j our", + "il s", + "bi en", + "c ou", + "p e", + "que l", + "p eu", + "c ette", + "t es", + "p o", + "in s", + "c u", + "m ê", + "s o", + "f ait", + "g u", + "m ar", + "ê tre", + "l o", + "it é", + "f r", + "a tion", + "en s", + "b r", + "n i", + "l é", + "d is", + "b le", + "m an", + "n é", + "pu is", + "mê me", + "qu es", + "f i", + "e l", + "ag e", + "g ar", + "m oi", + "en ce", + "on t", + "m ain", + "or s", + "au t", + "an ce", + "v en", + "m 
é", + "s ans", + "e m", + "s é", + "l on", + "h om", + "r o", + "u t", + "c ar", + "ab le", + "i m", + "de r", + "ch er", + "n o", + "vi e", + "au s", + "b e", + "de ux", + "en f", + "o ù", + "t en", + "p h", + "u re", + "te mp", + "p os", + "r ent", + "p é", + "f aire", + "p i", + "tr es", + "ç a", + "an g", + "end re", + "f or", + "p a", + "b on", + "s ou", + "in t", + "pr é", + "s ent", + "t ant", + "n er", + "c er", + "l à", + "l ais", + "pr ès", + "b re", + "c our", + "p et", + "i on", + "i ne", + "com p", + "l ait", + "tr ouv", + "t a", + "ent re", + "son t", + "de v", + "n u", + "temp s", + "d ou", + "r ait", + "b ou", + "qu and", + "jour s", + "l an", + "er s", + "av oir", + "ét é", + "a le", + "p re", + "f ois", + "or te", + "v é", + "m er", + "n on", + "t ous", + "j us", + "cou p", + "t s", + "hom me", + "ê te", + "a d", + "aus si", + "ur s", + "se u", + "or d", + "o b", + "m in", + "g é", + "co re", + "v a", + "v re", + "en core", + "se m", + "i te", + "au tre", + "pr is", + "peu t", + "u e", + "an te", + "m al", + "g n", + "ré p", + "h u", + "si on", + "vo tre", + "di re", + "e z", + "f em", + "leur s", + "m et", + "f in", + "c ri", + "m is", + "t our", + "r ai", + "j am", + "re gar", + "ri en", + "ver s", + "su is", + "p ouv", + "o p", + "v is", + "gr and", + "ant s", + "c or", + "re r", + "ar d", + "c é", + "t ent", + "pr es", + "v ou", + "f a", + "al ors", + "si eur", + "ai ne", + "le r", + "qu oi", + "f on", + "end ant", + "ar ri", + "eu re", + "a près", + "don c", + "it u", + "l è", + "s ait", + "t oi", + "ch a", + "ai l", + "as se", + "i mp", + "vo y", + "con n", + "p la", + "pet it", + "av ant", + "n om", + "t in", + "don t", + "d a", + "s ous", + "e mp", + "per son", + "el les", + "be au", + "par ti", + "ch o", + "pr it", + "tou jours", + "m en", + "r ais", + "jam ais", + "tr av", + "tion s", + "tr ès", + "v oi", + "r en", + "y eux", + "f er", + "v oir", + "pre mi", + "c a", + "g ne", + "h eure", + "r ou", + "e ff", + "no tre", + "ment s", + "t 
on", + "f ais", + "ce la", + "i er", + "rép on", + "con s", + "ai r", + "ô t", + "p endant", + "i ci", + "tou te", + "j et", + "p ort", + "ét aient", + "p en", + "h é", + "au tres", + "p ère", + "o c", + "quel ques", + "i que", + "l is", + "fem me", + "j ou", + "te ur", + "mon de", + "u se", + "n es", + "d re", + "a ff", + "r ap", + "par t", + "le ment", + "c la", + "f ut", + "quel que", + "pr endre", + "r ê", + "ai lle", + "s ais", + "ch es", + "le t", + "ch ar", + "è res", + "ent s", + "b er", + "g er", + "mo ins", + "e au", + "a î", + "j eu", + "h eur", + "é es", + "tr i", + "po int", + "m om", + "v ent", + "n ouv", + "gr an", + "tr ois", + "s ant", + "tout es", + "con tre", + "è rent", + "che z", + "ave z", + "û t", + "a lle", + "at t", + "p au", + "p orte", + "ouv er", + "b ar", + "l it", + "f ort", + "o t", + "as s", + "pr és", + "cho se", + "v it", + "mon sieur", + "h ab", + "t ête", + "j u", + "te ment", + "c tion", + "v rai", + "la r", + "c et", + "regar d", + "l ant", + "de m", + "s om", + "mom ent", + "il les", + "p le", + "p s", + "b es", + "m ère", + "c l", + "s our", + "y s", + "tr op", + "en ne", + "jus qu", + "av aient", + "av ais", + "jeu ne", + "de puis", + "person ne", + "f it", + "cer t", + "j o", + "g es", + "ou i", + "r est", + "sem b", + "c ap", + "m at", + "m u", + "lon g", + "fr an", + "f aut", + "it i", + "b li", + "che v", + "pr i", + "ent e", + "ain si", + "ch am", + "l ors", + "c as", + "d o", + "il i", + "b é", + "n os", + "an ge", + "su i", + "r it", + "cr o", + "gu e", + "d e", + "e n", + "e s", + "o s", + "l a", + "e r", + "q u", + "a r", + "a n", + "o n", + "qu e", + "a s", + "o r", + "e l", + "d o", + "a l", + "c i", + "u n", + "r e", + "a b", + "i n", + "t e", + "t o", + "s e", + "d i", + "t r", + "d a", + "c on", + "t a", + "s u", + "m i", + "c o", + "t i", + "l e", + "l os", + "n o", + "l o", + "í a", + "c u", + "c a", + "s i", + "v i", + "m e", + "p or", + "m o", + "p ar", + "r a", + "r i", + "la s", + "c h", + "r o", + "m a", 
+ "p er", + "ó n", + "m en", + "de s", + "un a", + "m p", + "s o", + "ab a", + "p u", + "d os", + "t u", + "g u", + "er a", + "de l", + "h a", + "m u", + "l i", + "en t", + "m b", + "h ab", + "es t", + "g o", + "p a", + "r es", + "par a", + "p o", + "á s", + "m os", + "tr a", + "t en", + "an do", + "p i", + "qu i", + "b i", + "m an", + "co mo", + "v e", + "m ás", + "j o", + "ci ón", + "i s", + "t an", + "v o", + "da d", + "c e", + "a do", + "v er", + "f u", + "ci a", + "c er", + "p e", + "c as", + "c ar", + "men te", + "n i", + "su s", + "t ar", + "n a", + "f i", + "t er", + "z a", + "p ro", + "tr o", + "s a", + "l u", + "b a", + "per o", + "s er", + "c es", + "d as", + "d u", + "s in", + "e mp", + "m ar", + "l la", + "e x", + "á n", + "c or", + "i a", + "v a", + "r an", + "ch o", + "g a", + "y o", + "t os", + "c os", + "mi s", + "l es", + "t es", + "v en", + "h o", + "y a", + "en te", + "on es", + "hab ía", + "n u", + "u s", + "p as", + "h i", + "n os", + "es ta", + "la n", + "m as", + "t or", + "l le", + "h e", + "s on", + "b re", + "p re", + "ab an", + "d or", + "í an", + "i r", + "t as", + "é n", + "r u", + "en do", + "a que", + "er o", + "i o", + "qu é", + "m in", + "c ab", + "j a", + "de r", + "t al", + "é s", + "se ñ", + "or a", + "to do", + "la r", + "d on", + "g ar", + "s al", + "p r", + "cu ando", + "j e", + "h u", + "g un", + "b u", + "g i", + "d ar", + "n e", + "r as", + "de n", + "es to", + "par e", + "p en", + "é l", + "tr as", + "c an", + "b o", + "j os", + "mi en", + "pu e", + "c re", + "co mp", + "p on", + "d ía", + "tr os", + "s ab", + "so bre", + "es e", + "mb re", + "er on", + "a ñ", + "m or", + "f or", + "i do", + "por que", + "el la", + "p ri", + "g ran", + "f a", + "c en", + "di s", + "c ri", + "mu y", + "ch a", + "c al", + "es te", + "h as", + "c ó", + "g ra", + "r os", + "p os", + "o b", + "al l", + "aque l", + "j u", + "p res", + "m er", + "di jo", + "c ía", + "ent re", + "z o", + "ci ones", + "bi en", + "mb i", + "el o", + "t ó", + "in 
a", + "to dos", + "g en", + "ti en", + "est aba", + "de ci", + "ci o", + "h er", + "ñ o", + "l or", + "nu es", + "me di", + "l en", + "vi da", + "f e", + "al i", + "m on", + "c la", + "d re", + "pu es", + "al es", + "vo l", + "m í", + "r ar", + "b le", + "ci on", + "has ta", + "señ or", + "con o", + "a h", + "di os", + "s en", + "es a", + "ú n", + "v ar", + "s an", + "gu i", + "a c", + "o tros", + "ta do", + "bu en", + "ñ a", + "ti emp", + "ha cer", + "j er", + "f er", + "v u", + "f in", + "an a", + "as í", + "an tes", + "t in", + "ve z", + "mien to", + "j ar", + "la b", + "ch e", + "cas a", + "d r", + "es o", + "e go", + "di ó", + "an te", + "est á", + "m al", + "en cia", + "el i", + "í as", + "tiemp o", + "z ar", + "v an", + "m un", + "er ta", + "ta mbi", + "s í", + "b ar", + "a un", + "al e", + "mis mo", + "ent es", + "vi s", + "man o", + "el e", + "na da", + "se gu", + "me j", + "er ra", + "ab le", + "b e", + "ti r", + "un o", + "don de", + "to da", + "des de", + "r en", + "tambi én", + "cu er", + "per son", + "ho mbre", + "o tro", + "li b", + "tr ar", + "cu al", + "ha y", + "a u", + "ca da", + "t aba", + "i mp", + "men to", + "ten ía", + "qu er", + "er an", + "si emp", + "siemp re", + "er to", + "qu í", + "g os", + "pu és", + "el los", + "des pués", + "nu e", + "g an", + "l lo", + "in ter", + "có mo", + "tr i", + "ah ora", + "us te", + "tr aba", + "la do", + "in o", + "po co", + "er te", + "mu jer", + "i m", + "qui er", + "al gun", + "fu e", + "o jos", + "ent on", + "v os", + "es per", + "mu ch", + "o tra", + "a z", + "a d", + "in g", + "e za", + "a quí", + "ci as", + "gu a", + "mu cho", + "deci r", + "es ti", + "i dad", + "al go", + "e z", + "o cu", + "enton ces", + "di do", + "ent os", + "g ri", + "da do", + "i os", + "so l", + "dos e", + "uste d", + "qui en", + "a mi", + "un to", + "f r", + "mi r", + "mej or", + "b as", + "so lo", + "pre gun", + "tu r", + "al g", + "p la", + "to das", + "par te", + "e mb", + "c to", + "mun do", + "tien e", + "tan te", + "pa 
lab", + "tr an", + "aque lla", + "ci os", + "aun que", + "a y", + "cu en", + "ten er", + "f un", + "res pon", + "all í", + "x i", + "h an", + "pen s", + "con tra", + "tu ra", + "v al", + "di o", + "tr es", + "t re", + "tan to", + "ca min", + "m ó", + "es p", + "a da", + "í o", + "in s", + "ha cia", + "de j", + "est ar", + "i ón", + "g as", + "b er", + "v as", + "no che", + "é r", + "añ os", + "pa dre", + "gu s", + "á r", + "sin o", + "man os", + "ci do", + "es tu", + "a de", + "hu bi", + "vi r", + "b ri", + "ra z", + "ch i", + "pue de", + "men os", + "hab i", + "ho mb", + "ne ces", + "ma y", + "er os", + "r ía", + "he cho", + "es cu", + "l ti", + "án do", + "b us", + "cos as", + "t ú", + "es pa", + "re ci", + "c tor", + "pri m", + "di a", + "de se", + "mien tras", + "h or", + "fu er", + "i da", + "pos i", + "lan te", + "t on", + "an o", + "est as", + "p li", + "ch ar", + "lu ego", + "si ón", + "ci n", + "ti erra", + "m es", + "gu ar", + "ca do", + "en con", + "pr en", + "may or", + "f al", + "e r", + "o n", + "a n", + "t o", + "d i", + "r e", + "l a", + "i n", + "e n", + "a l", + "t a", + "c h", + "e l", + "r i", + "c o", + "t i", + "t e", + "s i", + "r a", + "u n", + "l e", + "l i", + "ch e", + "r o", + "c i", + "c a", + "s e", + "q u", + "m a", + "p o", + "s o", + "i l", + "d o", + "e s", + "v a", + "p er", + "l o", + "c on", + "d el", + "p a", + "m o", + "s a", + "p i", + "d a", + "m i", + "g i", + "s u", + "d e", + "v i", + "z i", + "m e", + "g li", + "n o", + "m en", + "v o", + "t u", + "n on", + "v e", + "t to", + "s t", + "on e", + "an o", + "ch i", + "er a", + "er e", + "f a", + "c e", + "z a", + "un a", + "b i", + "p re", + "s ta", + "o r", + "a r", + "f i", + "on o", + "t ra", + "n a", + "n el", + "n e", + "p ro", + "t ro", + "al e", + "v er", + "n i", + "c u", + "t ti", + "men te", + "del la", + "t er", + "zi one", + "g u", + "p e", + "t ta", + "an do", + "t à", + "al i", + "u o", + "qu el", + "co m", + "s en", + "co me", + "b a", + "al la", + "p ri", + 
"d u", + "qu es", + "l u", + "on i", + "g gi", + "pa r", + "s si", + "v en", + "in a", + "g a", + "pi ù", + "ci a", + "i m", + "co r", + "m an", + "in o", + "in i", + "t en", + "r an", + "b b", + "g o", + "s to", + "t re", + "a ve", + "a v", + "s ono", + "er i", + "a c", + "s se", + "er o", + "h a", + "s c", + "su l", + "f or", + "v ano", + "po r", + "s ti", + "su o", + "c chi", + "t an", + "z za", + "an che", + "p u", + "i o", + "t te", + "vo l", + "es s", + "s ci", + "co l", + "r u", + "p en", + "f u", + "al l", + "s so", + "s te", + "se m", + "s sa", + "d en", + "a d", + "t ri", + "de i", + "in e", + "ave va", + "men to", + "z z", + "a mo", + "g no", + "f o", + "un o", + "su a", + "g en", + "ri a", + "g e", + "st ra", + "s ì", + "c er", + "ch é", + "b u", + "a p", + "c en", + "d al", + "on a", + "s pe", + "g ni", + "b o", + "t t", + "del le", + "ques to", + "nel la", + "f f", + "d ere", + "an no", + "del l", + "un i", + "bb e", + "an ti", + "g ra", + "s p", + "en e", + "gi o", + "u to", + "qu al", + "gli a", + "qu ando", + "tu tto", + "c an", + "gli o", + "zi oni", + "ca m", + "h o", + "es so", + "s s", + "mo l", + "a t", + "lo ro", + "per ché", + "co sa", + "du e", + "po i", + "ca r", + "s co", + "ci o", + "to r", + "c co", + "c re", + "a m", + "g na", + "te m", + "pri ma", + "lu i", + "co sì", + "qu e", + "gu ar", + "ess ere", + "an i", + "con o", + "b ra", + "al le", + "m on", + "ri o", + "an co", + "cu i", + "s pi", + "vi a", + "g ran", + "gi or", + "a i", + "bi le", + "u l", + "ggi o", + "f e", + "an te", + "ma i", + "ta re", + "in ter", + "in di", + "re bbe", + "sen za", + "so lo", + "zi o", + "e d", + "en te", + "tu tti", + "sta to", + "zi a", + "d alla", + "tu ra", + "mi a", + "vi ta", + "quel la", + "qu a", + "ma r", + "do ve", + "g h", + "al lo", + "sem pre", + "zz o", + "si a", + "mo r", + "do po", + "por ta", + "d re", + "c cia", + "er ano", + "an ni", + "di o", + "chi a", + "en za", + "pro pri", + "qu i", + "m u", + "m b", + "an da", + "c ca", + "o 
cchi", + "ques ta", + "f fi", + "le i", + "par te", + "d on", + "r on", + "mi o", + "tan to", + "ri s", + "o gni", + "di s", + "r in", + "fa r", + "men ti", + "t el", + "anco ra", + "f ra", + "fa tto", + "man i", + "sen ti", + "p ra", + "tem po", + "es si", + "b bi", + "f in", + "a re", + "la re", + "per s", + "f on", + "b el", + "so r", + "d er", + "pre n", + "an za", + "di re", + "pi e", + "o ra", + "ver so", + "se gu", + "al tro", + "ta to", + "ca to", + "a to", + "vol ta", + "c c", + "fa re", + "pa re", + "ci ò", + "li b", + "bi li", + "n uo", + "s er", + "quel lo", + "co lo", + "p po", + "ca sa", + "tro va", + "o re", + "f er", + "r ono", + "d es", + "mol to", + "al mente", + "s ca", + "vo le", + "t ali", + "sul la", + "s ce", + "men o", + "an to", + "p un", + "s tu", + "ca pi", + "so l", + "gi u", + "m ini", + "m ano", + "z e", + "pi a", + "par ti", + "s al", + "la vo", + "ver o", + "r si", + "al tri", + "es ti", + "s cia", + "suo i", + "gli e", + "so tto", + "b ene", + "sc ri", + "t ale", + "de gli", + "n u", + "al c", + "uo mo", + "p el", + "f re", + "po te", + "es sa", + "s cu", + "si gno", + "el e", + "st ro", + "u ti", + "di a", + "si one", + "g re", + "f ini", + "ar ri", + "l un", + "c ri", + "e si", + "pa ssa", + "r à", + "men tre", + "an d", + "h anno", + "el o", + "u sci", + "gi a", + "gi à", + "di e", + "m ina", + "b e", + "ti ca", + "gior no", + "t in", + "es se", + "mo do", + "c al", + "s pa", + "propri o", + "l en", + "o ri", + "con tro", + "st ru", + "di ven", + "di sse", + "ra to", + "no i", + "v ere", + "pu ò", + "di ce", + "s an", + "es a", + "c ci", + "se con", + "re n", + "c cio", + "qual che", + "tu tta", + "g g", + "mon do", + "for ma", + "p li", + "m ma", + "pen sa", + "de va", + "tu r", + "fo sse", + "so pra", + "ta mente", + "n ess", + "qu anto", + "ra ga", + "un que", + "ca re", + "st re", + "gran de", + "pi cco", + "guar da", + "b en", + "nel l", + "a ff", + "po ssi", + "pre sen", + "r ò", + "pa ro", + "tu a", + "v in", + "an e", + 
"a s", + "ste sso", + "da v", + "ne i", + "nel le", + "gh i", + "pi o", + "ta r", + "an a", + "la to", + "si d", + "f ine", + "f uo", + "m er", + "z o", + "qua si", + "ul ti", + "i to", + "su e", + "si e", + "f il", + "allo ra", + "m in", + "ven i", + "t ano", + "el lo", + "d e", + "r a", + "e s", + "d o", + "e n", + "q u", + "c o", + "a s", + "o s", + "e r", + "a r", + "s e", + "qu e", + "a n", + "i n", + "i s", + "t o", + "ã o", + "t e", + "d a", + "m a", + "e l", + "t a", + "o r", + "i a", + "r e", + "e m", + "a l", + "co m", + "p a", + "o u", + "c a", + "u m", + "r o", + "v a", + "t i", + "s o", + "m en", + "n ão", + "h a", + "co n", + "m e", + "r i", + "pa ra", + "p o", + "d i", + "s a", + "v o", + "u ma", + "c i", + "n a", + "p or", + "n o", + "g u", + "s u", + "h o", + "an do", + "t ra", + "e i", + "v i", + "e u", + "i m", + "do s", + "el e", + "r es", + "m o", + "en t", + "f i", + "l a", + "e ra", + "l e", + "de s", + "el a", + "men te", + "l h", + "p er", + "l i", + "ç ão", + "m as", + "t er", + "m u", + "es t", + "v e", + "g o", + "l o", + "u s", + "ma is", + "v er", + "c ê", + "in ha", + "vo cê", + "f a", + "t u", + "c u", + "p ar", + "com o", + "p ro", + "s i", + "m os", + "e c", + "p re", + "d as", + "ç a", + "es ta", + "s er", + "u n", + "da de", + "d is", + "f o", + "e x", + "c h", + "i r", + "ra n", + "t ar", + "en te", + "g a", + "t r", + "p e", + "t os", + "b o", + "c ia", + "p en", + "c ar", + "s en", + "su a", + "se m", + "c as", + "f or", + "to u", + "n os", + "te m", + "r ia", + "m es", + "se u", + "co r", + "o n", + "a o", + "p os", + "ra m", + "v el", + "é m", + "t en", + "po de", + "t es", + "esta va", + "c e", + "b a", + "qu ando", + "m i", + "qu er", + "men to", + "se gu", + "t as", + "is so", + "mu i", + "g ar", + "t ro", + "d u", + "fa z", + "õ es", + "p es", + "an to", + "l u", + "p i", + "i x", + "ve z", + "s im", + "j a", + "p r", + "m in", + "b e", + "ra s", + "m an", + "p res", + "est á", + "c er", + "b re", + "p as", + "d ia", + 
"m b", + "dis se", + "n i", + "r os", + "es se", + "v ia", + "o lh", + "is a", + "an te", + "ê n", + "z a", + "qu i", + "b i", + "t inha", + "me u", + "s ão", + "m inha", + "a c", + "ri o", + "m ar", + "a t", + "p el", + "mui to", + "ta l", + "to r", + "fo i", + "h or", + "j o", + "b em", + "g i", + "f al", + "vo l", + "po n", + "di z", + "l ar", + "gu n", + "m or", + "r u", + "par ec", + "ç o", + "do r", + "pes so", + "n e", + "f er", + "b er", + "p u", + "po is", + "in a", + "es p", + "d ar", + "en do", + "de n", + "so bre", + "co s", + "p ri", + "al i", + "mes mo", + "ç ões", + "g ra", + "se us", + "me i", + "b ra", + "vi da", + "an tes", + "b ri", + "at é", + "ên cia", + "lh e", + "ti v", + "m ã", + "al g", + "qu anto", + "s ó", + "g os", + "de r", + "t ão", + "tu do", + "ent ão", + "r ou", + "es s", + "in da", + "b al", + "in do", + "ci o", + "n do", + "j á", + "va m", + "re i", + "l es", + "ei to", + "v is", + "tem po", + "de pois", + "c ha", + "m el", + "ch e", + "l ha", + "a inda", + "faz er", + "con tra", + "p ou", + "per gun", + "de ix", + "ta mb", + "ra r", + "al a", + "v en", + "t in", + "pel o", + "tamb ém", + "fi ca", + "pre c", + "el es", + "tra n", + "ha via", + "l á", + "to dos", + "j u", + "qu al", + "c an", + "ta do", + "cas a", + "es sa", + "n as", + "g em", + "m em", + "se i", + "na da", + "sen ti", + "c ri", + "ó s", + "de u", + "ei ro", + ". 
.", + "f un", + "as sim", + "s ou", + "ent re", + "com e", + "i or", + "h ar", + "f e", + "por que", + "s or", + "f in", + "ta mente", + "a qui", + "cu l", + "t ó", + "for ma", + "s ar", + "ou tra", + "olh os", + "i ma", + "m im", + "a go", + "in s", + "co u", + "g ran", + "v al", + "pesso as", + "era m", + "ei ra", + "a que", + "com p", + "de i", + "p ela", + "co isa", + "m ão", + "con h", + "ca da", + "ago ra", + "ia m", + "h á", + "con s", + "su as", + "gu ém", + "o b", + "l an", + "es ti", + "á s", + "la do", + "in ter", + "ca be", + "por ta", + "n em", + "í vel", + "r is", + "j e", + "n un", + "sem pre", + "con segu", + "h as", + "tra bal", + "f u", + "le v", + "l em", + "l as", + "va i", + "tr os", + "t ante", + "te i", + "pr ó", + "que m", + "tu ra", + "on de", + "cabe ça", + "nun ca", + "men tos", + "h um", + "de le", + "ver dade", + "t á", + "h os", + "el i", + "ent es", + "m er", + "alg um", + "diz er", + "s in", + "pen as", + "n ós", + "en quanto", + "ou tro", + "l ho", + "es te", + "mel hor", + "est ar", + "g an", + "b ar", + "pri mei", + "a u", + "i u", + "pen sa", + "a penas", + "p ra", + "es tou", + "con te", + "res pon", + "ho mem", + "do is", + "a do", + "c al", + "a b", + "l os", + "ç as", + "pou co", + "sen hor", + "t ando", + "esp era", + "pa i", + "ri os", + "no i", + "i da", + "ba ix", + "as e", + "is as", + "f r", + "ho ra", + "mu ndo", + "pas sa", + "fi car", + "to do", + "se ja", + "al mente", + "â n", + "c lar", + "a d", + "in c", + "f os", + "lo n", + "g ri", + "ou vi", + "v em", + "g e", + "ta va", + "á rio", + "mo n", + "s os", + "in ho", + "ma l", + "t an", + "t re", + "gran de", + "ran do", + "b u", + "v ou", + "ê s", + "co isas", + "a conte", + "lh er", + "g en", + "ci on", + "an os", + "i do", + "tal vez", + "est ão", + "li v", + "sa b", + "su r", + "ou tros", + "c re", + "qual quer", + "g ou", + "t ri", + "l í", + "tiv esse", + "ra do", + "prec isa", + "mã e", + "su s", + "t anto", + "de la", + "men os", + "s al", + "en tra", + "p 
é", + "ma ior", + "noi te", + "ti va", + "p ala", + "so n", + "ra ção", + "de us", + "s as", + "un i", + "l or", + "u l", + "in te", + "f ei", + "an o", + "par ti", + "pala v", + "tr ás", + "par te", + "b el", + "ci dade", + "lu gar", + "v os", + "vez es", + "do u", + "en contra", + "tr u", + "e ci", + "a r", + "e r", + "a n", + "e n", + "i n", + "i r", + "o r", + "d e", + "a k", + "ı n", + "a l", + "d i", + "d a", + "b u", + "b ir", + "y or", + "i l", + "e k", + "y a", + "m a", + "l a", + "e l", + "u n", + "k a", + "l ar", + "i m", + "d ı", + "e t", + "o n", + "d u", + "o l", + "e y", + "t ı", + "m i", + "h a", + "b a", + "l er", + "ü n", + "m ı", + "i z", + "l e", + "ı r", + "m e", + "i s", + "n e", + "o k", + "t a", + "s a", + "u m", + "r a", + "g ö", + "i k", + "s ı", + "d en", + "e s", + "b il", + "t i", + "l ı", + "ü z", + "i ç", + "ü r", + "g i", + "u r", + "t e", + "b en", + "d an", + "i y", + "ı m", + "u z", + "v e", + "c ak", + "a y", + "c e", + "i ş", + "ın ı", + "i yor", + "ba ş", + "d ü", + "a t", + "a m", + "g el", + "de ğ", + "k ar", + "i ̇", + "m u", + "e v", + "ö y", + "bu n", + "v ar", + "ya p", + "s en", + "an a", + "s un", + "in i", + "gö r", + "y ı", + "k i", + "l i", + "ar a", + "al ı", + "on u", + "ç ı", + "ş ey", + "s ın", + "k ı", + "ka d", + "s e", + "t an", + "a ğ", + "değ il", + "s in", + "ü k", + "a z", + "ç ok", + "s on", + "ş ı", + "b i", + "ü l", + "t u", + "v er", + "iç in", + "g e", + "k en", + "ey e", + "ol du", + "mı ş", + "y e", + "k al", + "m ek", + "l an", + "öy le", + "yor du", + "er i", + "y üz", + "mi ş", + "b e", + "m ak", + "o la", + "in e", + "y an", + "h er", + "c ek", + "yor um", + "b ak", + "ü m", + "ö n", + "lar ı", + "o ğ", + "d er", + "kad ar", + "h al", + "ar ı", + "s t", + "s an", + "ın da", + "du r", + "g ün", + "v a", + "y ok", + "y er", + "dı m", + "k o", + "da ha", + "l u", + "ın a", + "di m", + "e m", + "bil ir", + "ik i", + "s iz", + "s i", + "n a", + "di ğ", + "s u", + "b ü", + "ha y", + "s or", + "dü ş", 
+ "ü ç", + "un u", + "ö r", + "d ir", + "m ü", + "c a", + "am an", + "f ak", + "a da", + "e de", + "son ra", + "h iç", + "ak i", + "ğ ı", + "bu l", + "r u", + "ma z", + "an la", + "bu ra", + "ge ç", + "ma ya", + "l en", + "k onu", + "c i", + "c u", + "d in", + "t ek", + "z aman", + "el er", + "ö z", + "dı r", + "gi bi", + "o t", + "ş a", + "g er", + "ler i", + "k im", + "k u", + "fak at", + "y ar", + "gö z", + "c ı", + "yor sun", + "b ek", + "in de", + "r o", + "p ek", + "bun u", + "l ik", + "m an", + "il er", + "e di", + "ö l", + "s ür", + "b in", + "s ır", + "çı k", + "sı l", + "al ar", + "k es", + "y ak", + "ç ek", + "yı l", + "e cek", + "ı z", + "gi t", + "ka p", + "a ma", + "ı l", + "lar ın", + "b iz", + "tı r", + "o y", + "an cak", + "d oğ", + "ç a", + "b ana", + "ş im", + "baş la", + "l ü", + "ma dı", + "ben i", + "t ir", + "y ük", + "lı k", + "be ş", + "b el", + "b er", + "m er", + "na sıl", + "tı k", + "k e", + "t ür", + "a v", + ". .", + "d aki", + "p ar", + "t er", + "ce ğ", + "t en", + "z ı", + "iy i", + "d ok", + "ben im", + "c ağ", + "n er", + "y en", + "ş u", + "me z", + "düş ün", + "ken di", + "şim di", + "y ol", + "y u", + "de v", + "is te", + "s ek", + "ma m", + "s öyle", + "di k", + "t o", + "k ur", + "oldu ğ", + "s ını", + "t ar", + "bil iyor", + "k an", + "y al", + "m eye", + "mu ş", + "f a", + "ka ç", + "bil e", + "iy e", + "t ü", + "e f", + "tı m", + "ev et", + "ç o", + "y et", + "g en", + "bura da", + "t im", + "bir az", + "es i", + "k or", + "doğ ru", + "in in", + "kı z", + "di ye", + "d ör", + "et ti", + "on un", + "is ti", + "ğ i", + "h e", + "s ana", + "ü ş", + "ar ka", + "hay ır", + "kar şı", + "h ar", + "il e", + "h ak", + "ı yor", + "ne den", + "s ev", + "sı z", + "ço cu", + "me m", + "ç alı", + "ol ur", + "b ır", + "g ir", + "is e", + "i h", + "c an", + "k ır", + "d ön", + "b öyle", + "sen i", + "! 
\"", + "al t", + "dör t", + "s öy", + "o ş", + "mu sun", + "la ş", + "h an", + "i p", + "ka y", + "h em", + "bü yük", + "a ç", + "bır ak", + "mi sin", + "s öz", + "u l", + "değ iş", + "ün ü", + "g ül", + "k ö", + "kar ı", + "ta mam", + "ol u", + "r ar", + "yen i", + "la m", + "mış tı", + "ya ş", + "al a", + "in iz", + "kad ın", + "bun un", + "m ey", + "al tı", + "y i", + "s o", + "in den", + "sen in", + "ya t", + "to p", + "s er", + "is i", + "d ün", + "s es", + "hiç bir", + "y on", + "d ın", + "t ün", + "baş ka", + "a s", + "he p", + "i t", + "ir mi", + "dev am", + "ola cak", + "ar tık", + "r e", + "dur um", + "im iz", + "üz el", + "ler ini", + "sa ğ", + "p ro", + "ger ek", + "y irmi", + "ş ek", + "ba ğ", + "me di", + "lar a", + "a h", + "t ur", + "y ür", + "ma sı", + "ka tı", + "de di", + "g ü", + "sor un", + "el i", + "ün e", + "mı z", + "yap ı", + "m il", + "ğ ını", + "t ara", + "m en", + "ha t", + "var dı", + "m et", + "konu ş", + "ar ak", + "lar ak", + "çocu k", + "bü tün", + "l ey", + "d ür", + "g üzel", + "ay ı", + "yap a", + "n ı", + "ay r", + "ö ne", + "yordu m", + "b an", + "i̇ ş", + "du m", + "un a", + "on a", + "yor lar", + "lar ını", + "çı kar", + "z an", + "se ç", + "l iyor", + "t ak", + "şı k", + "tek rar", + "a ş", + "e ş", + "miş ti", + "f ar", + "k in", + "im i", + "i f", + "e ğ", + "gi di", + "le ş", + "başla dı", + "gi de", + "ot ur", + "d de", + "ın dan", + "üz er", + "ın ın", + "n ız", + "u y", + "ye di", + "ka t", + "o larak", + "la dı", + "yal nız", + "ba h", + "iy et", + "m al", + "s ak", + "a çık", + "sın da", + ".. 
.", + "in san", + "ay nı", + "e der", + "is tan", + "uz un", + "sa h", + "d o", + "g eri", + "er ek", + "ol an", + "ger çek", + "f en", + "al an", + "dı ş", + "alı k", + "far k", + "ü st", + "sa de", + "r i", + "k iş", + "l dı", + "z or", + "et ir", + "her kes", + "s al", + "ö mer", + "s el", + "un da", + "ha f", + "bun a", + "y dı", + "pek i", + "ada m", + "ha z", + "sın a", + "kap ı", + "gör üş", + "sade ce", + "al dı", + "gel di", + "i e", + "n ie", + "n a", + "r z", + "s z", + "c z", + "p o", + "s t", + "c h", + "i ę", + "d z", + "n i", + "a ł", + "r a", + "j e", + "r o", + "d o", + "s ię", + "z a", + "g o", + "e m", + "w i", + "c i", + "rz e", + "k o", + "l e", + "l i", + "w a", + "t o", + "k a", + "m i", + "ż e", + "t a", + "w ie", + "b y", + "m o", + "w y", + "rz y", + "ł a", + "j a", + "n o", + "ł o", + "w o", + "p a", + "m a", + "t e", + "t y", + "n y", + "k i", + "d a", + "n e", + "dz ie", + "dz i", + "cz y", + "c ie", + "m y", + "p rze", + "d y", + "o d", + "l a", + "k ie", + "r y", + "st a", + "j ą", + "ó w", + "c e", + "p rzy", + "c o", + "k u", + "m ie", + "sz y", + "cz e", + "r e", + "b a", + "s i", + "b ie", + "m u", + "w e", + "c y", + "ni a", + "ś ci", + "sz e", + "je st", + "k t", + "s a", + "b o", + "t u", + "ż y", + "n ą", + "b i", + "r u", + "a le", + "kt ó", + "p ra", + "ał a", + "m nie", + "p ie", + "ł y", + "cz a", + "ja k", + "ro z", + "r ó", + "l u", + "z na", + "g a", + "ra z", + "ł u", + "ta k", + "j u", + "p i", + "ś ć", + "s o", + "wi a", + "m ó", + "ch o", + "w szy", + "p e", + "s po", + "c a", + "g dy", + "w ał", + "w ię", + "d e", + "b e", + "p ro", + "ł em", + "j ę", + "s k", + "z e", + "l o", + "g i", + "r ę", + "do b", + "d u", + "ju ż", + "st o", + "b ę", + "ał em", + "sz a", + "m e", + "po d", + "d la", + "pa n", + "n ę", + "z o", + "mo że", + "ś li", + "s ie", + "ał o", + "t em", + "l ko", + "ny ch", + "po wie", + "c ię", + "s u", + "ty lko", + "i n", + "b u", + "na j", + "ch a", + "te go", + "p u", + "s ki", + "ne go", + 
"wszy st", + "sz cze", + "je d", + "je j", + "t wo", + "ą d", + "ś my", + "cz ę", + "wa ć", + "je go", + "ż a", + "i m", + "s y", + "pra w", + "ty m", + "któ ry", + "ał y", + "t rze", + "nie j", + "s e", + "ny m", + "i ch", + "o b", + ". .", + "g ło", + "ją c", + "mó wi", + "s ka", + "o n", + "ne j", + "s łu", + "w ła", + "bę dzie", + "d ę", + "p ó", + "be z", + "ni c", + "p ła", + "ś cie", + "mi a", + "s ą", + "t rzy", + "kie m", + "by ł", + "mo g", + "ro bi", + "ta m", + "c u", + "te n", + "m ię", + "z y", + "pe w", + "ci a", + "my ś", + "prze d", + "s ko", + "n u", + "któ re", + "a l", + "l ę", + "w sze", + "ą c", + "by ło", + "so bie", + "p y", + "ci ą", + "ba r", + "je szcze", + "h a", + "t ę", + "b ra", + "cza s", + "sz ę", + "g ł", + "k ę", + "ma r", + "cz u", + "prze z", + "f i", + "s ło", + "w z", + "k to", + "k ów", + "cz o", + "li śmy", + "st ra", + "wię c", + "r ą", + "ma m", + "w ó", + "rz a", + "g ro", + "no ści", + "f a", + "we t", + "ną ł", + "ś mie", + "na wet", + "mu si", + "s wo", + "te j", + "w ą", + "w u", + "wi ą", + "ni u", + "cz ą", + "b li", + "dz o", + "s kie", + "n em", + "je śli", + "cze go", + "ch y", + "d ł", + "ty ch", + "by m", + "ż o", + "e ś", + "si ą", + "kie dy", + "na s", + "w ró", + "dz e", + "d ro", + "t ra", + "r ów", + "pa ni", + "z ie", + "ku l", + "na d", + "ch wi", + "ni m", + "t ro", + "by ć", + "cho dzi", + "ni o", + "dob rze", + "te raz", + "wo kul", + "co ś", + "k ł", + "pie r", + "h e", + "g dzie", + "dz y", + "p ię", + "d ź", + "k ą", + "g ó", + "z da", + "ch ce", + "st ę", + "o r", + "ś wia", + "wszyst ko", + "st ro", + "pe ł", + "wie m", + "wie l", + "ka ż", + "ki m", + "rz u", + "s ły", + "jed na", + "z u", + "myś l", + "mó j", + "g u", + "wa r", + "jest em", + "ó ż", + "mie j", + "mo ż", + "k ła", + "re sz", + "d łu", + "st wo", + "n ię", + "ma sz", + "że by", + "nie m", + "ja kie", + "st y", + "ni ą", + "we j", + "o j", + "g ra", + "s ła", + "no ść", + "z ło", + "sz czę", + ".. 
.", + "r i", + "le j", + "we go", + "c ał", + "dzi ał", + "ki ch", + "dz a", + "dz ię", + "o czy", + "zo sta", + "cz ło", + "na m", + "ki l", + "o na", + "sz u", + "w ę", + "pa r", + "mi ał", + "st rze", + "ce j", + "e j", + "zna j", + "da ć", + "miej s", + "k ró", + "k ry", + "bar dzo", + "si a", + "z i", + "ś nie", + "l ą", + "g ie", + "cie bie", + "d ni", + "st u", + "po trze", + "wokul ski", + "u wa", + "u mie", + "jedna k", + "k ra", + "wró ci", + "czło wie", + "czy ć", + "by ła", + "że li", + "m ę", + "c ę", + "z robi", + "mog ę", + "pro wa", + "r em", + "nie ch", + "cz nie", + "k ro", + "t ą", + "ch ci", + "b ro", + "dzie ć", + "sz ą", + "pa d", + "t rz", + "t ru", + "je m", + "a ni", + "t ów", + "a r", + "d ru", + "ta j", + "rze kł", + "sa m", + "st e", + "nie go", + "ta kie", + "w ała", + "to wa", + "ka pła", + "wi dzi", + "po dob", + "dz ę", + "t ał", + "stę p", + "b ą", + "po ko", + "w em", + "g ę", + "a by", + "g e", + "al bo", + "s pra", + "z no", + "de n", + "s mo", + "je sz", + "k się", + "jest eś", + "po z", + "ni gdy", + "k sią", + "c óż", + "w s", + "po w", + "t ka", + "ś wie", + "sz ka", + "sa mo", + "s ł", + "rz ę", + "na le", + "chce sz", + "ni k", + "p ę", + "chy ba", + "cią g", + "ją cy", + "wo j", + "na sze", + "mnie j", + "wię cej", + "z wy", + "o sta", + "f e", + "wa ż", + "h o", + "se r", + "śmie r", + "wie r", + "dz ą", + "za ś", + "gdy by", + "ja ki", + "wo l", + "wi n", + "d ą", + "ści a", + "roz ma", + "wa l", + "pa nie", + "sta r", + "ka z", + "je żeli", + "d em", + "w ra", + "ko ń", + "sie bie", + "zno wu", + "p ró", + "cz em", + "st wa", + "i sto", + "pó ł", + "d ał", + "ko bie", + "ała m", + "wy ch", + "ce sa", + "ni ch", + "za wsze", + "dzi ć", + "te ż", + "le pie", + "pro szę", + "k re", + "t wa", + "o t", + "ł ą", + "ch u", + "c ą", + "p rz", + "ł e", + "sze dł", + "od powie", + "my śli", + "ś wią", + "e n", + "e r", + "d e", + "a n", + "e t", + "i j", + "i n", + "e l", + "a a", + "s t", + "o r", + "g e", + "i s", + "a t", + "i 
e", + "c h", + "o n", + "e en", + "h et", + "i t", + "v er", + "aa r", + "a l", + "o or", + "g en", + "v an", + "o p", + "d en", + "h e", + "o m", + "t e", + "w e", + "i k", + "r e", + "z e", + "ij n", + "d at", + "b e", + "d er", + "in g", + "o e", + "ij k", + "a an", + "ch t", + "v oor", + "l e", + "i et", + "r o", + "m o", + "k en", + "z ijn", + "m en", + "i g", + "j e", + "n iet", + "a r", + "o o", + "i d", + "u n", + "i l", + "s ch", + "mo et", + "st e", + "u r", + "o l", + "he b", + "u it", + "g el", + "w ij", + "a s", + "m e", + "t en", + "w or", + "o u", + "v en", + "l en", + "aa t", + "d it", + "m et", + "r a", + "b en", + "s p", + "o ver", + "d ie", + "n o", + "w er", + "l ijk", + "f t", + "s l", + "an d", + "v e", + "t er", + "i er", + "i en", + "t o", + "d aar", + "g r", + "b el", + "de ze", + "d u", + "a g", + "k an", + "wor den", + "in gen", + "moet en", + "n en", + "on der", + "heb ben", + "r u", + "oo k", + "s en", + "c t", + "k t", + "no g", + "aa l", + "w as", + "u l", + "e er", + "b ij", + "m ijn", + "p ro", + "v ol", + "d o", + "k om", + "at ie", + "e ft", + "k el", + "al s", + "r ij", + "he id", + "a f", + "st el", + "m aar", + "a p", + "we e", + "a d", + "he eft", + "w aar", + "i cht", + "d an", + "er en", + "n e", + "w el", + "w at", + "w il", + "a cht", + "aa g", + "ge b", + "c on", + "z o", + "k e", + "b et", + "h ij", + "d ig", + "k un", + "u w", + "d t", + "d oor", + "t ij", + "a m", + "an g", + "on d", + "er s", + "is ch", + "ge en", + "i ge", + "ge v", + "ve el", + "n u", + "m a", + "on s", + "o f", + "b l", + "n aar", + "g ro", + "p l", + "an der", + "at en", + "kun nen", + "e cht", + "h ier", + "g oe", + "an t", + "u s", + "t wee", + "on t", + "de lijk", + "el e", + "u ur", + "al le", + "t oe", + "me er", + "i st", + "n a", + "n ie", + "on ze", + "l o", + "i m", + "p en", + "h ad", + "tij d", + "h oe", + "to t", + "z ou", + "a k", + "aa k", + "a men", + "d r", + "w oor", + "s e", + "wor dt", + "o t", + "gel ijk", + "g aan", + "i c", + 
"g er", + "k er", + "el d", + "e m", + "h ou", + "de l", + "z en", + "z el", + "te gen", + "b o", + "kom en", + "c om", + "i gen", + "e it", + "wer k", + "goe d", + "z al", + "z ij", + "sl ag", + "e s", + "z ien", + "a st", + "echt er", + "it ie", + "t ie", + "el ijk", + "m is", + "isch e", + "bel an", + "h aar", + "i ch", + "b er", + "h an", + "v r", + "al e", + "c i", + "gr ijk", + "in d", + "do en", + "l and", + "belan grijk", + "p un", + "op en", + "ct ie", + "zel f", + "m ij", + "it eit", + "ste m", + "me e", + "ar en", + "al l", + "b r", + "re cht", + "d ien", + "h u", + "g aat", + "pro b", + "m oe", + "p er", + "a u", + "ul len", + "z ich", + "daar om", + "or m", + "k l", + "v o", + "en t", + "st aat", + "z it", + "du i", + "n at", + "du s", + "d s", + "ver slag", + "kel ijk", + "prob le", + "w et", + "ge m", + "c r", + "i on", + "p r", + "sch ap", + "g d", + "h un", + "z a", + "er d", + "z et", + "st aan", + "st r", + "m aal", + "in der", + "e id", + "st en", + "p ar", + "k ken", + "ge d", + "z ullen", + "re s", + "men sen", + "j aar", + "re gel", + "ie der", + "vol gen", + "ge ven", + "e ven", + "l u", + "bl ij", + "i ë", + "k o", + "u we", + "m an", + "ma ken", + "l ie", + "g a", + "oe k", + "nie uwe", + "b aar", + "h o", + "h er", + "in ter", + "ander e", + "ru ik", + "s u", + "a gen", + "or t", + "m er", + "ou w", + "st er", + "wil len", + "aa kt", + "h oo", + "an den", + "f f", + "l ig", + "t re", + "s amen", + "ze er", + "dui delijk", + "ant woor", + "he el", + "men t", + "pun t", + "hou den", + "we g", + "vr aag", + "gel e", + "een s", + "be sch", + "om en", + "er g", + "do el", + "d ag", + "sp e", + "ur en", + "ing s", + "or en", + "l ang", + "de len", + "m ar", + "ste un", + "in nen", + "p ol", + "o on", + "i de", + "s n", + "s ie", + "r icht", + "z onder", + "no dig", + "all een", + "m id", + "ra gen", + "iet s", + "ver sch", + "geb ruik", + "st u", + "ro uw", + "stel len", + "be g", + "men ten", + "v in", + "eer ste", + "l aat", + "gro ot", + "oo 
d", + "to ch", + "l aten", + "aar d", + "s le", + "de el", + "st and", + "pl aat", + "re e", + "bet re", + "d i", + "l id", + "uit en", + "ra cht", + "bel eid", + "g et", + "ar t", + "st ie", + "st aten", + "g gen", + "re ken", + "e in", + "al en", + "m ing", + "mo gelijk", + "gro te", + "al tijd", + "z or", + "en kel", + "w ik", + "pol itie", + "e igen", + "el k", + "han del", + "g t", + "k we", + "m aat", + "el en", + "i p", + "v rij", + "s om", + "je s", + "aa m", + "hu is", + "v al", + "we er", + "lid staten", + "k ing", + "k le", + "be d", + "gev al", + "stel l", + "a i", + "wik kel", + "kwe stie", + "t al", + "ste e", + "a b", + "h el", + "kom st", + "p as", + "s s", + "it u", + "i den", + "eer d", + "m in", + "c e", + "p o", + "twee de", + "proble em", + "w aren", + "us sen", + "sn el", + "t ig", + "ge w", + "j u", + "ul t", + "ne men", + "com mis", + "versch il", + "k on", + "z oek", + "k rij", + "gr aag", + "den k", + "l anden", + "re den", + "be sl", + "oe g", + "bet er", + "he den", + "m ag", + "p e", + "bo ven", + "a c", + "con t", + "f d", + "h ele", + "k r", + "v ier", + "w in", + "ge z", + "k w", + "m il", + "v or", + "he m", + "ra m", + "aa s", + "ont wikkel", + "dr ie", + "v aak", + "plaat s", + "l a", + "g ang", + "ij f", + "f in", + "nat uur", + "t ussen", + "u g", + "in e", + "d a", + "b at", + "kom t", + "w acht", + "aa d", + "u t", + "é n", + "acht er", + "geb ie", + "ver k", + "lig t", + "c es", + "nie uw", + "van d", + "s t", + "n í", + "j e", + "p o", + "c h", + "r o", + "n a", + "s e", + "t o", + "n e", + "l e", + "k o", + "l a", + "d o", + "r a", + "n o", + "t e", + "h o", + "n ě", + "v a", + "l i", + "l o", + "ř e", + "c e", + "d e", + "v e", + "b y", + "n i", + "s k", + "t a", + "n á", + "z a", + "p ro", + "v o", + "v ě", + "m e", + "v á", + "s o", + "k a", + "r á", + "v y", + "z e", + "m i", + "p a", + "t i", + "st a", + "m ě", + "n é", + "ř i", + "ř í", + "m o", + "ž e", + "m a", + "j í", + "v ý", + "j i", + "d ě", + "r e", + "d a", + 
"k u", + "j a", + "c i", + "r u", + "č e", + "o b", + "t ě", + "m u", + "k y", + "d i", + "š e", + "k é", + "š í", + "t u", + "v i", + "p ře", + "v í", + "s i", + "n ý", + "o d", + "so u", + "v é", + "n y", + "r i", + "d y", + "b u", + "b o", + "t y", + "l á", + "l u", + "n u", + "ž i", + "m á", + "st i", + "c í", + "z á", + "p ra", + "sk é", + "m í", + "c o", + "d u", + "d á", + "by l", + "st o", + "s a", + "t í", + "je d", + "p ří", + "p ři", + "t é", + "s í", + "č i", + "v ní", + "č a", + "d í", + "z i", + "st u", + "p e", + "b a", + "d ní", + "ro z", + "va l", + "l í", + "s po", + "k á", + "b e", + "p i", + "no u", + "ta k", + "st e", + "r y", + "l é", + "vě t", + "se m", + "p ě", + "ko n", + "ne j", + "l y", + "ko u", + "ý ch", + "b ě", + "p r", + "f i", + "p rá", + "a le", + "ja ko", + "po d", + "ž í", + "z í", + "j sou", + "j sem", + "ch o", + "l ní", + "c ké", + "t á", + "m y", + "a k", + "h u", + "va t", + "pře d", + "h la", + "k e", + "st á", + "č í", + "š i", + "s le", + "k la", + "š tě", + "lo u", + "m ů", + "z na", + "ch á", + "o r", + "p ů", + "h a", + "b i", + "ta ké", + "d ů", + "no st", + "t ře", + "te r", + "p u", + "i n", + "v r", + "ve l", + "sk u", + "v še", + "t ní", + "do b", + "by la", + "č ní", + "ja k", + "v u", + "je ho", + "b ý", + "vá ní", + "ný ch", + "po u", + "te n", + "t ři", + "v z", + "st ře", + "d va", + "h le", + "č á", + "no sti", + "c k", + "v š", + "vo u", + "s u", + "h e", + "h ra", + "je n", + "s y", + "da l", + "po z", + "s lo", + "te l", + "d ru", + "de n", + "vš ak", + "g i", + "k dy", + "by lo", + "bu de", + "st ra", + "j ší", + "m é", + "me n", + "vý ch", + "ní m", + "s m", + "ko li", + "r ů", + "t ra", + "mů že", + "ne ní", + "ho d", + "b í", + "do u", + "sk a", + "t ý", + "st ě", + "u je", + "s á", + "pě t", + "ne s", + "k rá", + "to m", + "st ví", + "v ně", + "se d", + "s vé", + "p í", + "z o", + "mu sí", + "u ž", + "tí m", + "jí cí", + "jed no", + "t r", + "ča s", + "e v", + "č ty", + "sk ý", + "ni c", + "ev ro", + 
"to ho", + "h y", + "k ter", + "r ní", + "st í", + "s vě", + "pa k", + "vše ch", + "k ů", + "n g", + "á d", + "chá zí", + "a ni", + "a r", + "jed na", + "bý t", + "t ro", + "k ra", + "pr vní", + "m no", + "ské ho", + "p á", + "p la", + "le m", + "ne bo", + "ke m", + "st ro", + "s la", + "né ho", + "z de", + "dal ší", + "ř a", + "čty ři", + "h rá", + "dru h", + "l ně", + "v la", + "sk ých", + "š ko", + "pů so", + "pro to", + "v ů", + "sk á", + "ve n", + "še st", + "d ně", + "je ště", + "me zi", + "te k", + "s ko", + "ch a", + "ně koli", + "be z", + "g ra", + "ji ž", + "č ně", + "j á", + "s lu", + "z ná", + "ve r", + "sed m", + "k ro", + "ta m", + "a no", + "v lá", + "o sm", + "byl y", + "vá m", + "ck ý", + "te ch", + "dě ji", + "vel mi", + "le ži", + "va la", + "l ý", + "t vo", + "spo le", + "ch u", + "stu p", + "mo ž", + "evro p", + "g e", + "sta l", + "j de", + "ch y", + "ro di", + "je jí", + "po li", + "de vět", + "s me", + "a ž", + "té to", + "re m", + "d é", + "f or", + "u ni", + "f o", + "ten to", + "a u", + "ka ž", + "nu la", + "na d", + "by ch", + "mo c", + "sto u", + "e x", + "le n", + "k do", + "z d", + "pra co", + "to mu", + "ný m", + "ži vo", + "ze m", + "f e", + "f u", + "ná sle", + "j o", + "sk y", + "ji ch", + "h á", + "mě l", + "dě la", + "j sme", + "p re", + "ni ce", + "ste j", + "ne m", + "st ní", + "he m", + "ná ro", + "z u", + "b li", + "ni t", + "pa r", + "a l", + "poz ději", + "ta ko", + "n ce", + "če r", + "ší m", + "ně co", + "vá l", + "ře j", + "krá t", + "á lní", + "u r", + ". 
.", + "a si", + "kter é", + "sta v", + "ma jí", + "my s", + "do bě", + "s ně", + "ce n", + "z y", + "z ku", + "t ů", + "ch od", + "s pě", + "je jich", + "sou čas", + "d r", + "va li", + "ri e", + "k te", + "pr ů", + "ze ní", + "pa t", + "a n", + "po tře", + "de m", + "d nes", + "ze mí", + "sa mo", + "zna m", + "b ra", + "má m", + "te dy", + "g o", + "hla vní", + "pou ží", + "b ní", + "ve de", + "le p", + "je k", + "pra v", + "poli ti", + "d ne", + "je m", + "le t", + "če ní", + "pro b", + "ne ž", + "dě l", + "fi l", + "č o", + "cí ch", + "st é", + "d lou", + "h i", + "a by", + "to u", + "několi k", + "d la", + "vy u", + "vi t", + "ho u", + "ck ých", + "no vé", + "či n", + "st y", + "dě lá", + "k ý", + "ob la", + "pod le", + "ra n", + "dů leži", + "ta to", + "po ku", + "ko ne", + "d ý", + "d vě", + "ž ád", + "nou t", + "t ku", + "t vr", + "cké ho", + "ro v", + "r é", + "te le", + "p sa", + "s vět", + "ti vní", + "do sta", + "te m", + "še l", + "druh é", + "s kou", + "ž o", + "jed ná", + "vý znam", + "prob lé", + "pu bli", + "vá n", + "od po", + "pod po", + "d le", + "ja ké", + "še ní", + "ví m", + "bě hem", + "na chází", + "s lou", + "pou ze", + "o tá", + "p lo", + "to vé", + "vět ši", + "ko mi", + "va jí", + "ty to", + "zá pa", + "z mě", + "mo h", + "ví ce", + "spole č", + "au to", + "pro ti", + "st ru", + "dě t", + "chá ze", + "že l", + "с т", + "е н", + "н о", + "н а", + "п р", + "т о", + "п о", + "р а", + "г о", + "к о", + "н е", + "в о", + "в а", + "е т", + "е р", + "н и", + "е л", + "и т", + "н ы", + "з а", + "р о", + "ен и", + "к а", + "л и", + "е м", + "д а", + "о б", + "л а", + "д о", + "с я", + "т ь", + "о т", + "л о", + "л ь", + "е д", + "с о", + "м и", + "р е", + "м о", + "ц и", + "пр о", + "т а", + "э то", + "к и", + "р у", + "пр и", + "т и", + "с е", + "ст а", + "в ы", + "м ы", + "в и", + "б ы", + "м а", + "е с", + "л я", + "ст и", + "л е", + "ч то", + "м е", + "р и", + "ч а", + "о д", + "е й", + "ел ь", + "ени я", + "г а", + "н у", + "с и", + "п а", + 
"ра з", + "б о", + "ст о", + "с у", + "с а", + "д у", + "е го", + "е ст", + "и н", + "ит ь", + "и з", + "ж е", + "м у", + "п ер", + "по д", + "ени е", + "с ь", + "к у", + "пр ед", + "но го", + "ны х", + "в ер", + "т е", + "но й", + "ци и", + "д е", + "р ы", + "д ел", + "л ю", + "в е", + "о н", + "м ен", + "г и", + "н я", + "б у", + "пр а", + "в се", + "ет ся", + "ст ь", + "ж а", + "до л", + "ж и", + "б е", + "ко н", + "с л", + "ш и", + "д и", + "ст в", + "с ко", + "ны е", + "ч и", + "ю т", + "д ер", + "ст ра", + "т ы", + "х од", + "щ и", + "з о", + "з на", + "но сти", + "ч ес", + "в ля", + "ва ть", + "о р", + "по л", + "в ет", + "та к", + "ш а", + "т у", + "с во", + "пр е", + "о на", + "ит ель", + "ны й", + "с ло", + "ка к", + "в л", + "но сть", + "х о", + "мо ж", + "п е", + "д ля", + "ни я", + "но е", + "ра с", + "дол ж", + "да р", + "т ель", + "с ка", + "п у", + "ст во", + "ко то", + "ра б", + "е е", + "ро д", + "э ти", + "с об", + "о ру", + "ж ен", + "ны м", + "ит и", + "ни е", + "ко м", + "д ет", + "ст у", + "г у", + "п и", + "ме ж", + "ени ю", + "т ер", + "раб от", + "во з", + "ци я", + "ко й", + "щ ест", + "г ра", + "з и", + "р я", + "меж ду", + "ст ва", + "в с", + "ел о", + "ш е", + "м ер", + "б а", + "з ы", + "л у", + "а ль", + "д ей", + "г ла", + "на род", + "к ти", + "пред ста", + "л ся", + "я вля", + "с ки", + "но в", + "ед ин", + "ро в", + "и с", + "ни ма", + "р ем", + "ход и", + "так же", + "д ру", + "а ть", + "сл ед", + "го во", + "на я", + "ю щи", + "ен ь", + "кото ры", + "х от", + "в у", + "и х", + "ем у", + "ч ит", + "ва ж", + "ор га", + "чес ки", + "щ е", + "к е", + "х а", + "по с", + "то м", + "бо ль", + "м не", + "па с", + "об ъ", + "пра в", + "кон ф", + "сл у", + "под дер", + "ст ви", + "на ш", + "ль ко", + "сто я", + "ну ю", + "л ем", + "ен ных", + "к ра", + "д ы", + "между народ", + "г да", + "не об", + "го су", + "ств у", + "ени и", + "госу дар", + "к то", + "и м", + "ч ест", + "р ет", + "во про", + "л ен", + "ел и", + "ро ва", + "ци й", + 
"на м", + "это й", + "ж ения", + "необ ходи", + "мен я", + "бы ло", + "си ли", + "ф и", + "в я", + "ш ь", + "это го", + "о ни", + "орга ни", + "бе зо", + "пр об", + "и ме", + "ре ш", + "б и", + "безо пас", + "ют ся", + "о ста", + "ен но", + "го д", + "ел а", + "предста в", + "ть ся", + "сло во", + "органи за", + "долж ны", + "это м", + "б ла", + "ч е", + "ч у", + "бла го", + "это му", + "в рем", + "с пе", + "но м", + "ени й", + "с по", + "на с", + "не т", + "з у", + "в ед", + "е ще", + "ска за", + "се й", + "ер ен", + "да н", + "са м", + "ел я", + "ра н", + "зы ва", + "явля ется", + "бу дет", + "кти в", + "т ре", + "дел е", + "м от", + "конф ерен", + "ла сь", + "ча с", + "сто ро", + "ко го", + "е з", + "не й", + "о с", + "ли сь", + "раз ору", + "пер е", + "с си", + "ны ми", + "про ц", + "го ло", + "ч ело", + "бо ле", + "чело ве", + "с ер", + "п л", + "ч ет", + "стра н", + "п я", + "бы л", + "к ла", + "то в", + "ж д", + "дел а", + "е ра", + "у же", + "со вет", + "г ен", + "безопас ности", + "ц а", + "се да", + "по з", + "от вет", + "проб лем", + "на ко", + "т ем", + "до ста", + "п ы", + "щ а", + "во й", + "су щест", + "необходи мо", + "бы ть", + "мож ет", + "д ем", + "что бы", + "е к", + "ч ер", + "у сили", + "ре с", + "ру д", + "един енных", + "д об", + "до сти", + "ств ен", + "я дер", + "год ня", + "ка за", + "се годня", + "сей час", + "то лько", + "во д", + "ес ь", + "м ного", + "бу ду", + "е в", + "ест ь", + "т ри", + "об щест", + ". 
.", + "я вл", + "вы сту", + "р ед", + "с чит", + "с ит", + "деле га", + "ло ж", + "это т", + "ф ор", + "к лю", + "воз мож", + "ва ния", + "б ли", + "и ли", + "в з", + "на ций", + "ско го", + "при ня", + "п ла", + "о ч", + "ить ся", + "ст е", + "на ши", + "которы е", + "а р", + "име ет", + "с от", + "зна ч", + "пер ь", + "след у", + "ен ы", + "та ки", + "объ единенных", + "ст ро", + "те перь", + "б ле", + "благо дар", + "раз в", + "а н", + "жи ва", + "оч ень", + "я т", + "бе з", + "об ес", + "г ро", + "ло сь", + "с ы", + "организа ции", + "ч лен", + "то го", + "она ль", + "ж да", + "все х", + "с вя", + "боле е", + "со в", + "ко гда", + "во т", + "к ре", + "к ры", + "по этому", + "во ль", + "о й", + "ген ера", + "ч ем", + "л ы", + "пол ити", + "в ен", + "конферен ции", + "проц ес", + "б я", + "ит е", + "от но", + "разв ити", + "а ф", + "ю щ", + "в но", + "ми р", + "ни и", + "ка я", + "а с", + "итель но", + "в то", + "ени ем", + "генера ль", + "пр от", + "вс ем", + "сам бле", + "ас самбле", + "о м", + "з д", + "с мот", + "ре ги", + "ч его", + "од нако", + "усили я", + "дей стви", + "ч но", + "у ча", + "об раз", + "во с", + "э та", + "пер его", + "гово р", + "ва м", + "мо ло", + "врем я", + "д ь", + "хот ел", + "г ру", + "за явл", + "пре доста", + "по ль", + "не е", + "ре зо", + "перего во", + "резо лю", + "к рет", + "поддер ж", + "обес пе", + "не го", + "представ ит", + "на де", + "к ри", + "ч ь", + "про ек", + "л ет", + "дру ги", + "ا ل", + "َ ا", + "و َ", + "ّ َ", + "ِ ي", + "أ َ", + "ل َ", + "ن َ", + "ال ْ", + "ه ُ", + "ُ و", + "م ا", + "ن ْ", + "م ن", + "ع َ", + "ن ا", + "ل ا", + "م َ", + "ت َ", + "ف َ", + "أ ن", + "ل ي", + "م ِ", + "ا ن", + "ف ي", + "ر َ", + "ي َ", + "ه ِ", + "م ْ", + "ق َ", + "ب ِ", + "ل ى", + "ي ن", + "إ ِ", + "ل ِ", + "و ا", + "ك َ", + "ه ا", + "ً ا", + "م ُ", + "و ن", + "ال م", + "ب َ", + "ي ا", + "ذ ا", + "س ا", + "ال ل", + "م ي", + "ي ْ", + "ر ا", + "ر ي", + "ل ك", + "م َا", + "ن َّ", + "ل م", + "إ ن", + "س ت", + "و م", + "ّ َا", + "ل َا", 
+ "ه م", + "ّ ِ", + "ك ُ", + "ك ان", + "س َ", + "ب ا", + "د ي", + "ح َ", + "ع ْ", + "ب ي", + "ال أ", + "و ل", + "ف ِي", + "ر ِ", + "د ا", + "مِ نْ", + "ُو نَ", + "و ْ", + "ه َا", + "ّ ُ", + "ال س", + "ال َ", + "ن ي", + "ل ْ", + "ت ُ", + "ه ل", + "ر ة", + "د َ", + "س ْ", + "ت ِ", + "ن َا", + "ر ْ", + "الل َّ", + "سا مي", + "ك ن", + "ك ل", + "ه َ", + "عَ لَ", + "ع لى", + "م ع", + "إ لى", + "ق د", + "ال ر", + "ُو ا", + "ي ر", + "ع ن", + "ي ُ", + "ن ِ", + "ب ْ", + "ال ح", + "هُ مْ", + "ق ا", + "ذ ه", + "ال ت", + "ِي نَ", + "ج َ", + "ه ذا", + "ع د", + "ال ع", + "د ْ", + "قَ الَ", + "ر ُ", + "ي م", + "ي ة", + "ن ُ", + "خ َ", + "ر ب", + "ال ك", + "و َا", + "أ نا", + "ة ِ", + "ال ن", + "ح د", + "ع ِ", + "ت ا", + "ه و", + "ف ا", + "ع ا", + "ال ش", + "ل ُ", + "ي ت", + "ذ َا", + "ي ع", + "ال ذ", + "ح ْ", + "ال ص", + "إِ نَّ", + "ج ا", + "ع لي", + "ك َا", + "ب ُ", + "ت ع", + "و ق", + "م ل", + "ل َّ", + "ي د", + "أ خ", + "ر ف", + "ت ي", + "ال ِ", + "ّ ا", + "ذ لك", + "أَ نْ", + "س ِ", + "ت وم", + "م ر", + "مَ نْ", + "ب ل", + "ال ق", + "الل ه", + "ِي َ", + "ك م", + "ذ َ", + "ع ل", + "ح ب", + "س ي", + "ع ُ", + "ال ج", + "ال د", + "ش َ", + "ت ك", + "ف ْ", + "ص َ", + "ل ل", + "د ِ", + "ب ر", + "ف ِ", + "ت ه", + "أ ع", + "ت ْ", + "ق ْ", + "الْ أَ", + "ئ ِ", + "عَ نْ", + "و ر", + "ح ا", + "ال َّ", + "م ت", + "ف ر", + "د ُ", + "ه نا", + "وَ أَ", + "ت ب", + "ة ُ", + "أ ي", + "س ب", + "ري د", + "و ج", + "كُ مْ", + "ح ِ", + "ك ْ", + "د ر", + "َا ء", + "ه ذه", + "ال ط", + "الْ مُ", + "د ة", + "ق ل", + "غ َ", + "ي وم", + "الَّ ذ", + "ك ر", + "ت ر", + "ك ِ", + "ك ي", + "عَلَ ى", + "رَ ب", + "ع ة", + "ق ُ", + "ج ْ", + "ف ض", + "ل ة", + "ه ْ", + "ر َا", + "وَ لَ", + "الْ مَ", + "أَ نَّ", + "ي َا", + "أ ُ", + "ش ي", + "اللَّ هُ", + "لَ ى", + "ق ِ", + "أ ت", + "عَلَ يْ", + "اللَّ هِ", + "ال ب", + "ض َ", + "ة ً", + "ق ي", + "ا ر", + "ب د", + "خ ْ", + "سْ تَ", + "ط َ", + "قَ دْ", + "ذه ب", + "أ م", + "ما ذا", + "وَ إِ", + "ة ٌ", + "و نَ", + "لي لى", + "و لا", + "ح ُ", + "ه ي", + "ص ل", + "ال خ", 
+ "و د", + "لي س", + "ل دي", + "ق ال", + "كَا نَ", + "م َّ", + "ح ي", + "ت م", + "ل ن", + "وَ لَا", + "ب ع", + "يم كن", + "س ُ", + "ة َ", + "ح ت", + "ر ًا", + "ك ا", + "ش ا", + "هِ مْ", + "لَ هُ", + "ز َ", + "دا ً", + "م س", + "ك ث", + "الْ عَ", + "ج ِ", + "ص ْ", + "ف َا", + "ل ه", + "و ي", + "ع َا", + "هُ وَ", + "ب ِي", + "ب َا", + "أ س", + "ث َ", + "ل ِي", + "ر ض", + "الر َّ", + "لِ كَ", + "ت َّ", + "ف ُ", + "ق ة", + "ف عل", + "مِ ن", + "ال آ", + "ث ُ", + "س م", + "م َّا", + "بِ هِ", + "ت ق", + "خ ر", + "ل قد", + "خ ل", + "ش ر", + "أن ت", + "ل َّا", + "س ن", + "الس َّ", + "الذ ي", + "س َا", + "و ما", + "ز ل", + "و ب", + "أ ْ", + "إ ذا", + "ر ِي", + "ح ة", + "ن ِي", + "الْ حَ", + "وَ قَالَ", + "ب ه", + "ة ٍ", + "س أ", + "ر ٌ", + "ب ال", + "م ة", + "ش ْ", + "و ت", + "عن د", + "ف س", + "بَ عْ", + "ه ر", + "ق ط", + "أ ح", + "إن ه", + "و ع", + "ف ت", + "غ ا", + "هنا ك", + "ب ت", + "مِ نَ", + "س ر", + "ذَ لِكَ", + "ر س", + "حد ث", + "غ ْ", + "ّ ِي", + "ال إ", + "وَ يَ", + "ج ل", + "ا ست", + "ق ِي", + "ع ب", + "و س", + "ي ش", + "الَّذ ِينَ", + "تا ب", + "د ِي", + "ج ب", + "ك ون", + "ب ن", + "ال ث", + "لَ يْ", + "ب عد", + "وَ الْ", + "فَ أَ", + "ع م", + "هُ م", + "ت ن", + "ذ ْ", + "أ ص", + "أ ين", + "رَب ِّ", + "الذ ين", + "إِ ن", + "ب ين", + "ج ُ", + "عَلَيْ هِ", + "ح َا", + "ل و", + "ست ط", + "ظ ر", + "لَ مْ", + "ء ِ", + "كُ ل", + "ط ل", + "ت َا", + "ض ُ", + "كن ت", + "ل ًا", + "م ٌ", + "ق بل", + "ـ ـ", + "ذ ِ", + "قَ وْ", + "ص ِ", + "م ًا", + "كان ت", + "ص ا", + "ي ق", + "ال ف", + "ال نا", + "م ٍ", + "إِ نْ", + "ال نَّ", + "ج د", + "وَ مَا", + "ت ت", + "ب ح", + "م كان", + "كي ف", + "ّ ة", + "ال ا", + "ج َا", + "أ و", + "سا عد", + "ض ِ", + "إ لا", + "را ً", + "ق َا", + "ر أ", + "ع ت", + "أ حد", + "ه د", + "ض ا", + "ط ر", + "أ ق", + "ما ء", + "د َّ", + "ال با", + "م ُو", + "أَ وْ", + "ط ا", + "ق ُو", + "خ ِ", + "ت ل", + "ستط يع", + "د َا", + "الن َّا", + "إ لَى", + "وَ تَ", + "هَ ذَا", + "ب ة", + "علي ك", + "ج ر", + "ال من", + "ز ا", + "ر ٍ", + "د ع", + "ّ ًا", + "س ة", 
+ "ثُ مَّ", + "شي ء", + "ال غ", + "ت ح", + "ر ُونَ", + "ال يوم", + "م ِي", + "ن ُوا", + "أ ر", + "تُ مْ", + "ع ر", + "ي ف", + "أ ب", + "د ًا", + "ص َا", + "الت َّ", + "أ ريد", + "ال ز", + "يَ وْ", + "إ لي", + "ج ي", + "يَ عْ", + "فض ل", + "ال إن", + "أن ه", + "n g", + "i 4", + "a n", + "s h", + "z h", + "i 2", + "ng 1", + "u 4", + "i 1", + "ng 2", + "d e", + "j i", + "a o", + "x i", + "u 3", + "de 5", + "e 4", + "i 3", + "ng 4", + "an 4", + "e n", + "u o", + "sh i4", + "an 2", + "u 2", + "c h", + "u 1", + "ng 3", + "a 1", + "an 1", + "e 2", + "a 4", + "e i4", + "o ng1", + "a i4", + "ao 4", + "h u", + "a ng1", + "l i", + "y o", + "an 3", + "w ei4", + "uo 2", + "n 1", + "en 2", + "ao 3", + "e 1", + "y u", + "q i", + "e ng2", + "zh o", + "a ng3", + "a ng4", + "a ng2", + "uo 4", + "m i", + "g e4", + "y i1", + "g uo2", + "e r", + "b i", + "a 3", + "h e2", + "e 3", + "y i2", + "d i4", + "zh ong1", + "b u4", + "g u", + "a i2", + "n 2", + "z ai4", + "sh i2", + "e ng1", + "r en2", + "o ng2", + "xi an4", + "y i", + "x u", + "n 4", + "l i4", + "en 4", + "y u2", + "e i2", + "yi2 ge4", + "o u4", + "e i3", + "d i", + "u i4", + "a 2", + "yo u3", + "ao 1", + "d a4", + "ch eng2", + "en 1", + "e ng4", + "y i4", + "s i1", + "zh i4", + "ji a1", + "yu an2", + "n i", + "t a1", + "de5 yi2ge4", + "k e1", + "sh u3", + "x i1", + "j i2", + "ao 2", + "t i", + "o u3", + "o ng4", + "xi a4", + "a i1", + "g ong1", + "zh i1", + "en 3", + "w ei2", + "j u", + "xu e2", + "q u1", + "zho u1", + "er 3", + "mi ng2", + "zho ng3", + "l i3", + "w u4", + "y i3", + "uo 1", + "e 5", + "j i4", + "xi ng2", + "ji an4", + "hu a4", + "y u3", + "uo 3", + "j i1", + "a i3", + "z uo4", + "h ou4", + "hu i4", + "e i1", + "ni an2", + "q i2", + "p i", + "d ao4", + "sh eng1", + "de 2", + "d ai4", + "u an2", + "zh e4", + "zh eng4", + "b en3", + "sh ang4", + "zh u3", + "b ei4", + "y e4", + "ch u1", + "zh an4", + "l e5", + "l ai2", + "sh i3", + "n an2", + "r en4", + "yo u2", + "k e4", + "b a1", + "f u4", + "d ui4", + "y a4", + 
"m ei3", + "z i4", + "xi n1", + "ji ng1", + "zh u", + "n 3", + "yo ng4", + "m u4", + "ji ao4", + "y e3", + "ji n4", + "bi an4", + "l u4", + "q i1", + "sh e4", + "xi ang1", + "o ng3", + "sh u4", + "d ong4", + "s uo3", + "gu an1", + "s an1", + "b o", + "t e4", + "d uo1", + "f u2", + "mi n2", + "l a1", + "zh i2", + "zh en4", + "o u1", + "w u3", + "m a3", + "i 5", + "z i5", + "j u4", + "er 4", + "y ao4", + "xia4 de5yi2ge4", + "s i4", + "t u2", + "sh an1", + "z ui4", + "ch u", + "yi n1", + "er 2", + "t ong2", + "d ong1", + "y u4", + "y an2", + "qi an2", + "shu3 xia4de5yi2ge4", + "ju n1", + "k e3", + "w en2", + "f a3", + "l uo2", + "zh u4", + "x i4", + "k ou3", + "b ei3", + "ji an1", + "f a1", + "di an4", + "ji ang1", + "wei4 yu2", + "xi ang4", + "zh i3", + "e ng3", + "f ang1", + "l an2", + "sh u", + "r i4", + "li an2", + "sh ou3", + "m o", + "qi u2", + "ji n1", + "h uo4", + "shu3xia4de5yi2ge4 zhong3", + "f en1", + "n ei4", + "g ai1", + "mei3 guo2", + "u n2", + "g e2", + "b ao3", + "qi ng1", + "g ao1", + "t ai2", + "d u", + "xi ao3", + "ji e2", + "ti an1", + "ch ang2", + "q uan2", + "li e4", + "h ai3", + "f ei1", + "t i3", + "ju e2", + "o u2", + "c i3", + "z u2", + "n i2", + "bi ao3", + "zhong1 guo2", + "d u4", + "yu e4", + "xi ng4", + "sh eng4", + "ch e1", + "d an1", + "ji e1", + "li n2", + "pi ng2", + "f u3", + "g u3", + "ji e4", + "w o", + "v 3", + "sh eng3", + "n a4", + "yu an4", + "zh ang3", + "gu an3", + "d ao3", + "z u3", + "di ng4", + "di an3", + "c eng2", + "ren2 kou3", + "t ai4", + "t ong1", + "g uo4", + "n eng2", + "ch ang3", + "hu a2", + "li u2", + "yi ng1", + "xi ao4", + "c i4", + "bian4 hua4", + "li ang3", + "g ong4", + "zho ng4", + "de5 yi1", + "s e4", + "k ai1", + "w ang2", + "ji u4", + "sh i1", + "sh ou4", + "m ei2", + "k u", + "s u", + "f eng1", + "z e2", + "tu2 shi4", + "t i2", + "q i4", + "ji u3", + "sh en1", + "zh e3", + "ren2kou3 bian4hua4", + "ren2kou3bian4hua4 tu2shi4", + "di4 qu1", + "y ang2", + "m en", + "men 5", + "l ong2", + "bi ng4", + "ch 
an3", + "zh u1", + "w ei3", + "w ai4", + "xi ng1", + "bo 1", + "b i3", + "t ang2", + "hu a1", + "bo 2", + "shu i3", + "sh u1", + "d ou1", + "s ai4", + "ch ao2", + "b i4", + "li ng2", + "l ei4", + "da4 xue2", + "f en4", + "shu3 de5", + "m u3", + "ji ao1", + "d ang1", + "ch eng1", + "t ong3", + "n v3", + "q i3", + "y an3", + "mi an4", + "l uo4", + "ji ng4", + "g e1", + "r u4", + "d an4", + "ri4 ben3", + "p u3", + "yu n4", + "hu ang2", + "wo 3", + "l v", + "h ai2", + "shi4 yi1", + "xi e1", + "yi ng3", + "w u2", + "sh en2", + "w ang3", + "gu ang3", + "li u4", + "s u4", + "shi4 zhen4", + "c an1", + "c ao3", + "xi a2", + "k a3", + "d a2", + "h u4", + "b an4", + "d ang3", + "h u2", + "z ong3", + "de ng3", + "de5yi2ge4 shi4zhen4", + "ch uan2", + "mo 4", + "zh ang1", + "b an1", + "mo 2", + "ch a2", + "c e4", + "zhu3 yao4", + "t ou2", + "j u2", + "shi4 wei4yu2", + "s a4", + "u n1", + "ke3 yi3", + "d u1", + "h an4", + "li ang4", + "sh a1", + "ji a3", + "z i1", + "lv 4", + "f u1", + "xi an1", + "x u4", + "gu ang1", + "m eng2", + "b ao4", + "yo u4", + "r ong2", + "zhi1 yi1", + "w ei1", + "m ao2", + "guo2 jia1", + "c ong2", + "g ou4", + "ti e3", + "zh en1", + "d u2", + "bi an1", + "c i2", + "q u3", + "f an4", + "xi ang3", + "m en2", + "j u1", + "h ong2", + "z i3", + "ta1 men5", + "ji 3", + "z ong1", + "zhou1 de5yi2ge4shi4zhen4", + "t uan2", + "ji ng3", + "gong1 si1", + "xi e4", + "l i2", + "li4 shi3", + "b ao1", + "g ang3", + "gu i1", + "zh eng1", + "zhi2 wu4", + "ta1 de5", + "pi n3", + "zhu an1", + "ch ong2", + "shi3 yong4", + "w a3", + "sh uo1", + "chu an1", + "l ei2", + "w an1", + "h uo2", + "q u", + "s u1", + "z ao3", + "g ai3", + "q u4", + "g u4", + "l u", + "x i2", + "h ang2", + "yi ng4", + "c un1", + "g en1", + "yi ng2", + "ti ng2", + "cheng2 shi4", + "ji ang3", + "li ng3", + "l un2", + "bu4 fen4", + "de ng1", + "xu an3", + "dong4 wu4", + "de2 guo2", + "xi an3", + "f an3", + "zh e5", + "h an2", + "h ao4", + "m i4", + "r an2", + "qi n1", + "ti ao2", + "zh an3", + "h i", + 
"k a", + "n o", + "t e", + "s u", + "s hi", + "t a", + "t o", + "n a", + "w a", + "o u", + "r u", + "n i", + "k u", + "k i", + "g a", + "d e", + "k o", + "m a", + "r e", + "r a", + "m o", + "t su", + "w o", + "e n", + "r i", + "s a", + "d a", + "s e", + "j i", + "h a", + "c hi", + "k e", + "te ki", + "m i", + "y ou", + "s h", + "s o", + "y o", + "y a", + "na i", + "t te", + "a ru", + "b a", + "u u", + "t ta", + "ka i", + "ka n", + "shi te", + "m e", + "d o", + "mo no", + "se i", + "r o", + "ko to", + "ka ra", + "shi ta", + "b u", + "m u", + "c h", + "su ru", + "k ou", + "g o", + "ma su", + "ta i", + "f u", + "k en", + "i u", + "g en", + "wa re", + "shi n", + "z u", + "a i", + "o n", + "o ku", + "g i", + "d ou", + "n e", + "y uu", + "i ru", + "i te", + "ji ko", + "de su", + "j u", + "ra re", + "sh u", + "b e", + "sh ou", + "s ha", + "se kai", + "s ou", + "k you", + "ma shita", + "s en", + "na ra", + "sa n", + "ke i", + "i ta", + "a ri", + "i tsu", + "ko no", + "j ou", + "na ka", + "ch ou", + "so re", + "g u", + "na ru", + "ga ku", + "re ba", + "g e", + "h o", + "i n", + "hi to", + "sa i", + "na n", + "da i", + "tsu ku", + "shi ki", + "sa re", + "na ku", + "p p", + "bu n", + "ju n", + "so no", + "ka ku", + "z ai", + "b i", + "to u", + "wa ta", + "sh uu", + "i i", + "te i", + "ka re", + "y u", + "shi i", + "ma de", + "sh o", + "a n", + "ke reba", + "shi ka", + "i chi", + "ha n", + "de ki", + "ni n", + "ware ware", + "na kereba", + "o ite", + "h ou", + "ya ku", + "ra i", + "mu jun", + "l e", + "yo ku", + "bu tsu", + "o o", + "ko n", + "o mo", + "ga e", + "nara nai", + "ta chi", + "z en", + "ch uu", + "kan gae", + "ta ra", + "to ki", + "ko ro", + "mujun teki", + "z e", + "na ga", + "ji n", + "shi ma", + "te n", + "i ki", + "i ku", + "no u", + "i masu", + "r ou", + "h on", + "ka e", + "t to", + "ko re", + "ta n", + "ki ta", + "i s", + "da tta", + "ji tsu", + "ma e", + "i e", + "me i", + "da n", + "h e", + "to ku", + "dou itsu", + "ri tsu", + "k yuu", + "h you", + "rare 
ta", + "kei sei", + "k kan", + "rare ru", + "m ou", + "do ko", + "r you", + "da ke", + "naka tta", + "so ko", + "ta be", + "e r", + "ha na", + "c o", + "fu ku", + "p a", + "so n", + "ya su", + "ch o", + "wata ku", + "ya ma", + "z a", + "k yo", + "gen zai", + "b oku", + "a ta", + "j a", + "ka wa", + "ma sen", + "j uu", + "ro n", + "b o", + "na tte", + "wataku shi", + "yo tte", + "ma i", + "g ou", + "ha i", + "mo n", + "ba n", + "ji shin", + "c a", + "re te", + "n en", + "o ka", + "ka gaku", + "na tta", + "p o", + "ka ru", + "na ri", + "m en", + "ma ta", + "e i", + "ku ru", + "ga i", + "ka ri", + "sha kai", + "kou i", + "yo ri", + "se tsu", + "j o", + "re ru", + "to koro", + "ju tsu", + "i on", + "sa ku", + "tta i", + "c ha", + "nin gen", + "n u", + "c e", + "ta me", + "kan kyou", + "de n", + "o oku", + "i ma", + "wata shi", + "tsuku ru", + "su gi", + "b en", + "ji bun", + "shi tsu", + "ke ru", + "ki n", + "ki shi", + "shika shi", + "mo to", + "ma ri", + "i tte", + "de shita", + "n de", + "ari masu", + "te r", + "z ou", + "ko e", + "ze ttai", + "kkan teki", + "h en", + "re kishi", + "deki ru", + "tsu ka", + "l a", + "i tta", + "o i", + "ko butsu", + "mi ru", + "sh oku", + "shi masu", + "gi jutsu", + "g you", + "jou shiki", + "a tta", + "ho do", + "ko ko", + "tsuku rareta", + "z oku", + "hi tei", + "ko ku", + "rekishi teki", + "ke te", + "o ri", + "i mi", + "ka ko", + "naga ra", + "ka karu", + "shu tai", + "ha ji", + "ma n", + "ta ku", + "ra n", + "douitsu teki", + "z o", + "me te", + "re i", + "tsu u", + "sare te", + "gen jitsu", + "p e", + "s t", + "ba i", + "na wa", + "ji kan", + "wa ru", + "r t", + "a tsu", + "so ku", + "koui teki", + "a ra", + "u ma", + "a no", + "i de", + "ka ta", + "te tsu", + "ga wa", + "ke do", + "re ta", + "mi n", + "sa you", + "tte ru", + "to ri", + "p u", + "ki mi", + "b ou", + "mu ra", + "sare ru", + "ma chi", + "k ya", + "o sa", + "kon na", + "a ku", + "a l", + "sare ta", + "i pp", + "shi ku", + "u chi", + "hito tsu", + "ha tara", + 
"tachi ba", + "shi ro", + "ka tachi", + "to mo", + "e te", + "me ru", + "ni chi", + "da re", + "ka tta", + "e ru", + "su ki", + "a ge", + "oo ki", + "ma ru", + "mo ku", + "o ko", + "kangae rareru", + "o to", + "tan ni", + "ta da", + "tai teki", + "mo tte", + "ki nou", + "shi nai", + "k ki", + "u e", + "ta ri", + "l i", + "ra nai", + "k kou", + "mi rai", + "pp on", + "go to", + "hi n", + "hi tsu", + "te ru", + "mo chi", + "ka tsu", + "re n", + "n yuu", + "su i", + "zu ka", + "tsu ite", + "no mi", + "su gu", + "ku da", + "tetsu gaku", + "i ka", + "ron ri", + "o ki", + "ni ppon", + "p er", + "shi mashita", + "chi shiki", + "cho kkanteki", + "su ko", + "t ion", + "ku u", + "a na", + "a rou", + "ka tte", + "ku ri", + "i nai", + "hyou gen", + "i shiki", + "do ku", + "a tte", + "a tara", + "to n", + "wa ri", + "ka o", + "sei san", + "hana shi", + "s i", + "ka ke", + "na ji", + "su nawa", + "sunawa chi", + "u go", + "su u", + "ba ra", + "le v", + "hi ro", + "i wa", + "be tsu", + "yo i", + "se ru", + "shite ru", + "rare te", + "to shi", + "se ki", + "tai ritsu", + "wa kara", + "to kyo", + "k ka", + "k yoku", + "u n", + "i ro", + "mi te", + "sa ki", + "kan ji", + "mi ta", + "su be", + "r yoku", + "ma tta", + "kuda sai", + "omo i", + "ta no", + "ware ru", + "co m", + "hitsu you", + "ka shi", + "re nai", + "kan kei", + "a to", + "ga tte", + "o chi", + "mo tsu", + "in g", + "son zai", + "l l", + "o re", + "tai shite", + "a me", + "sei mei", + "ka no", + "gi ri", + "kangae ru", + "yu e", + "a sa", + "o naji", + "yo ru", + "ni ku", + "osa ka", + "suko shi", + "c k", + "ta ma", + "kano jo", + "ki te", + "mon dai", + "a mari", + "e ki", + "ko jin", + "ha ya", + "i t", + "de te", + "atara shii", + "a wa", + "ga kkou", + "tsu zu", + "shu kan", + "i mashita", + "mi na", + "ata e", + "da rou", + "hatara ku", + "ga ta", + "da chi", + "ma tsu", + "ari masen", + "sei butsu", + "mi tsu", + "he ya", + "yasu i", + "d i", + "de ni", + "no ko", + "ha ha", + "do mo", + "ka mi", + "su deni", + 
"na o", + "ra ku", + "i ke", + "a ki", + "me ta", + "l o", + "ko domo", + "so shite", + "ga me", + "ba kari", + "to te", + "ha tsu", + "mi se", + "moku teki", + "da kara" + ] + } +} \ No newline at end of file diff --git a/tests/tts_tests2/test_xtts_gpt_train.py b/tests/tts_tests2/test_xtts_gpt_train.py new file mode 100644 index 0000000000..5e3bc22648 --- /dev/null +++ b/tests/tts_tests2/test_xtts_gpt_train.py @@ -0,0 +1,163 @@ +import os +import shutil + +import torch +from trainer import Trainer, TrainerArgs + +from tests import get_tests_output_path +from TTS.config.shared_configs import BaseDatasetConfig +from TTS.tts.datasets import load_tts_samples +from TTS.tts.layers.xtts.dvae import DiscreteVAE +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig + +config_dataset = BaseDatasetConfig( + formatter="ljspeech", + dataset_name="ljspeech", + path="tests/data/ljspeech/", + meta_file_train="metadata.csv", + meta_file_val="metadata.csv", + language="en", +) + +DATASETS_CONFIG_LIST = [config_dataset] + +# Logging parameters +RUN_NAME = "GPT_XTTS_LJSpeech_FT" +PROJECT_NAME = "XTTS_trainer" +DASHBOARD_LOGGER = "tensorboard" +LOGGER_URI = None + +# Set here the path that the checkpoints will be saved. 
Default: ./run/training/ +OUT_PATH = os.path.join(get_tests_output_path(), "train_outputs", "xtts_tests") +os.makedirs(OUT_PATH, exist_ok=True) + +# Create DVAE checkpoint and mel_norms at test time +# DVAE parameters: For the training we need the dvae to extract the dvae tokens, so you must provide the paths for this model +DVAE_CHECKPOINT = os.path.join(OUT_PATH, "dvae.pth") # DVAE checkpoint +MEL_NORM_FILE = os.path.join( + OUT_PATH, "mel_stats.pth" +) # Mel spectrogram norms, required for dvae mel spectrogram extraction +dvae = DiscreteVAE( + channels=80, + normalization=None, + positional_dims=1, + num_tokens=8192, + codebook_dim=512, + hidden_dim=512, + num_resnet_blocks=3, + kernel_size=3, + num_layers=2, + use_transposed_convs=False, +) +torch.save(dvae.state_dict(), DVAE_CHECKPOINT) +mel_stats = torch.ones(80) +torch.save(mel_stats, MEL_NORM_FILE) + + +# XTTS transfer learning parameters: You need to provide the paths of the XTTS model checkpoint that you want to fine-tune. +TOKENIZER_FILE = "tests/inputs/xtts_vocab.json" # vocab.json file +XTTS_CHECKPOINT = None # "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/132500_gpt_ema_coqui_tts_with_enhanced_hifigan.pth" # model.pth file + + +# Training sentence generation +SPEAKER_REFERENCE = "tests/data/ljspeech/wavs/LJ001-0002.wav" # speaker reference to be used in training test sentences +LANGUAGE = config_dataset.language + + +# Training Parameters +OPTIMIZER_WD_ONLY_ON_WEIGHTS = True # for multi-gpu training please make it False +START_WITH_EVAL = False # if True it will start with evaluation +BATCH_SIZE = 2 # set here the batch size +GRAD_ACUMM_STEPS = 1 # set here the grad accumulation steps +# Note: we recommend that BATCH_SIZE * GRAD_ACUMM_STEPS be at least 252 for more efficient training. You can increase/decrease BATCH_SIZE but then set GRAD_ACUMM_STEPS accordingly.
+ + +# init args and config +model_args = GPTArgs( + max_conditioning_length=132300, # 6 secs + min_conditioning_length=66150, # 3 secs + debug_loading_failures=False, + max_wav_length=255995, # ~11.6 seconds + max_text_length=200, + mel_norm_file=MEL_NORM_FILE, + dvae_checkpoint=DVAE_CHECKPOINT, + xtts_checkpoint=XTTS_CHECKPOINT, # checkpoint path of the model that you want to fine-tune + tokenizer_file=TOKENIZER_FILE, + gpt_num_audio_tokens=8194, + gpt_start_audio_token=8192, + gpt_stop_audio_token=8193, +) +audio_config = XttsAudioConfig( + sample_rate=22050, dvae_sample_rate=22050, diffusion_sample_rate=24000, output_sample_rate=24000 +) +config = GPTTrainerConfig( + epochs=1, + output_path=OUT_PATH, + model_args=model_args, + run_name=RUN_NAME, + project_name=PROJECT_NAME, + run_description=""" + GPT XTTS training + """, + dashboard_logger=DASHBOARD_LOGGER, + logger_uri=LOGGER_URI, + audio=audio_config, + batch_size=BATCH_SIZE, + batch_group_size=48, + eval_batch_size=BATCH_SIZE, + num_loader_workers=8, + eval_split_max_size=256, + print_step=50, + plot_step=100, + log_model_step=1000, + save_step=10000, + save_n_checkpoints=1, + save_checkpoints=True, + # target_loss="loss", + print_eval=False, + # Optimizer values like tortoise, pytorch implementation with modifications to not apply WD to non-weight parameters. + optimizer="AdamW", + optimizer_wd_only_on_weights=OPTIMIZER_WD_ONLY_ON_WEIGHTS, + optimizer_params={"betas": [0.9, 0.96], "eps": 1e-8, "weight_decay": 1e-2}, + lr=5e-06, # learning rate + lr_scheduler="MultiStepLR", + # it was adjusted accordingly for the new step scheme + lr_scheduler_params={"milestones": [50000 * 18, 150000 * 18, 300000 * 18], "gamma": 0.5, "last_epoch": -1}, + test_sentences=[ + { + "text": "This cake is great.
It's so delicious and moist.", + "speaker_wav": SPEAKER_REFERENCE, + "language": LANGUAGE, + }, + ], +) + +# init the model from config +model = GPTTrainer.init_from_config(config) + +# load training samples +train_samples, eval_samples = load_tts_samples( + DATASETS_CONFIG_LIST, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) + +# init the trainer and 🚀 +trainer = Trainer( + TrainerArgs( + restore_path=None, # xtts checkpoint is restored via xtts_checkpoint key so no need of restore it using Trainer restore_path parameter + skip_train_epoch=False, + start_with_eval=True, + grad_accum_steps=GRAD_ACUMM_STEPS, + ), + config, + output_path=OUT_PATH, + model=model, + train_samples=train_samples, + eval_samples=eval_samples, +) +trainer.fit() + +# remove output path +shutil.rmtree(OUT_PATH) From ec7f54768a5212214aefc45e3400675cdfd03397 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Sat, 21 Oct 2023 17:37:51 -0300 Subject: [PATCH 13/24] Rebase bug fix and update recipe --- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 9 ++--- recipes/ljspeech/xtts_v1/train_gpt_xtts.py | 38 ++++++++++++++++------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 22577ad495..e93063faf6 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -1,7 +1,5 @@ -import os -import sys from dataclasses import dataclass, field -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Tuple, Union import torch import torch.nn as nn @@ -12,13 +10,10 @@ from trainer.torch import DistributedSampler from trainer.trainer_utils import get_optimizer, get_scheduler -from TTS.tts.configs.tortoise_config import TortoiseConfig from TTS.tts.configs.xtts_config import XttsConfig from TTS.tts.datasets.dataset import TTSDataset from 
TTS.tts.layers.tortoise.arch_utils import TorchMelSpectrogram from TTS.tts.layers.xtts.dvae import DiscreteVAE -from TTS.tts.layers.xtts.gpt import GPT -from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer from TTS.tts.layers.xtts.trainer.dataset import XTTSDataset from TTS.tts.models.base_tts import BaseTTS @@ -456,7 +451,7 @@ def load_checkpoint( ): # pylint: disable=unused-argument, disable=W0201, disable=W0102, redefined-builtin """Load the model checkpoint and setup for training or inference""" - state, _ = self.xtts.get_compatible_checkpoint_state(checkpoint_path) + state = self.xtts.get_compatible_checkpoint_state_dict(checkpoint_path) # load the model weights self.xtts.load_state_dict(state, strict=strict) diff --git a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py index 641d050cb9..6fb1c221ca 100644 --- a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -5,6 +5,8 @@ from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig +from TTS.utils.manage import ModelManager + # Logging parameters RUN_NAME = "GPT_XTTS_LJSpeech_FT" @@ -22,7 +24,7 @@ GRAD_ACUMM_STEPS = 84 # set here the grad accumulation steps # Note: we recommend that BATCH_SIZE * GRAD_ACUMM_STEPS need to be at least 252 for more efficient training. You can increase/decrease BATCH_SIZE but then set GRAD_ACUMM_STEPS accordingly. -# Define here the dataset that you want to use for the fine tuning +# Define here the dataset that you want to use for the fine-tuning on. 
config_dataset = BaseDatasetConfig( formatter="ljspeech", dataset_name="ljspeech", @@ -31,20 +33,34 @@ language="en", ) +# Add here the configs of the datasets DATASETS_CONFIG_LIST = [config_dataset] -# ToDo: update with the latest released checkpoints +# Define the path where XTTS v1.1.1 files will be downloaded +CHECKPOINTS_OUT_PATH = os.path.join(OUT_PATH, "XTTS_v1.1_original_model_files/") +os.makedirs(CHECKPOINTS_OUT_PATH, exist_ok=True) -# DVAE parameters: For the training we need the dvae to extract the dvae tokens, given that you must provide the paths for this model -DVAE_CHECKPOINT = "/raid/datasets/xtts_models/dvae.pth" # DVAE checkpoint -MEL_NORM_FILE = ( - "/raid/datasets/xtts_models/mel_stats.pth" # Mel spectrogram norms, required for dvae mel spectrogram extraction -) -# XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. -TOKENIZER_FILE = "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/tokenizer_merged_5.json" # vocab.json file -XTTS_CHECKPOINT = "/raid/edresson/dev/Checkpoints/XTTS_evaluation/xtts_style_emb_repetition_fix_gt/132500_gpt_ema_coqui_tts_with_enhanced_hifigan.pth" # model.pth file +# DVAE files +DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/denoising_dvae_v3_small.pth" +MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/mel_stats.pth" +# download DVAE files +print(" > Downloading DVAE files!") +ModelManager._download_model_files([MEL_NORM_LINK, DVAE_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=True) +# Set the path to the downloaded files +DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, DVAE_CHECKPOINT_LINK.split("/")[-1]) +MEL_NORM_FILE = os.path.join(CHECKPOINTS_OUT_PATH, MEL_NORM_LINK.split("/")[-1]) + +# Download XTTS v1.1 checkpoint +TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/vocab.json" +XTTS_CHECKPOINT_LINK = 
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/model.pth" +print(" > Downloading XTTS v1.1 files!") +ModelManager._download_model_files([TOKENIZER_FILE_LINK, XTTS_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=True) + +# XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. +TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, TOKENIZER_FILE_LINK.split("/")[-1]) # vocab.json file +XTTS_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, XTTS_CHECKPOINT_LINK.split("/")[-1]) # model.pth file # Training sentences generations SPEAKER_REFERENCE = ( @@ -71,9 +87,11 @@ def main(): gpt_start_audio_token=8192, gpt_stop_audio_token=8193, ) + # define audio config audio_config = XttsAudioConfig( sample_rate=22050, dvae_sample_rate=22050, diffusion_sample_rate=24000, output_sample_rate=24000 ) + # training parameters config config = GPTTrainerConfig( output_path=OUT_PATH, model_args=model_args, From e8a1a50273c1ffaa3c1977b765d5060e8fbde76b Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 09:26:36 -0300 Subject: [PATCH 14/24] Remove unused vars in Delightful TTS layers tests --- tests/tts_tests2/test_delightful_tts_layers.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/tts_tests2/test_delightful_tts_layers.py b/tests/tts_tests2/test_delightful_tts_layers.py index 073bb1eb5a..2eacc06598 100644 --- a/tests/tts_tests2/test_delightful_tts_layers.py +++ b/tests/tts_tests2/test_delightful_tts_layers.py @@ -28,7 +28,7 @@ def test_acoustic_model(): dummy_tokens = torch.rand((1, 41)).long().to(device) - dummy_text_lens = torch.tensor([41]).to(device) + dummy_text_lens = torch.tensor([41]).long().to(device) dummy_spec = torch.rand((1, 100, 207)).to(device) dummy_spec_lens = torch.tensor([207]).to(device) dummy_pitch = torch.rand((1, 1, 207)).long().to(device) @@ -38,6 +38,7 @@ def test_acoustic_model(): args.num_mels = 100 acoustic_model = 
AcousticModel(args=args, tokenizer=tokenizer, speaker_manager=None).to(device) + acoustic_model = acoustic_model.train() output = acoustic_model( tokens=dummy_tokens, @@ -56,11 +57,7 @@ def test_acoustic_model(): def test_hifi_decoder(): dummy_input = torch.rand((1, 207, 100)).to(device) - dummy_text_lens = torch.tensor([41]).to(device) - dummy_spec = torch.rand((1, 100, 207)).to(device) dummy_spec_lens = torch.tensor([207]).to(device) - dummy_pitch = torch.rand((1, 1, 207)).long().to(device) - dummy_energy = torch.rand((1, 1, 207)).long().to(device) waveform_decoder = HifiganGenerator( 100, @@ -77,6 +74,7 @@ def test_hifi_decoder(): conv_post_weight_norm=False, conv_post_bias=False, ).to(device) + waveform_decoder = waveform_decoder.train() vocoder_input_slices, slice_ids = rand_segments( # pylint: disable=unused-variable x=dummy_input.transpose(1, 2), From 653f2e75efe6da0dd020933db2b90228dc66d919 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 09:58:16 -0300 Subject: [PATCH 15/24] Update xtts trainer recipe --- recipes/ljspeech/xtts_v1/train_gpt_xtts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py index 6fb1c221ca..02b321f629 100644 --- a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -42,7 +42,7 @@ # DVAE files -DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/denoising_dvae_v3_small.pth" +DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/dvae.pth" MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/mel_stats.pth" # download DVAE files print(" > Downloading DVAE files!") From 8853e1c3ecce7feb1382069800bd1f16027a7921 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 10:45:41 -0300 Subject: [PATCH 16/24] Update XTTS recipe to only download checkpoint if it is needed --- 
recipes/ljspeech/xtts_v1/train_gpt_xtts.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py index 02b321f629..c170b1c828 100644 --- a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -44,24 +44,31 @@ # DVAE files DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/dvae.pth" MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/mel_stats.pth" -# download DVAE files -print(" > Downloading DVAE files!") -ModelManager._download_model_files([MEL_NORM_LINK, DVAE_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=True) # Set the path to the downloaded files DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, DVAE_CHECKPOINT_LINK.split("/")[-1]) MEL_NORM_FILE = os.path.join(CHECKPOINTS_OUT_PATH, MEL_NORM_LINK.split("/")[-1]) -# Download XTTS v1.1 checkpoint +# download DVAE files if needed +if not os.path.isfile(DVAE_CHECKPOINT) or not os.path.isfile(MEL_NORM_FILE): + print(" > Downloading DVAE files!") + ModelManager._download_model_files([MEL_NORM_LINK, DVAE_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=True) + + +# Download XTTS v1.1 checkpoint if needed TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/vocab.json" XTTS_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/model.pth" -print(" > Downloading XTTS v1.1 files!") -ModelManager._download_model_files([TOKENIZER_FILE_LINK, XTTS_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=True) # XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. 
TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, TOKENIZER_FILE_LINK.split("/")[-1]) # vocab.json file XTTS_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, XTTS_CHECKPOINT_LINK.split("/")[-1]) # model.pth file +# download XTTS v1.1 files if needed +if not os.path.isfile(TOKENIZER_FILE) or not os.path.isfile(XTTS_CHECKPOINT): + print(" > Downloading XTTS v1.1 files!") + ModelManager._download_model_files([TOKENIZER_FILE_LINK, XTTS_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=True) + + # Training sentences generations SPEAKER_REFERENCE = ( "./tests/data/ljspeech/wavs/LJ001-0002.wav" # speaker reference to be used in training test sentences From 6fefc36e5a9438d8a2fa3f51b77e03468a99d67c Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 11:03:57 -0300 Subject: [PATCH 17/24] Update XTTS docs --- docs/source/models/xtts.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index ff6bcf974a..f606bf2f60 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -134,6 +134,17 @@ torchaudio.save("xtts_streaming.wav", wav.squeeze().unsqueeze(0).cpu(), 24000) ``` +### Training + +A recipe for `XTTS_v1.1` GPT encoder training using `LJSpeech` dataset looks like below. Let's be creative and call this `train_gpt_xtts.py`. + + ```{literalinclude} ../../recipes/ljspeech/xtts_v1/train_gpt_xtts.py + ``` + +You need to change the fields of the `BaseDatasetConfig` to match your dataset and then update `GPTArgs` and `GPTTrainerConfig` fields as you need. By default, it will use the same parameters that XTTS v1.1 model was trained with. To speed up the model convergence, as default, it will also download the XTTS v1.1 checkpoint and load it. 
+ + + ## Important resources & papers - VallE: https://arxiv.org/abs/2301.02111 - Tortoise Repo: https://github.com/neonbjb/tortoise-tts From 1ee809679928e7a515667fc50c8f3bf6c6ea4735 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 11:13:09 -0300 Subject: [PATCH 18/24] Update XTTS docs --- docs/source/models/xtts.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index f606bf2f60..b5d0f4bb3e 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -136,8 +136,7 @@ torchaudio.save("xtts_streaming.wav", wav.squeeze().unsqueeze(0).cpu(), 24000) ### Training -A recipe for `XTTS_v1.1` GPT encoder training using `LJSpeech` dataset looks like below. Let's be creative and call this `train_gpt_xtts.py`. - +A recipe for `XTTS_v1.1` GPT encoder training using `LJSpeech` dataset is available at https://github.com/coqui-ai/TTS/tree/dev/recipes/ljspeech/xtts_v1/train_gpt_xtts.py and it looks like below. 
```{literalinclude} ../../recipes/ljspeech/xtts_v1/train_gpt_xtts.py ``` From 37b794547468bf7b70f5c00d679fd5d4da25fc48 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 11:39:17 -0300 Subject: [PATCH 19/24] Update XTTS train not implemented error to point to the XTTS docs --- TTS/tts/models/xtts.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 1031e4bdd2..40fce765f3 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -775,10 +775,10 @@ def inference_stream( yield wav_chunk def forward(self): - raise NotImplementedError("XTTS Training is not implemented") + raise NotImplementedError("XTTS has a dedicated trainer, please check the XTTS docs: https://tts.readthedocs.io/en/dev/models/xtts.html#training") def eval_step(self): - raise NotImplementedError("XTTS Training is not implemented") + raise NotImplementedError("XTTS has a dedicated trainer, please check the XTTS docs: https://tts.readthedocs.io/en/dev/models/xtts.html#training") @staticmethod def init_from_config(config: "XttsConfig", **kwargs): # pylint: disable=unused-argument @@ -860,5 +860,4 @@ def load_checkpoint( self.gpt.eval() def train_step(self): - # ToDo: Add here the link of documentation for XTTS FT - raise NotImplementedError("XTTS Training is not implemented") + raise NotImplementedError("XTTS has a dedicated trainer, please check the XTTS docs: https://tts.readthedocs.io/en/dev/models/xtts.html#training") From 67ca70aff4431eac16b0f5ab7b1f784ca64f8eed Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 11:47:10 -0300 Subject: [PATCH 20/24] Fix Delightful TTS layers unit test --- tests/tts_tests2/test_delightful_tts_layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tts_tests2/test_delightful_tts_layers.py b/tests/tts_tests2/test_delightful_tts_layers.py index 2eacc06598..b9951fc208 100644 --- 
a/tests/tts_tests2/test_delightful_tts_layers.py +++ b/tests/tts_tests2/test_delightful_tts_layers.py @@ -52,7 +52,7 @@ def test_acoustic_model(): speaker_idx=None, ) assert list(output["model_outputs"].shape) == [1, 207, 100] - output["model_outputs"].sum().backward() + # output["model_outputs"].sum().backward() def test_hifi_decoder(): @@ -86,4 +86,4 @@ def test_hifi_decoder(): outputs = waveform_decoder(x=vocoder_input_slices.detach()) assert list(outputs.shape) == [1, 1, 8192] - outputs.sum().backward() + # outputs.sum().backward() From 0f96abb5ec5ae32fbdb7c5d2dcf29e93805ce050 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 13:23:30 -0300 Subject: [PATCH 21/24] Add FT inference example on XTTS docs --- TTS/tts/models/xtts.py | 2 ++ docs/source/models/xtts.md | 50 +++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 40fce765f3..87ba3285a6 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -794,6 +794,8 @@ def get_compatible_checkpoint_state_dict(self, model_path): ignore_keys = ["diffusion_decoder", "vocoder"] if self.args.use_hifigan or self.args.use_ne_hifigan else [] ignore_keys += [] if self.args.use_hifigan else ["hifigan_decoder"] ignore_keys += [] if self.args.use_ne_hifigan else ["ne_hifigan_decoder"] + # remove xtts gpt trainer extra keys + ignore_keys += ["torch_mel_spectrogram_style_encoder", "torch_mel_spectrogram_dvae", "dvae"] for key in list(checkpoint.keys()): # check if it is from the coqui Trainer if so convert it if key.startswith("xtts."): diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index b5d0f4bb3e..cc4eefdc77 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -16,8 +16,8 @@ a few tricks to make it faster and support streaming inference. Current implementation only supports inference. 
### Languages -As of now, XTTS-v1 supports 13 languages: English, Spanish, French, German, Italian, Portuguese, -Polish, Turkish, Russian, Dutch, Czech, Arabic, and Chinese (Simplified). +As of now, XTTS-v1.1 supports 14 languages: English, Spanish, French, German, Italian, Portuguese, +Polish, Turkish, Russian, Dutch, Czech, Arabic, Chinese (Simplified) and Japanese. Stay tuned as we continue to add support for more languages. If you have any language requests, please feel free to reach out. @@ -33,7 +33,7 @@ You can also mail us at info@coqui.ai. ```python from TTS.api import TTS -tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True) +tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1.1", gpu=True) # generate speech by cloning a voice using default settings tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", @@ -45,7 +45,7 @@ tts.tts_to_file(text="It took me quite a long time to develop a voice, and now t #### 🐸TTS Command line ```console - tts --model_name tts_models/multilingual/multi-dataset/xtts_v1 \ + tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 \ --text "Bugün okula gitmek istemiyorum." \ --speaker_wav /path/to/target/speaker.wav \ --language_idx tr \ @@ -142,6 +142,48 @@ A recipe for `XTTS_v1.1` GPT encoder training using `LJSpeech` dataset is availa You need to change the fields of the `BaseDatasetConfig` to match your dataset and then update `GPTArgs` and `GPTTrainerConfig` fields as you need. By default, it will use the same parameters that XTTS v1.1 model was trained with. To speed up the model convergence, as default, it will also download the XTTS v1.1 checkpoint and load it. +After training you can do inference following the code below.
+ +```python +import os +import torch +import torchaudio +from TTS.tts.configs.xtts_config import XttsConfig +from TTS.tts.models.xtts import Xtts + +# Add here the xtts_config path +CONFIG_PATH = "recipes/ljspeech/xtts_v1/run/training/GPT_XTTS_LJSpeech_FT-October-23-2023_10+36AM-653f2e75/config.json" +# Add here the vocab file that you have used to train the model +TOKENIZER_PATH = "recipes/ljspeech/xtts_v1/run/training/XTTS_v1.1_original_model_files/vocab.json" +# Add here the checkpoint that you want to do inference with +XTTS_CHECKPOINT = "recipes/ljspeech/xtts_v1/run/training/GPT_XTTS_LJSpeech_FT/best_model.pth" +# Add here the speaker reference +SPEAKER_REFERENCE = "LjSpeech_reference.wav" + +# output wav path +OUTPUT_WAV_PATH = "xtts-ft.wav" + +print("Loading model...") +config = XttsConfig() +config.load_json(CONFIG_PATH) +model = Xtts.init_from_config(config) +model.load_checkpoint(config, checkpoint_path=XTTS_CHECKPOINT, vocab_path=TOKENIZER_PATH, use_deepspeed=False) +model.cuda() + +print("Computing speaker latents...") +gpt_cond_latent, diffusion_conditioning, speaker_embedding = model.get_conditioning_latents(audio_path=SPEAKER_REFERENCE) + +print("Inference...") +out = model.inference( + "It took me quite a long time to develop a voice and now that I have it I am not going to be silent.", + "en", + gpt_cond_latent, + speaker_embedding, + diffusion_conditioning, + temperature=0.7, # Add custom parameters here +) +torchaudio.save(OUTPUT_WAV_PATH, torch.tensor(out["wav"]).unsqueeze(0), 24000) +``` ## Important resources & papers From de1d521c8a699c78809ff4cb4349eeef04918aa5 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 13:35:15 -0300 Subject: [PATCH 22/24] Update XTTS docs --- docs/source/models/xtts.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index cc4eefdc77..09373b4adf 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md 
@@ -136,9 +136,7 @@ torchaudio.save("xtts_streaming.wav", wav.squeeze().unsqueeze(0).cpu(), 24000) ### Training -A recipe for `XTTS_v1.1` GPT encoder training using `LJSpeech` dataset is available at https://github.com/coqui-ai/TTS/tree/dev/recipes/ljspeech/xtts_v1/train_gpt_xtts.py and it looks like below. - ```{literalinclude} ../../recipes/ljspeech/xtts_v1/train_gpt_xtts.py - ``` +A recipe for `XTTS_v1.1` GPT encoder training using `LJSpeech` dataset is available at https://github.com/coqui-ai/TTS/tree/dev/recipes/ljspeech/xtts_v1/train_gpt_xtts.py You need to change the fields of the `BaseDatasetConfig` to match your dataset and then update `GPTArgs` and `GPTTrainerConfig` fields as you need. By default, it will use the same parameters that XTTS v1.1 model was trained with. To speed up the model convergence, as default, it will also download the XTTS v1.1 checkpoint and load it. From 8af3d2dbcd89b2ef5915c2bab7eeb14833da67b2 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Tue, 24 Oct 2023 09:52:44 -0300 Subject: [PATCH 23/24] Add a dedicated workflow for XTTS tests --- .github/workflows/xtts_tests.yml | 53 +++++++++++++++++++ Makefile | 3 ++ recipes/ljspeech/xtts_v1/train_gpt_xtts.py | 1 + .../test_xtts_gpt_train.py | 0 4 files changed, 57 insertions(+) create mode 100644 .github/workflows/xtts_tests.yml rename tests/{tts_tests2 => xtts_tests}/test_xtts_gpt_train.py (100%) diff --git a/.github/workflows/xtts_tests.yml b/.github/workflows/xtts_tests.yml new file mode 100644 index 0000000000..be367f3547 --- /dev/null +++ b/.github/workflows/xtts_tests.yml @@ -0,0 +1,53 @@ +name: xtts-tests + +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + check_skip: + runs-on: ubuntu-latest + if: "! 
contains(github.event.head_commit.message, '[ci skip]')" + steps: + - run: echo "${{ github.event.head_commit.message }}" + + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [3.9, "3.10", "3.11"] + experimental: [false] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: 'pip' + cache-dependency-path: 'requirements*' + - name: check OS + run: cat /etc/os-release + - name: set ENV + run: export TRAINER_TELEMETRY=0 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends git make gcc + sudo apt-get install espeak + sudo apt-get install espeak-ng + make system-deps + - name: Install/upgrade Python setup deps + run: python3 -m pip install --upgrade pip setuptools wheel + - name: Replace scarf urls + run: | + sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json + - name: Install TTS + run: | + python3 -m pip install .[all] + python3 setup.py egg_info + - name: Unit tests + run: make test_xtts diff --git a/Makefile b/Makefile index ab992ec52e..54aa6eeb18 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,9 @@ test_tts: ## run tts tests. test_tts2: ## run tts tests. nose2 -F -v -B --with-coverage --coverage TTS tests.tts_tests2 +test_xtts: + nose2 -F -v -B --with-coverage --coverage TTS tests.xtts_tests + test_aux: ## run aux tests. 
nose2 -F -v -B --with-coverage --coverage TTS tests.aux_tests ./run_bash_tests.sh diff --git a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py index c170b1c828..94f3975c2f 100644 --- a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -93,6 +93,7 @@ def main(): gpt_num_audio_tokens=8194, gpt_start_audio_token=8192, gpt_stop_audio_token=8193, + use_ne_hifigan=True, # if it is true it will keep the non-enhanced keys on the output checkpoint ) # define audio config audio_config = XttsAudioConfig( diff --git a/tests/tts_tests2/test_xtts_gpt_train.py b/tests/xtts_tests/test_xtts_gpt_train.py similarity index 100% rename from tests/tts_tests2/test_xtts_gpt_train.py rename to tests/xtts_tests/test_xtts_gpt_train.py From 01839af926ea1bd528e4e6489ca68105a5e8bfb8 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Tue, 24 Oct 2023 18:30:14 -0300 Subject: [PATCH 24/24] Bug fix on XTTS masking training --- TTS/tts/layers/xtts/gpt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index e7c0a41a77..dfd7774e36 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -450,7 +450,7 @@ def forward( ) if cond_idxs is not None: - for idx, r in enumerate(cond_idxs.squeeze()): + for idx, r in enumerate(cond_idxs): l = r[1] - r[0] attn_mask_cond[idx, l:] = 0.0