# Model options
type: transformer
dim-emb: 512
enc-depth: 6
dec-depth: 6
tied-embeddings-all: false
transformer-heads: 8
transformer-dim-ffn: 2048
transformer-ffn-activation: relu
transformer-preprocess: ""
transformer-postprocess: dan
transformer-dropout: 0.1

# Training options
cost-type: ce-mean-words
max-length: 100
mini-batch: 1000
mini-batch-fit: true
maxi-batch: 1000
optimizer-params:
  - 0.9
  - 0.98
  - 1e-09
sync-sgd: true
learn-rate: 0.0003
lr-decay-inv-sqrt:
  - 16000
lr-warmup: 16000
label-smoothing: 0.1
clip-norm: 0
exponential-smoothing: 0.0001
disp-freq: 50
disp-first: 10
save-freq: 10ku
early-stopping: 10

# Validation set options
keep-best: true
beam-size: 8
normalize: 1
valid-freq: 20ku
valid-metrics:
  - ce-mean-words
  - bleu
  - perplexity
valid-mini-batch: 64

train-sets:
  - /mnt/d/model2train/data/src-train.txt
  - /mnt/d/model2train/data/tgt-train.txt
data-weighting-type: sentence
data-weighting: /mnt/d/model2train/data/weight-train.txt
valid-sets:
  - /mnt/d/model2train/data/src-val.txt
  - /mnt/d/model2train/data/tgt-val.txt
model: /home/ogaith/model2train/model.npz
vocabs:
  - /mnt/d/model2train/data/src-vocab.yml
  - /mnt/d/model2train/data/tgt-vocab.yml
log: /home/ubuntu/model2train/train.log
valid-log: /home/ubuntu/model2train/valid.log
after: 10e
workspace: 8000
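
# Usage note (a sketch, not part of the original config): assuming this
# file is saved as config.yml and a Marian build is on PATH, training can
# be launched by passing the file via Marian's --config (-c) flag:
#
#   marian --config config.yml
#
# Marian merges the options above with its defaults; any key can still be
# overridden on the command line (e.g. appending --workspace 12000).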