| authors: false | |
| cite: false | |
| build-info: "" | |
| workspace: -8000 | |
| log: train.log | |
| log-level: info | |
| log-time-zone: PST8PDT | |
| quiet: false | |
| quiet-translation: true | |
| seed: 141414 | |
| check-nan: false | |
| interpolate-env-vars: true | |
| relative-paths: false | |
| dump-config: "" | |
| sigterm: save-and-exit | |
| model: model_files/model.npz | |
| pretrained-model: "" | |
| ignore-model-config: false | |
| type: transformer | |
| dim-vocabs: | |
| - 8000 | |
| - 8000 | |
| dim-emb: 1024 | |
| factors-dim-emb: 0 | |
| factors-combine: sum | |
| lemma-dependency: "" | |
| lemma-dim-emb: 0 | |
| dim-rnn: 1024 | |
| enc-type: bidirectional | |
| enc-cell: gru | |
| enc-cell-depth: 1 | |
| enc-depth: 6 | |
| dec-cell: gru | |
| dec-cell-base-depth: 2 | |
| dec-cell-high-depth: 1 | |
| dec-depth: 6 | |
| skip: false | |
| layer-normalization: false | |
| right-left: false | |
| input-types: | |
| [] | |
| tied-embeddings: true | |
| tied-embeddings-src: false | |
| tied-embeddings-all: true | |
| output-omit-bias: false | |
| transformer-heads: 8 | |
| transformer-no-projection: false | |
| transformer-rnn-projection: false | |
| transformer-pool: false | |
| transformer-dim-ffn: 8192 | |
| transformer-decoder-dim-ffn: 8192 | |
| transformer-ffn-depth: 2 | |
| transformer-decoder-ffn-depth: 0 | |
| transformer-ffn-activation: relu | |
| transformer-dim-aan: 2048 | |
| transformer-aan-depth: 2 | |
| transformer-aan-activation: swish | |
| transformer-aan-nogate: false | |
| transformer-decoder-autoreg: self-attention | |
| transformer-tied-layers: [] | |
| transformer-guided-alignment-layer: last | |
| transformer-preprocess: "" | |
| transformer-postprocess-emb: d | |
| transformer-postprocess: dan | |
| transformer-postprocess-top: "" | |
| transformer-train-position-embeddings: false | |
| transformer-depth-scaling: true | |
| transformer-no-bias: false | |
| transformer-no-affine: false | |
| bert-mask-symbol: "[MASK]" | |
| bert-sep-symbol: "[SEP]" | |
| bert-class-symbol: "[CLS]" | |
| bert-masking-fraction: 0.15 | |
| bert-train-type-embeddings: true | |
| bert-type-vocab-size: 2 | |
| comet-final-sigmoid: false | |
| comet-mix: false | |
| comet-mix-norm: false | |
| comet-dropout: 0.1 | |
| comet-mixup: 0 | |
| comet-mixup-reg: false | |
| comet-pooler-ffn: | |
| - 2048 | |
| - 1024 | |
| comet-prepend-zero: false | |
| dropout-rnn: 0 | |
| dropout-src: 0 | |
| dropout-trg: 0 | |
| transformer-dropout: 0.1 | |
| transformer-dropout-attention: 0 | |
| transformer-dropout-ffn: 0.1 | |
| cost-type: ce-sum | |
| multi-loss-type: sum | |
| unlikelihood-loss: false | |
| overwrite: false | |
| overwrite-checkpoint: true | |
| no-reload: false | |
| train-sets: | |
| - stdin | |
| vocabs: | |
| - vocab | |
| - vocab | |
| sentencepiece-alphas: | |
| [] | |
| sentencepiece-options: "" | |
| sentencepiece-max-lines: 2000000 | |
| no-spm-encode: false | |
| after-epochs: 0 | |
| after-batches: 0 | |
| after: 40e | |
| disp-freq: 100Mt | |
| disp-first: 10 | |
| disp-label-counts: true | |
| save-freq: 1Gt | |
| logical-epoch: | |
| - 1Gt | |
| max-length: 256 | |
| max-length-crop: false | |
| tsv: true | |
| tsv-fields: 2 | |
| shuffle: batches | |
| no-restore-corpus: true | |
| tempdir: /tmp | |
| sqlite: "" | |
| sqlite-drop: false | |
| devices: | |
| - 0 | |
| - 1 | |
| no-nccl: false | |
| sharding: local | |
| sync-freq: 200u | |
| cpu-threads: 0 | |
| mini-batch: 1000 | |
| mini-batch-words: 500000 | |
| mini-batch-fit: true | |
| mini-batch-fit-step: 5 | |
| gradient-checkpointing: false | |
| maxi-batch: 1000 | |
| maxi-batch-sort: trg | |
| shuffle-in-ram: true | |
| data-threads: 8 | |
| all-caps-every: 0 | |
| english-title-case-every: 0 | |
| mini-batch-words-ref: 0 | |
| mini-batch-warmup: 4000 | |
| mini-batch-track-lr: false | |
| mini-batch-round-up: true | |
| optimizer: adam | |
| optimizer-params: | |
| - 0.9 | |
| - 0.999 | |
| - 1e-08 | |
| - 0.01 | |
| optimizer-delay: 1 | |
| sync-sgd: true | |
| learn-rate: 0.0005 | |
| lr-report: true | |
| lr-decay: 0 | |
| lr-decay-strategy: epoch+stalled | |
| lr-decay-start: | |
| - 10 | |
| - 1 | |
| lr-decay-freq: 50000 | |
| lr-decay-reset-optimizer: false | |
| lr-decay-repeat-warmup: false | |
| lr-decay-inv-sqrt: | |
| - 4000 | |
| lr-warmup: 4000 | |
| lr-warmup-start-rate: 0 | |
| lr-warmup-cycle: false | |
| lr-warmup-at-reload: false | |
| label-smoothing: 0.1 | |
| factor-weight: 1 | |
| clip-norm: 0 | |
| exponential-smoothing: 1e-3 | |
| exponential-smoothing-replace-freq: 0 | |
| guided-alignment: none | |
| guided-alignment-cost: ce | |
| guided-alignment-weight: 0 | |
| data-weighting: "" | |
| data-weighting-type: sentence | |
| embedding-vectors: | |
| [] | |
| embedding-normalization: false | |
| embedding-fix-src: false | |
| embedding-fix-trg: false | |
| precision: | |
| - float32 | |
| - float32 | |
| cost-scaling: | |
| - 256.f | |
| - 10000 | |
| - 1.f | |
| - 256.f | |
| throw-on-divergence: | |
| [] | |
| custom-fallbacks: | |
| [] | |
| gradient-norm-average-window: 100 | |
| dynamic-gradient-scaling: | |
| - 2 | |
| - log | |
| check-gradient-nan: false | |
| normalize-gradient: false | |
| train-embedder-rank: | |
| [] | |
| quantize-bits: 0 | |
| quantize-optimization-steps: 0 | |
| quantize-log-based: false | |
| quantize-biases: false | |
| ulr: false | |
| ulr-query-vectors: "" | |
| ulr-keys-vectors: "" | |
| ulr-trainable-transformation: false | |
| ulr-dim-emb: 0 | |
| ulr-dropout: 0 | |
| ulr-softmax-temperature: 1 | |
| valid-sets: | |
| - dev.en-de | |
| valid-freq: 1Gt | |
| valid-metrics: | |
| - perplexity | |
| - ce-mean-words | |
| - bleu | |
| - chrf | |
| valid-reset-stalled: false | |
| valid-reset-all: false | |
| early-stopping: 40 | |
| early-stopping-epsilon: | |
| - 0 | |
| early-stopping-on: first | |
| beam-size: 4 | |
| normalize: 1.0 | |
| max-length-factor: 3 | |
| word-penalty: 0.0 | |
| allow-unk: false | |
| n-best: false | |
| word-scores: false | |
| valid-mini-batch: 32 | |
| valid-max-length: 1000 | |
| valid-script-path: "" | |
| valid-script-args: | |
| [] | |
| valid-translation-output: valid.trg.output | |
| keep-best: true | |
| valid-log: valid.log |