| !ModelConfig | |
| config_data: !DataConfig | |
| data_statistics: !DataStatistics | |
| average_len_target_per_bucket: | |
| - 2.0 | |
| - 4.08629215026444 | |
| - 4.545226437122044 | |
| - 4.607174400985307 | |
| - 4.87920905184137 | |
| - 4.982440502718691 | |
| - 5.27049292873817 | |
| - 5.803196211897 | |
| - 6.028941176470578 | |
| - 6.834719710669081 | |
| - 7.802691790040373 | |
| - 15.045793000744581 | |
| - 8.642659279778403 | |
| - 8.640957446808516 | |
| - 10.678657074340526 | |
| - 9.13441955193482 | |
| - 9.305970149253735 | |
| - 8.974063400576362 | |
| - 10.971887550200805 | |
| - 13.464285714285717 | |
| - 13.232323232323239 | |
| - 11.324468085106396 | |
| - 13.984732824427489 | |
| - 16.142276422764233 | |
| - 16.61032863849766 | |
| - 16.427509293680295 | |
| - 16.655589123867063 | |
| - 19.258675078864364 | |
| - 21.614285714285707 | |
| - 21.643258426966298 | |
| - 20.918478260869566 | |
| - 22.957871396895783 | |
| - 23.638766519823815 | |
| - 24.167002012072434 | |
| - 25.362318840579718 | |
| - 25.95375722543352 | |
| - 26.010489510489492 | |
| - 27.22000000000001 | |
| - 26.959999999999987 | |
| - 27.297697368421066 | |
| - 28.040998217468793 | |
| - 30.042830540037233 | |
| - 29.966735966735982 | |
| - 30.934859154929573 | |
| - 30.47868217054262 | |
| - 30.527777777777782 | |
| - 31.492779783393505 | |
| - 32.51171874999999 | |
| - 32.6358024691358 | |
| - 34.461538461538474 | |
| - 32.60769230769231 | |
| - 5.5 | |
| - 43.0 | |
| - null | |
| - null | |
| - null | |
| - null | |
| - null | |
| - null | |
| - null | |
| - null | |
| - null | |
| - null | |
| - null | |
| - null | |
| buckets: | |
| - !!python/tuple | |
| - 8 | |
| - 8 | |
| - !!python/tuple | |
| - 16 | |
| - 16 | |
| - !!python/tuple | |
| - 24 | |
| - 24 | |
| - !!python/tuple | |
| - 32 | |
| - 32 | |
| - !!python/tuple | |
| - 40 | |
| - 40 | |
| - !!python/tuple | |
| - 48 | |
| - 48 | |
| - !!python/tuple | |
| - 56 | |
| - 56 | |
| - !!python/tuple | |
| - 64 | |
| - 64 | |
| - !!python/tuple | |
| - 72 | |
| - 72 | |
| - !!python/tuple | |
| - 80 | |
| - 80 | |
| - !!python/tuple | |
| - 88 | |
| - 88 | |
| - !!python/tuple | |
| - 96 | |
| - 96 | |
| - !!python/tuple | |
| - 104 | |
| - 104 | |
| - !!python/tuple | |
| - 112 | |
| - 112 | |
| - !!python/tuple | |
| - 120 | |
| - 120 | |
| - !!python/tuple | |
| - 128 | |
| - 128 | |
| - !!python/tuple | |
| - 136 | |
| - 129 | |
| - !!python/tuple | |
| - 144 | |
| - 129 | |
| - !!python/tuple | |
| - 152 | |
| - 129 | |
| - !!python/tuple | |
| - 160 | |
| - 129 | |
| - !!python/tuple | |
| - 168 | |
| - 129 | |
| - !!python/tuple | |
| - 176 | |
| - 129 | |
| - !!python/tuple | |
| - 184 | |
| - 129 | |
| - !!python/tuple | |
| - 192 | |
| - 129 | |
| - !!python/tuple | |
| - 200 | |
| - 129 | |
| - !!python/tuple | |
| - 208 | |
| - 129 | |
| - !!python/tuple | |
| - 216 | |
| - 129 | |
| - !!python/tuple | |
| - 224 | |
| - 129 | |
| - !!python/tuple | |
| - 232 | |
| - 129 | |
| - !!python/tuple | |
| - 240 | |
| - 129 | |
| - !!python/tuple | |
| - 248 | |
| - 129 | |
| - !!python/tuple | |
| - 256 | |
| - 129 | |
| - !!python/tuple | |
| - 264 | |
| - 129 | |
| - !!python/tuple | |
| - 272 | |
| - 129 | |
| - !!python/tuple | |
| - 280 | |
| - 129 | |
| - !!python/tuple | |
| - 288 | |
| - 129 | |
| - !!python/tuple | |
| - 296 | |
| - 129 | |
| - !!python/tuple | |
| - 304 | |
| - 129 | |
| - !!python/tuple | |
| - 312 | |
| - 129 | |
| - !!python/tuple | |
| - 320 | |
| - 129 | |
| - !!python/tuple | |
| - 328 | |
| - 129 | |
| - !!python/tuple | |
| - 336 | |
| - 129 | |
| - !!python/tuple | |
| - 344 | |
| - 129 | |
| - !!python/tuple | |
| - 352 | |
| - 129 | |
| - !!python/tuple | |
| - 360 | |
| - 129 | |
| - !!python/tuple | |
| - 368 | |
| - 129 | |
| - !!python/tuple | |
| - 376 | |
| - 129 | |
| - !!python/tuple | |
| - 384 | |
| - 129 | |
| - !!python/tuple | |
| - 392 | |
| - 129 | |
| - !!python/tuple | |
| - 400 | |
| - 129 | |
| - !!python/tuple | |
| - 408 | |
| - 129 | |
| - !!python/tuple | |
| - 416 | |
| - 129 | |
| - !!python/tuple | |
| - 424 | |
| - 129 | |
| - !!python/tuple | |
| - 432 | |
| - 129 | |
| - !!python/tuple | |
| - 440 | |
| - 129 | |
| - !!python/tuple | |
| - 448 | |
| - 129 | |
| - !!python/tuple | |
| - 456 | |
| - 129 | |
| - !!python/tuple | |
| - 464 | |
| - 129 | |
| - !!python/tuple | |
| - 472 | |
| - 129 | |
| - !!python/tuple | |
| - 480 | |
| - 129 | |
| - !!python/tuple | |
| - 488 | |
| - 129 | |
| - !!python/tuple | |
| - 496 | |
| - 129 | |
| - !!python/tuple | |
| - 504 | |
| - 129 | |
| - !!python/tuple | |
| - 512 | |
| - 129 | |
| - !!python/tuple | |
| - 513 | |
| - 129 | |
| length_ratio_mean: 0.16320710693441579 | |
| length_ratio_stats_per_bucket: | |
| - !!python/tuple | |
| - 0.3333333333333333 | |
| - 0.0 | |
| - !!python/tuple | |
| - 0.28246393697985434 | |
| - 0.17868752447804973 | |
| - !!python/tuple | |
| - 0.21840710265332788 | |
| - 0.1330505772378312 | |
| - !!python/tuple | |
| - 0.16560142798704922 | |
| - 0.09581195473826641 | |
| - !!python/tuple | |
| - 0.13801367492489092 | |
| - 0.12429965021659338 | |
| - !!python/tuple | |
| - 0.11863212215522084 | |
| - 0.1208393385452983 | |
| - !!python/tuple | |
| - 0.10151133866588294 | |
| - 0.11072333780515448 | |
| - !!python/tuple | |
| - 0.09838819717267734 | |
| - 0.12557601720946082 | |
| - !!python/tuple | |
| - 0.09394884997066442 | |
| - 0.16159177653077658 | |
| - !!python/tuple | |
| - 0.10402554625981722 | |
| - 0.26083679437294416 | |
| - !!python/tuple | |
| - 0.1012342945734544 | |
| - 0.20099512839826167 | |
| - !!python/tuple | |
| - 0.17281772320739658 | |
| - 0.257545103018524 | |
| - !!python/tuple | |
| - 0.10845391475564008 | |
| - 0.3239959561352876 | |
| - !!python/tuple | |
| - 0.09935073708696769 | |
| - 0.3059573403277105 | |
| - !!python/tuple | |
| - 0.12635011083619693 | |
| - 0.3907243857496131 | |
| - !!python/tuple | |
| - 0.08949294838769961 | |
| - 0.3144956594612652 | |
| - !!python/tuple | |
| - 0.07007331783529426 | |
| - 0.05774973922713284 | |
| - !!python/tuple | |
| - 0.06411727035132861 | |
| - 0.04726139664600602 | |
| - !!python/tuple | |
| - 0.07382609782015778 | |
| - 0.050521761108029695 | |
| - !!python/tuple | |
| - 0.08590157138956556 | |
| - 0.05569960282284095 | |
| - !!python/tuple | |
| - 0.08062786352616935 | |
| - 0.058790770283216505 | |
| - !!python/tuple | |
| - 0.06563222282256796 | |
| - 0.05450274064413921 | |
| - !!python/tuple | |
| - 0.07765999502184046 | |
| - 0.05057167990395854 | |
| - !!python/tuple | |
| - 0.08555873649959676 | |
| - 0.05960592960682603 | |
| - !!python/tuple | |
| - 0.08460026909745419 | |
| - 0.05238594583690578 | |
| - !!python/tuple | |
| - 0.08041439956489124 | |
| - 0.04792844419538253 | |
| - !!python/tuple | |
| - 0.07859928415542815 | |
| - 0.05072523580179588 | |
| - !!python/tuple | |
| - 0.08720935617277471 | |
| - 0.04382289790728185 | |
| - !!python/tuple | |
| - 0.0945785744419209 | |
| - 0.05412799726318098 | |
| - !!python/tuple | |
| - 0.09147639273741005 | |
| - 0.04623369327444139 | |
| - !!python/tuple | |
| - 0.08566937441195915 | |
| - 0.03729904156233976 | |
| - !!python/tuple | |
| - 0.09089250053653752 | |
| - 0.03997864088737986 | |
| - !!python/tuple | |
| - 0.09074467762227163 | |
| - 0.03321456251239067 | |
| - !!python/tuple | |
| - 0.09005889312946631 | |
| - 0.03317108879820214 | |
| - !!python/tuple | |
| - 0.09171046510420815 | |
| - 0.035431771966381115 | |
| - !!python/tuple | |
| - 0.09113688227781093 | |
| - 0.03302006652634936 | |
| - !!python/tuple | |
| - 0.08893255045731784 | |
| - 0.03625826248767216 | |
| - !!python/tuple | |
| - 0.09050986731593187 | |
| - 0.03584124694886162 | |
| - !!python/tuple | |
| - 0.08733945286928807 | |
| - 0.02963045027026122 | |
| - !!python/tuple | |
| - 0.08629641342788141 | |
| - 0.030504004395265606 | |
| - !!python/tuple | |
| - 0.08640318897032012 | |
| - 0.030675309542199148 | |
| - !!python/tuple | |
| - 0.09036990980396087 | |
| - 0.03191849333402471 | |
| - !!python/tuple | |
| - 0.08807433400728253 | |
| - 0.03290646726223996 | |
| - !!python/tuple | |
| - 0.08866806731559626 | |
| - 0.03728726453805084 | |
| - !!python/tuple | |
| - 0.08543343575292071 | |
| - 0.02718762541630789 | |
| - !!python/tuple | |
| - 0.08370713516111211 | |
| - 0.030215511135668078 | |
| - !!python/tuple | |
| - 0.08456814843330018 | |
| - 0.02546164231510412 | |
| - !!python/tuple | |
| - 0.08555768005742968 | |
| - 0.026243070552042298 | |
| - !!python/tuple | |
| - 0.08406679695149158 | |
| - 0.02438826052491033 | |
| - !!python/tuple | |
| - 0.08695308402142944 | |
| - 0.02926245130243095 | |
| - !!python/tuple | |
| - 0.08093989759976632 | |
| - 0.032639773078865474 | |
| - !!python/tuple | |
| - 0.013364278458885503 | |
| - 0.0011988283372310012 | |
| - !!python/tuple | |
| - 0.10311750599520383 | |
| - 0.007770672300249229 | |
| - &id001 !!python/tuple | |
| - null | |
| - null | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| - *id001 | |
| length_ratio_std: 0.1371393774100467 | |
| max_observed_len_source: 417 | |
| max_observed_len_target: 128 | |
| num_discarded: 20 | |
| num_sents: 354707 | |
| num_sents_per_bucket: | |
| - 1 | |
| - 29122 | |
| - 57389 | |
| - 123411 | |
| - 36766 | |
| - 44876 | |
| - 21849 | |
| - 6758 | |
| - 8500 | |
| - 2765 | |
| - 3715 | |
| - 2686 | |
| - 722 | |
| - 752 | |
| - 417 | |
| - 491 | |
| - 402 | |
| - 347 | |
| - 249 | |
| - 196 | |
| - 297 | |
| - 376 | |
| - 262 | |
| - 246 | |
| - 213 | |
| - 269 | |
| - 331 | |
| - 317 | |
| - 350 | |
| - 356 | |
| - 368 | |
| - 451 | |
| - 454 | |
| - 497 | |
| - 483 | |
| - 519 | |
| - 572 | |
| - 500 | |
| - 550 | |
| - 608 | |
| - 561 | |
| - 537 | |
| - 481 | |
| - 568 | |
| - 516 | |
| - 468 | |
| - 554 | |
| - 512 | |
| - 486 | |
| - 455 | |
| - 130 | |
| - 2 | |
| - 4 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| - 0 | |
| num_tokens_source: 15626084 | |
| num_tokens_target: 2020240 | |
| num_unks_source: 1 | |
| num_unks_target: 116 | |
| size_vocab_source: 1232 | |
| size_vocab_target: 5976 | |
| eop_id: -1 | |
| max_seq_len_source: 513 | |
| max_seq_len_target: 129 | |
| num_source_factors: 1 | |
| num_target_factors: 1 | |
| config_decoder: !TransformerConfig | |
| act_type: relu | |
| attention_heads: 8 | |
| block_prepended_cross_attention: false | |
| decoder_type: transformer | |
| depth_key_value: 512 | |
| dropout_act: 0.1 | |
| dropout_attention: 0.1 | |
| dropout_prepost: 0.1 | |
| feed_forward_num_hidden: 2048 | |
| max_seq_len_source: 513 | |
| max_seq_len_target: 129 | |
| model_size: 512 | |
| num_layers: 6 | |
| positional_embedding_type: fixed | |
| postprocess_sequence: dr | |
| preprocess_sequence: n | |
| use_glu: false | |
| use_lhuc: false | |
| config_embed_source: !EmbeddingConfig | |
| allow_sparse_grad: false | |
| dropout: 0.0 | |
| factor_configs: null | |
| num_embed: 512 | |
| num_factors: 1 | |
| vocab_size: 1232 | |
| config_embed_target: !EmbeddingConfig | |
| allow_sparse_grad: false | |
| dropout: 0.0 | |
| factor_configs: null | |
| num_embed: 512 | |
| num_factors: 1 | |
| vocab_size: 5976 | |
| config_encoder: !TransformerConfig | |
| act_type: relu | |
| attention_heads: 8 | |
| block_prepended_cross_attention: false | |
| decoder_type: transformer | |
| depth_key_value: 512 | |
| dropout_act: 0.1 | |
| dropout_attention: 0.1 | |
| dropout_prepost: 0.1 | |
| feed_forward_num_hidden: 2048 | |
| max_seq_len_source: 513 | |
| max_seq_len_target: 129 | |
| model_size: 512 | |
| num_layers: 6 | |
| positional_embedding_type: fixed | |
| postprocess_sequence: dr | |
| preprocess_sequence: n | |
| use_glu: false | |
| use_lhuc: false | |
| config_length_task: null | |
| dtype: float32 | |
| lhuc: false | |
| neural_vocab_selection: null | |
| neural_vocab_selection_block_loss: false | |
| vocab_source_size: 1232 | |
| vocab_target_size: 5976 | |
| weight_tying_type: none | |