diff --git "a/checkpoint-992160/trainer_state.json" "b/checkpoint-992160/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-992160/trainer_state.json" @@ -0,0 +1,11751 @@ +{ + "best_metric": 3.8424651622772217, + "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/det-noun/transformer/1/checkpoints/checkpoint-915840", + "epoch": 1.0250006060157382, + "eval_steps": 10, + "global_step": 992160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998362119627e-05, + "loss": 10.9527, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.999161405248948e-05, + "loss": 6.8288, + "step": 512 + }, + { + "epoch": 0.0, + "learning_rate": 4.998322810497896e-05, + "loss": 6.1822, + "step": 1024 + }, + { + "epoch": 0.0, + "learning_rate": 4.997484215746844e-05, + "loss": 5.9721, + "step": 1536 + }, + { + "epoch": 0.0, + "learning_rate": 4.996645620995792e-05, + "loss": 5.8095, + "step": 2048 + }, + { + "epoch": 0.0, + "learning_rate": 4.99580702624474e-05, + "loss": 5.7019, + "step": 2560 + }, + { + "epoch": 0.0, + "learning_rate": 4.994968431493688e-05, + "loss": 5.6048, + "step": 3072 + }, + { + "epoch": 0.0, + "learning_rate": 4.994129836742636e-05, + "loss": 5.5401, + "step": 3584 + }, + { + "epoch": 0.0, + "learning_rate": 4.993291241991584e-05, + "loss": 5.4622, + "step": 4096 + }, + { + "epoch": 0.0, + "learning_rate": 4.992452647240532e-05, + "loss": 5.4041, + "step": 4608 + }, + { + "epoch": 0.0, + "learning_rate": 4.99161405248948e-05, + "loss": 5.3556, + "step": 5120 + }, + { + "epoch": 0.0, + "learning_rate": 4.990775457738428e-05, + "loss": 5.3181, + "step": 5632 + }, + { + "epoch": 0.0, + "learning_rate": 4.989936862987376e-05, + "loss": 5.2709, + "step": 6144 + }, + { + "epoch": 0.0, + "learning_rate": 4.989098268236324e-05, + "loss": 5.219, + "step": 6656 + }, + { + "epoch": 0.0, + "learning_rate": 4.988259673485272e-05, + "loss": 5.1885, + "step": 7168 + }, + { + "epoch": 0.0, + "learning_rate": 4.98742107873422e-05, + "loss": 5.1376, + "step": 7680 + }, + { + "epoch": 0.0, + "learning_rate": 4.986584121863541e-05, + "loss": 5.1312, + "step": 8192 + }, + { + "epoch": 0.0, + "learning_rate": 4.985745527112489e-05, + "loss": 5.0865, + "step": 8704 + }, + { + "epoch": 0.0, + "learning_rate": 4.984906932361437e-05, + "loss": 5.0558, + "step": 9216 + }, + { + "epoch": 0.0, + "learning_rate": 4.984068337610385e-05, + "loss": 5.0341, + "step": 9728 + }, + { + "epoch": 0.0, + "learning_rate": 4.983229742859333e-05, + "loss": 5.0215, + "step": 10240 + }, + { + "epoch": 0.0, + "learning_rate": 4.982391148108281e-05, + "loss": 4.9883, + "step": 10752 + }, + { + "epoch": 0.0, + "learning_rate": 4.9815541912376026e-05, + "loss": 4.9645, + "step": 11264 + }, + { + "epoch": 0.0, + "learning_rate": 4.9807155964865506e-05, + "loss": 4.9438, + "step": 11776 + }, + { + "epoch": 0.0, + "learning_rate": 4.9798770017354986e-05, + "loss": 4.9289, + "step": 12288 + }, + { + "epoch": 0.0, + "learning_rate": 4.9790384069844466e-05, + "loss": 4.8992, + "step": 12800 + }, + { + "epoch": 0.0, + "learning_rate": 4.9782014501137675e-05, + "loss": 4.8845, + "step": 13312 + }, + { + "epoch": 0.0, + "learning_rate": 4.9773628553627155e-05, + "loss": 4.862, + "step": 13824 + }, + { + "epoch": 0.0, + "learning_rate": 4.9765242606116635e-05, + "loss": 4.8437, + "step": 14336 + }, + { + "epoch": 0.0, + "learning_rate": 4.9756856658606115e-05, + "loss": 4.8294, + "step": 14848 + }, + { + "epoch": 0.01, + "learning_rate": 4.9748487089899324e-05, + "loss": 4.815, + "step": 15360 + }, + { + "epoch": 0.01, + "learning_rate": 4.9740101142388804e-05, + "loss": 4.8048, + "step": 15872 + }, + { + "epoch": 0.01, + "learning_rate": 4.9731715194878284e-05, + "loss": 4.7956, + "step": 16384 + }, + { + "epoch": 0.01, + "learning_rate": 4.97233456261715e-05, + "loss": 4.7775, + "step": 16896 + }, + { + "epoch": 0.01, + "learning_rate": 4.971495967866098e-05, + "loss": 4.7656, + "step": 17408 + }, + { + "epoch": 0.01, + "learning_rate": 4.970657373115046e-05, + "loss": 4.7512, + "step": 17920 + }, + { + "epoch": 0.01, + "learning_rate": 4.969818778363994e-05, + "loss": 4.743, + "step": 18432 + }, + { + "epoch": 0.01, + "learning_rate": 4.968981821493315e-05, + "loss": 4.7196, + "step": 18944 + }, + { + "epoch": 0.01, + "learning_rate": 4.968143226742263e-05, + "loss": 4.7183, + "step": 19456 + }, + { + "epoch": 0.01, + "learning_rate": 4.967304631991211e-05, + "loss": 4.6862, + "step": 19968 + }, + { + "epoch": 0.01, + "learning_rate": 4.966466037240159e-05, + "loss": 4.688, + "step": 20480 + }, + { + "epoch": 0.01, + "learning_rate": 4.965627442489107e-05, + "loss": 4.6823, + "step": 20992 + }, + { + "epoch": 0.01, + "learning_rate": 4.964788847738054e-05, + "loss": 4.682, + "step": 21504 + }, + { + "epoch": 0.01, + "learning_rate": 4.963950252987002e-05, + "loss": 4.6488, + "step": 22016 + }, + { + "epoch": 0.01, + "learning_rate": 4.96311165823595e-05, + "loss": 4.6559, + "step": 22528 + }, + { + "epoch": 0.01, + "learning_rate": 4.962274701365272e-05, + "loss": 4.6443, + "step": 23040 + }, + { + "epoch": 0.01, + "learning_rate": 4.96143610661422e-05, + "loss": 4.6337, + "step": 23552 + }, + { + "epoch": 0.01, + "learning_rate": 4.960597511863168e-05, + "loss": 4.6404, + "step": 24064 + }, + { + "epoch": 0.01, + "learning_rate": 4.959758917112116e-05, + "loss": 4.6009, + "step": 24576 + }, + { + "epoch": 0.01, + "learning_rate": 4.95892359812181e-05, + "loss": 4.605, + "step": 25088 + }, + { + "epoch": 0.01, + "learning_rate": 4.958085003370758e-05, + "loss": 4.6109, + "step": 25600 + }, + { + "epoch": 0.01, + "learning_rate": 4.957246408619706e-05, + "loss": 4.602, + "step": 26112 + }, + { + "epoch": 0.01, + "learning_rate": 4.956407813868654e-05, + "loss": 4.5827, + "step": 26624 + }, + { + "epoch": 0.01, + "learning_rate": 4.9555692191176016e-05, + "loss": 4.564, + "step": 27136 + }, + { + "epoch": 0.01, + "learning_rate": 4.954732262246923e-05, + "loss": 4.572, + "step": 27648 + }, + { + "epoch": 0.01, + "learning_rate": 4.953893667495871e-05, + "loss": 4.5528, + "step": 28160 + }, + { + "epoch": 0.01, + "learning_rate": 4.953055072744819e-05, + "loss": 4.5765, + "step": 28672 + }, + { + "epoch": 0.01, + "learning_rate": 4.952216477993767e-05, + "loss": 4.5292, + "step": 29184 + }, + { + "epoch": 0.01, + "learning_rate": 4.951377883242715e-05, + "loss": 4.5484, + "step": 29696 + }, + { + "epoch": 0.01, + "learning_rate": 4.950539288491663e-05, + "loss": 4.5372, + "step": 30208 + }, + { + "epoch": 0.01, + "learning_rate": 4.949700693740611e-05, + "loss": 4.5292, + "step": 30720 + }, + { + "epoch": 0.01, + "learning_rate": 4.948862098989559e-05, + "loss": 4.5327, + "step": 31232 + }, + { + "epoch": 0.01, + "learning_rate": 4.94802514211888e-05, + "loss": 4.5115, + "step": 31744 + }, + { + "epoch": 0.01, + "learning_rate": 4.947186547367828e-05, + "loss": 4.4977, + "step": 32256 + }, + { + "epoch": 0.01, + "learning_rate": 4.946347952616776e-05, + "loss": 4.4983, + "step": 32768 + }, + { + "epoch": 0.01, + "learning_rate": 4.945509357865724e-05, + "loss": 4.5134, + "step": 33280 + }, + { + "epoch": 0.01, + "learning_rate": 4.944672400995045e-05, + "loss": 4.4849, + "step": 33792 + }, + { + "epoch": 0.01, + "learning_rate": 4.943833806243993e-05, + "loss": 4.4864, + "step": 34304 + }, + { + "epoch": 0.01, + "learning_rate": 4.942995211492941e-05, + "loss": 4.4696, + "step": 34816 + }, + { + "epoch": 0.01, + "learning_rate": 4.942156616741889e-05, + "loss": 4.4701, + "step": 35328 + }, + { + "epoch": 0.01, + "learning_rate": 4.9413196598712105e-05, + "loss": 4.4856, + "step": 35840 + }, + { + "epoch": 0.01, + "learning_rate": 4.9404810651201585e-05, + "loss": 4.4814, + "step": 36352 + }, + { + "epoch": 0.01, + "learning_rate": 4.9396424703691065e-05, + "loss": 4.4551, + "step": 36864 + }, + { + "epoch": 0.01, + "learning_rate": 4.9388038756180545e-05, + "loss": 4.4681, + "step": 37376 + }, + { + "epoch": 0.01, + "learning_rate": 4.9379669187473754e-05, + "loss": 4.4759, + "step": 37888 + }, + { + "epoch": 0.01, + "learning_rate": 4.9371283239963234e-05, + "loss": 4.4596, + "step": 38400 + }, + { + "epoch": 0.01, + "learning_rate": 4.936291367125644e-05, + "loss": 4.4378, + "step": 38912 + }, + { + "epoch": 0.01, + "learning_rate": 4.935452772374592e-05, + "loss": 4.4437, + "step": 39424 + }, + { + "epoch": 0.01, + "learning_rate": 4.93461417762354e-05, + "loss": 4.438, + "step": 39936 + }, + { + "epoch": 0.01, + "learning_rate": 4.933775582872488e-05, + "loss": 4.4301, + "step": 40448 + }, + { + "epoch": 0.01, + "learning_rate": 4.932936988121436e-05, + "loss": 4.4273, + "step": 40960 + }, + { + "epoch": 0.01, + "learning_rate": 4.932098393370384e-05, + "loss": 4.4274, + "step": 41472 + }, + { + "epoch": 0.01, + "learning_rate": 4.931259798619332e-05, + "loss": 4.43, + "step": 41984 + }, + { + "epoch": 0.01, + "learning_rate": 4.930421203868281e-05, + "loss": 4.4206, + "step": 42496 + }, + { + "epoch": 0.01, + "learning_rate": 4.929584246997602e-05, + "loss": 4.403, + "step": 43008 + }, + { + "epoch": 0.01, + "learning_rate": 4.92874565224655e-05, + "loss": 4.405, + "step": 43520 + }, + { + "epoch": 0.01, + "learning_rate": 4.927907057495498e-05, + "loss": 4.405, + "step": 44032 + }, + { + "epoch": 0.01, + "learning_rate": 4.927068462744446e-05, + "loss": 4.3998, + "step": 44544 + }, + { + "epoch": 0.01, + "learning_rate": 4.926231505873767e-05, + "loss": 4.3992, + "step": 45056 + }, + { + "epoch": 0.01, + "learning_rate": 4.925394549003088e-05, + "loss": 4.3904, + "step": 45568 + }, + { + "epoch": 0.02, + "learning_rate": 4.924555954252036e-05, + "loss": 4.386, + "step": 46080 + }, + { + "epoch": 0.02, + "learning_rate": 4.923717359500984e-05, + "loss": 4.3775, + "step": 46592 + }, + { + "epoch": 0.02, + "learning_rate": 4.9228804026303046e-05, + "loss": 4.3877, + "step": 47104 + }, + { + "epoch": 0.02, + "learning_rate": 4.922041807879253e-05, + "loss": 4.378, + "step": 47616 + }, + { + "epoch": 0.02, + "learning_rate": 4.921203213128201e-05, + "loss": 4.3792, + "step": 48128 + }, + { + "epoch": 0.02, + "learning_rate": 4.920364618377149e-05, + "loss": 4.3758, + "step": 48640 + }, + { + "epoch": 0.02, + "learning_rate": 4.919526023626097e-05, + "loss": 4.3613, + "step": 49152 + }, + { + "epoch": 0.02, + "learning_rate": 4.918687428875045e-05, + "loss": 4.3669, + "step": 49664 + }, + { + "epoch": 0.02, + "learning_rate": 4.917848834123993e-05, + "loss": 4.3582, + "step": 50176 + }, + { + "epoch": 0.02, + "learning_rate": 4.917010239372941e-05, + "loss": 4.3577, + "step": 50688 + }, + { + "epoch": 0.02, + "learning_rate": 4.916173282502262e-05, + "loss": 4.3596, + "step": 51200 + }, + { + "epoch": 0.02, + "learning_rate": 4.91533468775121e-05, + "loss": 4.3422, + "step": 51712 + }, + { + "epoch": 0.02, + "learning_rate": 4.914496093000158e-05, + "loss": 4.3496, + "step": 52224 + }, + { + "epoch": 0.02, + "learning_rate": 4.913657498249106e-05, + "loss": 4.347, + "step": 52736 + }, + { + "epoch": 0.02, + "learning_rate": 4.912818903498054e-05, + "loss": 4.3388, + "step": 53248 + }, + { + "epoch": 0.02, + "learning_rate": 4.911980308747002e-05, + "loss": 4.3296, + "step": 53760 + }, + { + "epoch": 0.02, + "learning_rate": 4.911143351876323e-05, + "loss": 4.3307, + "step": 54272 + }, + { + "epoch": 0.02, + "learning_rate": 4.910304757125272e-05, + "loss": 4.3325, + "step": 54784 + }, + { + "epoch": 0.02, + "learning_rate": 4.90946616237422e-05, + "loss": 4.3261, + "step": 55296 + }, + { + "epoch": 0.02, + "learning_rate": 4.908627567623168e-05, + "loss": 4.3254, + "step": 55808 + }, + { + "epoch": 0.02, + "learning_rate": 4.907788972872115e-05, + "loss": 4.3187, + "step": 56320 + }, + { + "epoch": 0.02, + "learning_rate": 4.906950378121063e-05, + "loss": 4.3104, + "step": 56832 + }, + { + "epoch": 0.02, + "learning_rate": 4.906111783370011e-05, + "loss": 4.329, + "step": 57344 + }, + { + "epoch": 0.02, + "learning_rate": 4.9052748264993326e-05, + "loss": 4.3094, + "step": 57856 + }, + { + "epoch": 0.02, + "learning_rate": 4.90443623174828e-05, + "loss": 4.3148, + "step": 58368 + }, + { + "epoch": 0.02, + "learning_rate": 4.903597636997228e-05, + "loss": 4.3132, + "step": 58880 + }, + { + "epoch": 0.02, + "learning_rate": 4.902759042246176e-05, + "loss": 4.3141, + "step": 59392 + }, + { + "epoch": 0.02, + "learning_rate": 4.9019220853754975e-05, + "loss": 4.3059, + "step": 59904 + }, + { + "epoch": 0.02, + "learning_rate": 4.9010834906244455e-05, + "loss": 4.3012, + "step": 60416 + }, + { + "epoch": 0.02, + "learning_rate": 4.9002448958733935e-05, + "loss": 4.3135, + "step": 60928 + }, + { + "epoch": 0.02, + "learning_rate": 4.8994063011223415e-05, + "loss": 4.3007, + "step": 61440 + }, + { + "epoch": 0.02, + "learning_rate": 4.8985677063712895e-05, + "loss": 4.2884, + "step": 61952 + }, + { + "epoch": 0.02, + "learning_rate": 4.8977291116202375e-05, + "loss": 4.2896, + "step": 62464 + }, + { + "epoch": 0.02, + "learning_rate": 4.8968905168691855e-05, + "loss": 4.2865, + "step": 62976 + }, + { + "epoch": 0.02, + "learning_rate": 4.8960535599985064e-05, + "loss": 4.2855, + "step": 63488 + }, + { + "epoch": 0.02, + "learning_rate": 4.895216603127827e-05, + "loss": 4.2798, + "step": 64000 + }, + { + "epoch": 0.02, + "learning_rate": 4.894378008376775e-05, + "loss": 4.2843, + "step": 64512 + }, + { + "epoch": 0.02, + "learning_rate": 4.893539413625723e-05, + "loss": 4.2702, + "step": 65024 + }, + { + "epoch": 0.02, + "learning_rate": 4.892700818874671e-05, + "loss": 4.2716, + "step": 65536 + }, + { + "epoch": 0.02, + "learning_rate": 4.891862224123619e-05, + "loss": 4.2682, + "step": 66048 + }, + { + "epoch": 0.02, + "learning_rate": 4.891025267252941e-05, + "loss": 4.2869, + "step": 66560 + }, + { + "epoch": 0.02, + "learning_rate": 4.890186672501889e-05, + "loss": 4.2791, + "step": 67072 + }, + { + "epoch": 0.02, + "learning_rate": 4.889348077750837e-05, + "loss": 4.2627, + "step": 67584 + }, + { + "epoch": 0.02, + "learning_rate": 4.888509482999785e-05, + "loss": 4.2631, + "step": 68096 + }, + { + "epoch": 0.02, + "learning_rate": 4.887672526129106e-05, + "loss": 4.2645, + "step": 68608 + }, + { + "epoch": 0.02, + "learning_rate": 4.886833931378054e-05, + "loss": 4.2695, + "step": 69120 + }, + { + "epoch": 0.02, + "learning_rate": 4.885995336627002e-05, + "loss": 4.2666, + "step": 69632 + }, + { + "epoch": 0.02, + "learning_rate": 4.88515674187595e-05, + "loss": 4.2627, + "step": 70144 + }, + { + "epoch": 0.02, + "learning_rate": 4.8843197850052706e-05, + "loss": 4.26, + "step": 70656 + }, + { + "epoch": 0.02, + "learning_rate": 4.8834811902542186e-05, + "loss": 4.258, + "step": 71168 + }, + { + "epoch": 0.02, + "learning_rate": 4.8826425955031666e-05, + "loss": 4.2537, + "step": 71680 + }, + { + "epoch": 0.02, + "learning_rate": 4.8818040007521146e-05, + "loss": 4.2454, + "step": 72192 + }, + { + "epoch": 0.02, + "learning_rate": 4.8809654060010626e-05, + "loss": 4.253, + "step": 72704 + }, + { + "epoch": 0.02, + "learning_rate": 4.8801268112500106e-05, + "loss": 4.2425, + "step": 73216 + }, + { + "epoch": 0.02, + "learning_rate": 4.879289854379332e-05, + "loss": 4.2324, + "step": 73728 + }, + { + "epoch": 0.02, + "learning_rate": 4.87845125962828e-05, + "loss": 4.249, + "step": 74240 + }, + { + "epoch": 0.02, + "learning_rate": 4.877612664877228e-05, + "loss": 4.2426, + "step": 74752 + }, + { + "epoch": 0.02, + "learning_rate": 4.876774070126176e-05, + "loss": 4.2436, + "step": 75264 + }, + { + "epoch": 0.02, + "learning_rate": 4.875935475375124e-05, + "loss": 4.2328, + "step": 75776 + }, + { + "epoch": 0.02, + "learning_rate": 4.875098518504445e-05, + "loss": 4.2343, + "step": 76288 + }, + { + "epoch": 0.03, + "eval_loss": 4.199286460876465, + "eval_runtime": 316.1151, + "eval_samples_per_second": 1207.127, + "eval_steps_per_second": 37.724, + "step": 76320 + }, + { + "epoch": 1.0, + "learning_rate": 4.874259923753393e-05, + "loss": 4.2231, + "step": 76800 + }, + { + "epoch": 1.0, + "learning_rate": 4.873421329002341e-05, + "loss": 4.2193, + "step": 77312 + }, + { + "epoch": 1.0, + "learning_rate": 4.872582734251289e-05, + "loss": 4.2355, + "step": 77824 + }, + { + "epoch": 1.0, + "learning_rate": 4.87174577738061e-05, + "loss": 4.2189, + "step": 78336 + }, + { + "epoch": 1.0, + "learning_rate": 4.870907182629558e-05, + "loss": 4.2278, + "step": 78848 + }, + { + "epoch": 1.0, + "learning_rate": 4.870068587878506e-05, + "loss": 4.2117, + "step": 79360 + }, + { + "epoch": 1.0, + "learning_rate": 4.869229993127454e-05, + "loss": 4.2204, + "step": 79872 + }, + { + "epoch": 1.0, + "learning_rate": 4.8683930362567756e-05, + "loss": 4.2084, + "step": 80384 + }, + { + "epoch": 1.0, + "learning_rate": 4.8675560793860965e-05, + "loss": 4.2139, + "step": 80896 + }, + { + "epoch": 1.0, + "learning_rate": 4.8667174846350445e-05, + "loss": 4.2091, + "step": 81408 + }, + { + "epoch": 1.0, + "learning_rate": 4.8658788898839925e-05, + "loss": 4.2186, + "step": 81920 + }, + { + "epoch": 1.0, + "learning_rate": 4.8650402951329405e-05, + "loss": 4.2117, + "step": 82432 + }, + { + "epoch": 1.0, + "learning_rate": 4.8642017003818885e-05, + "loss": 4.1938, + "step": 82944 + }, + { + "epoch": 1.0, + "learning_rate": 4.8633631056308365e-05, + "loss": 4.196, + "step": 83456 + }, + { + "epoch": 1.0, + "learning_rate": 4.8625245108797845e-05, + "loss": 4.1922, + "step": 83968 + }, + { + "epoch": 1.0, + "learning_rate": 4.8616859161287324e-05, + "loss": 4.1909, + "step": 84480 + }, + { + "epoch": 1.0, + "learning_rate": 4.8608489592580534e-05, + "loss": 4.1934, + "step": 84992 + }, + { + "epoch": 1.0, + "learning_rate": 4.8600103645070014e-05, + "loss": 4.1863, + "step": 85504 + }, + { + "epoch": 1.0, + "learning_rate": 4.8591717697559493e-05, + "loss": 4.1885, + "step": 86016 + }, + { + "epoch": 1.0, + "learning_rate": 4.858333175004898e-05, + "loss": 4.2043, + "step": 86528 + }, + { + "epoch": 1.0, + "learning_rate": 4.857496218134219e-05, + "loss": 4.1877, + "step": 87040 + }, + { + "epoch": 1.0, + "learning_rate": 4.856657623383167e-05, + "loss": 4.1878, + "step": 87552 + }, + { + "epoch": 1.0, + "learning_rate": 4.855819028632115e-05, + "loss": 4.1825, + "step": 88064 + }, + { + "epoch": 1.0, + "learning_rate": 4.854980433881063e-05, + "loss": 4.1948, + "step": 88576 + }, + { + "epoch": 1.0, + "learning_rate": 4.854141839130011e-05, + "loss": 4.1676, + "step": 89088 + }, + { + "epoch": 1.0, + "learning_rate": 4.853304882259332e-05, + "loss": 4.1824, + "step": 89600 + }, + { + "epoch": 1.0, + "learning_rate": 4.85246628750828e-05, + "loss": 4.1718, + "step": 90112 + }, + { + "epoch": 1.0, + "learning_rate": 4.851627692757228e-05, + "loss": 4.1709, + "step": 90624 + }, + { + "epoch": 1.0, + "learning_rate": 4.850789098006175e-05, + "loss": 4.167, + "step": 91136 + }, + { + "epoch": 1.01, + "learning_rate": 4.849952141135497e-05, + "loss": 4.1624, + "step": 91648 + }, + { + "epoch": 1.01, + "learning_rate": 4.849113546384445e-05, + "loss": 4.1689, + "step": 92160 + }, + { + "epoch": 1.01, + "learning_rate": 4.8482749516333934e-05, + "loss": 4.1782, + "step": 92672 + }, + { + "epoch": 1.01, + "learning_rate": 4.847436356882341e-05, + "loss": 4.1638, + "step": 93184 + }, + { + "epoch": 1.01, + "learning_rate": 4.846599400011662e-05, + "loss": 4.1657, + "step": 93696 + }, + { + "epoch": 1.01, + "learning_rate": 4.84576080526061e-05, + "loss": 4.1559, + "step": 94208 + }, + { + "epoch": 1.01, + "learning_rate": 4.844922210509558e-05, + "loss": 4.1633, + "step": 94720 + }, + { + "epoch": 1.01, + "learning_rate": 4.8440836157585056e-05, + "loss": 4.153, + "step": 95232 + }, + { + "epoch": 1.01, + "learning_rate": 4.843246658887827e-05, + "loss": 4.1596, + "step": 95744 + }, + { + "epoch": 1.01, + "learning_rate": 4.842408064136775e-05, + "loss": 4.1407, + "step": 96256 + }, + { + "epoch": 1.01, + "learning_rate": 4.8415694693857225e-05, + "loss": 4.1497, + "step": 96768 + }, + { + "epoch": 1.01, + "learning_rate": 4.8407308746346705e-05, + "loss": 4.1458, + "step": 97280 + }, + { + "epoch": 1.01, + "learning_rate": 4.839893917763992e-05, + "loss": 4.1613, + "step": 97792 + }, + { + "epoch": 1.01, + "learning_rate": 4.83905532301294e-05, + "loss": 4.1345, + "step": 98304 + }, + { + "epoch": 1.01, + "learning_rate": 4.838216728261888e-05, + "loss": 4.1476, + "step": 98816 + }, + { + "epoch": 1.01, + "learning_rate": 4.837378133510836e-05, + "loss": 4.146, + "step": 99328 + }, + { + "epoch": 1.01, + "learning_rate": 4.836539538759784e-05, + "loss": 4.137, + "step": 99840 + }, + { + "epoch": 1.01, + "learning_rate": 4.8357025818891057e-05, + "loss": 4.1499, + "step": 100352 + }, + { + "epoch": 1.01, + "learning_rate": 4.834863987138053e-05, + "loss": 4.1236, + "step": 100864 + }, + { + "epoch": 1.01, + "learning_rate": 4.834025392387001e-05, + "loss": 4.131, + "step": 101376 + }, + { + "epoch": 1.01, + "learning_rate": 4.833186797635949e-05, + "loss": 4.1436, + "step": 101888 + }, + { + "epoch": 1.01, + "learning_rate": 4.83234984076527e-05, + "loss": 4.1362, + "step": 102400 + }, + { + "epoch": 1.01, + "learning_rate": 4.831511246014218e-05, + "loss": 4.1267, + "step": 102912 + }, + { + "epoch": 1.01, + "learning_rate": 4.830672651263166e-05, + "loss": 4.1227, + "step": 103424 + }, + { + "epoch": 1.01, + "learning_rate": 4.829834056512114e-05, + "loss": 4.1287, + "step": 103936 + }, + { + "epoch": 1.01, + "learning_rate": 4.8289970996414354e-05, + "loss": 4.1161, + "step": 104448 + }, + { + "epoch": 1.01, + "learning_rate": 4.8281585048903834e-05, + "loss": 4.135, + "step": 104960 + }, + { + "epoch": 1.01, + "learning_rate": 4.8273199101393314e-05, + "loss": 4.1095, + "step": 105472 + }, + { + "epoch": 1.01, + "learning_rate": 4.8264813153882794e-05, + "loss": 4.1211, + "step": 105984 + }, + { + "epoch": 1.01, + "learning_rate": 4.8256443585176003e-05, + "loss": 4.1223, + "step": 106496 + }, + { + "epoch": 1.01, + "learning_rate": 4.8248057637665483e-05, + "loss": 4.1153, + "step": 107008 + }, + { + "epoch": 1.01, + "learning_rate": 4.823967169015496e-05, + "loss": 4.1238, + "step": 107520 + }, + { + "epoch": 1.01, + "learning_rate": 4.823128574264444e-05, + "loss": 4.1134, + "step": 108032 + }, + { + "epoch": 1.01, + "learning_rate": 4.822291617393765e-05, + "loss": 4.1023, + "step": 108544 + }, + { + "epoch": 1.01, + "learning_rate": 4.821453022642713e-05, + "loss": 4.1015, + "step": 109056 + }, + { + "epoch": 1.01, + "learning_rate": 4.820614427891661e-05, + "loss": 4.1249, + "step": 109568 + }, + { + "epoch": 1.01, + "learning_rate": 4.819775833140609e-05, + "loss": 4.105, + "step": 110080 + }, + { + "epoch": 1.01, + "learning_rate": 4.818938876269931e-05, + "loss": 4.1041, + "step": 110592 + }, + { + "epoch": 1.01, + "learning_rate": 4.818100281518879e-05, + "loss": 4.097, + "step": 111104 + }, + { + "epoch": 1.01, + "learning_rate": 4.817261686767827e-05, + "loss": 4.0984, + "step": 111616 + }, + { + "epoch": 1.01, + "learning_rate": 4.816423092016775e-05, + "loss": 4.1108, + "step": 112128 + }, + { + "epoch": 1.01, + "learning_rate": 4.815586135146096e-05, + "loss": 4.1133, + "step": 112640 + }, + { + "epoch": 1.01, + "learning_rate": 4.814747540395044e-05, + "loss": 4.1015, + "step": 113152 + }, + { + "epoch": 1.01, + "learning_rate": 4.813908945643992e-05, + "loss": 4.1097, + "step": 113664 + }, + { + "epoch": 1.01, + "learning_rate": 4.81307035089294e-05, + "loss": 4.1164, + "step": 114176 + }, + { + "epoch": 1.01, + "learning_rate": 4.8122333940222606e-05, + "loss": 4.1126, + "step": 114688 + }, + { + "epoch": 1.01, + "learning_rate": 4.8113947992712086e-05, + "loss": 4.0902, + "step": 115200 + }, + { + "epoch": 1.01, + "learning_rate": 4.8105562045201566e-05, + "loss": 4.1029, + "step": 115712 + }, + { + "epoch": 1.01, + "learning_rate": 4.8097176097691046e-05, + "loss": 4.0958, + "step": 116224 + }, + { + "epoch": 1.01, + "learning_rate": 4.808880652898426e-05, + "loss": 4.0938, + "step": 116736 + }, + { + "epoch": 1.01, + "learning_rate": 4.808042058147374e-05, + "loss": 4.0921, + "step": 117248 + }, + { + "epoch": 1.01, + "learning_rate": 4.807203463396322e-05, + "loss": 4.0969, + "step": 117760 + }, + { + "epoch": 1.01, + "learning_rate": 4.80636486864527e-05, + "loss": 4.1027, + "step": 118272 + }, + { + "epoch": 1.01, + "learning_rate": 4.805527911774591e-05, + "loss": 4.0959, + "step": 118784 + }, + { + "epoch": 1.01, + "learning_rate": 4.804689317023539e-05, + "loss": 4.0774, + "step": 119296 + }, + { + "epoch": 1.01, + "learning_rate": 4.803850722272487e-05, + "loss": 4.0826, + "step": 119808 + }, + { + "epoch": 1.01, + "learning_rate": 4.803012127521435e-05, + "loss": 4.0859, + "step": 120320 + }, + { + "epoch": 1.01, + "learning_rate": 4.802175170650756e-05, + "loss": 4.0889, + "step": 120832 + }, + { + "epoch": 1.01, + "learning_rate": 4.801336575899704e-05, + "loss": 4.0892, + "step": 121344 + }, + { + "epoch": 1.01, + "learning_rate": 4.800497981148652e-05, + "loss": 4.0772, + "step": 121856 + }, + { + "epoch": 1.02, + "learning_rate": 4.7996593863976e-05, + "loss": 4.0862, + "step": 122368 + }, + { + "epoch": 1.02, + "learning_rate": 4.7988224295269215e-05, + "loss": 4.0673, + "step": 122880 + }, + { + "epoch": 1.02, + "learning_rate": 4.7979838347758695e-05, + "loss": 4.0824, + "step": 123392 + }, + { + "epoch": 1.02, + "learning_rate": 4.7971452400248175e-05, + "loss": 4.0827, + "step": 123904 + }, + { + "epoch": 1.02, + "learning_rate": 4.7963066452737655e-05, + "loss": 4.0796, + "step": 124416 + }, + { + "epoch": 1.02, + "learning_rate": 4.7954696884030864e-05, + "loss": 4.084, + "step": 124928 + }, + { + "epoch": 1.02, + "learning_rate": 4.7946310936520344e-05, + "loss": 4.0709, + "step": 125440 + }, + { + "epoch": 1.02, + "learning_rate": 4.7937924989009824e-05, + "loss": 4.0739, + "step": 125952 + }, + { + "epoch": 1.02, + "learning_rate": 4.7929539041499304e-05, + "loss": 4.0745, + "step": 126464 + }, + { + "epoch": 1.02, + "learning_rate": 4.7921169472792513e-05, + "loss": 4.0767, + "step": 126976 + }, + { + "epoch": 1.02, + "learning_rate": 4.791279990408572e-05, + "loss": 4.0717, + "step": 127488 + }, + { + "epoch": 1.02, + "learning_rate": 4.790441395657521e-05, + "loss": 4.0657, + "step": 128000 + }, + { + "epoch": 1.02, + "learning_rate": 4.789602800906469e-05, + "loss": 4.0656, + "step": 128512 + }, + { + "epoch": 1.02, + "learning_rate": 4.788764206155417e-05, + "loss": 4.0714, + "step": 129024 + }, + { + "epoch": 1.02, + "learning_rate": 4.787925611404365e-05, + "loss": 4.0704, + "step": 129536 + }, + { + "epoch": 1.02, + "learning_rate": 4.787087016653313e-05, + "loss": 4.0502, + "step": 130048 + }, + { + "epoch": 1.02, + "learning_rate": 4.786250059782634e-05, + "loss": 4.0641, + "step": 130560 + }, + { + "epoch": 1.02, + "learning_rate": 4.785411465031582e-05, + "loss": 4.0688, + "step": 131072 + }, + { + "epoch": 1.02, + "learning_rate": 4.78457287028053e-05, + "loss": 4.0604, + "step": 131584 + }, + { + "epoch": 1.02, + "learning_rate": 4.783734275529478e-05, + "loss": 4.0551, + "step": 132096 + }, + { + "epoch": 1.02, + "learning_rate": 4.782895680778426e-05, + "loss": 4.0581, + "step": 132608 + }, + { + "epoch": 1.02, + "learning_rate": 4.782057086027374e-05, + "loss": 4.0491, + "step": 133120 + }, + { + "epoch": 1.02, + "learning_rate": 4.781218491276322e-05, + "loss": 4.0704, + "step": 133632 + }, + { + "epoch": 1.02, + "learning_rate": 4.78037989652527e-05, + "loss": 4.0547, + "step": 134144 + }, + { + "epoch": 1.02, + "learning_rate": 4.779542939654591e-05, + "loss": 4.0585, + "step": 134656 + }, + { + "epoch": 1.02, + "learning_rate": 4.778704344903539e-05, + "loss": 4.0564, + "step": 135168 + }, + { + "epoch": 1.02, + "learning_rate": 4.7778657501524874e-05, + "loss": 4.068, + "step": 135680 + }, + { + "epoch": 1.02, + "learning_rate": 4.777028793281808e-05, + "loss": 4.0537, + "step": 136192 + }, + { + "epoch": 1.02, + "learning_rate": 4.776190198530756e-05, + "loss": 4.0546, + "step": 136704 + }, + { + "epoch": 1.02, + "learning_rate": 4.775351603779704e-05, + "loss": 4.0641, + "step": 137216 + }, + { + "epoch": 1.02, + "learning_rate": 4.774513009028652e-05, + "loss": 4.0538, + "step": 137728 + }, + { + "epoch": 1.02, + "learning_rate": 4.7736744142776e-05, + "loss": 4.0471, + "step": 138240 + }, + { + "epoch": 1.02, + "learning_rate": 4.772835819526548e-05, + "loss": 4.0482, + "step": 138752 + }, + { + "epoch": 1.02, + "learning_rate": 4.7719972247754956e-05, + "loss": 4.0429, + "step": 139264 + }, + { + "epoch": 1.02, + "learning_rate": 4.771160267904817e-05, + "loss": 4.0454, + "step": 139776 + }, + { + "epoch": 1.02, + "learning_rate": 4.770321673153765e-05, + "loss": 4.0459, + "step": 140288 + }, + { + "epoch": 1.02, + "learning_rate": 4.769483078402713e-05, + "loss": 4.0456, + "step": 140800 + }, + { + "epoch": 1.02, + "learning_rate": 4.768644483651661e-05, + "loss": 4.0365, + "step": 141312 + }, + { + "epoch": 1.02, + "learning_rate": 4.767807526780983e-05, + "loss": 4.0393, + "step": 141824 + }, + { + "epoch": 1.02, + "learning_rate": 4.766968932029931e-05, + "loss": 4.0376, + "step": 142336 + }, + { + "epoch": 1.02, + "learning_rate": 4.7661319751592516e-05, + "loss": 4.0554, + "step": 142848 + }, + { + "epoch": 1.02, + "learning_rate": 4.7652933804081996e-05, + "loss": 4.0534, + "step": 143360 + }, + { + "epoch": 1.02, + "learning_rate": 4.7644547856571476e-05, + "loss": 4.0411, + "step": 143872 + }, + { + "epoch": 1.02, + "learning_rate": 4.7636161909060956e-05, + "loss": 4.0311, + "step": 144384 + }, + { + "epoch": 1.02, + "learning_rate": 4.762777596155043e-05, + "loss": 4.0447, + "step": 144896 + }, + { + "epoch": 1.02, + "learning_rate": 4.761939001403991e-05, + "loss": 4.0432, + "step": 145408 + }, + { + "epoch": 1.02, + "learning_rate": 4.761100406652939e-05, + "loss": 4.0447, + "step": 145920 + }, + { + "epoch": 1.02, + "learning_rate": 4.760261811901887e-05, + "loss": 4.0375, + "step": 146432 + }, + { + "epoch": 1.02, + "learning_rate": 4.759424855031208e-05, + "loss": 4.0434, + "step": 146944 + }, + { + "epoch": 1.02, + "learning_rate": 4.7585862602801565e-05, + "loss": 4.0352, + "step": 147456 + }, + { + "epoch": 1.02, + "learning_rate": 4.7577476655291045e-05, + "loss": 4.0379, + "step": 147968 + }, + { + "epoch": 1.02, + "learning_rate": 4.7569090707780525e-05, + "loss": 4.0322, + "step": 148480 + }, + { + "epoch": 1.02, + "learning_rate": 4.7560721139073734e-05, + "loss": 4.0357, + "step": 148992 + }, + { + "epoch": 1.02, + "learning_rate": 4.7552335191563214e-05, + "loss": 4.0295, + "step": 149504 + }, + { + "epoch": 1.02, + "learning_rate": 4.7543949244052694e-05, + "loss": 4.0225, + "step": 150016 + }, + { + "epoch": 1.02, + "learning_rate": 4.7535563296542174e-05, + "loss": 4.0379, + "step": 150528 + }, + { + "epoch": 1.02, + "learning_rate": 4.752719372783538e-05, + "loss": 4.0339, + "step": 151040 + }, + { + "epoch": 1.02, + "learning_rate": 4.751880778032486e-05, + "loss": 4.0352, + "step": 151552 + }, + { + "epoch": 1.02, + "learning_rate": 4.751042183281434e-05, + "loss": 4.025, + "step": 152064 + }, + { + "epoch": 1.02, + "learning_rate": 4.750203588530382e-05, + "loss": 4.0271, + "step": 152576 + }, + { + "epoch": 1.03, + "eval_loss": 4.029434680938721, + "eval_runtime": 316.0863, + "eval_samples_per_second": 1207.237, + "eval_steps_per_second": 37.727, + "step": 152640 + }, + { + "epoch": 0.0, + "learning_rate": 4.74936499377933e-05, + "loss": 4.0176, + "step": 153088 + }, + { + "epoch": 0.0, + "learning_rate": 4.748526399028278e-05, + "loss": 4.015, + "step": 153600 + }, + { + "epoch": 0.0, + "learning_rate": 4.747687804277226e-05, + "loss": 4.031, + "step": 154112 + }, + { + "epoch": 0.0, + "learning_rate": 4.746849209526175e-05, + "loss": 4.0181, + "step": 154624 + }, + { + "epoch": 0.0, + "learning_rate": 4.746010614775123e-05, + "loss": 4.0316, + "step": 155136 + }, + { + "epoch": 0.0, + "learning_rate": 4.745172020024071e-05, + "loss": 4.0128, + "step": 155648 + }, + { + "epoch": 0.0, + "learning_rate": 4.744333425273019e-05, + "loss": 4.0235, + "step": 156160 + }, + { + "epoch": 0.0, + "learning_rate": 4.743494830521967e-05, + "loss": 4.0097, + "step": 156672 + }, + { + "epoch": 0.0, + "learning_rate": 4.742657873651288e-05, + "loss": 4.0215, + "step": 157184 + }, + { + "epoch": 0.0, + "learning_rate": 4.741819278900236e-05, + "loss": 4.0146, + "step": 157696 + }, + { + "epoch": 0.0, + "learning_rate": 4.740980684149184e-05, + "loss": 4.0241, + "step": 158208 + }, + { + "epoch": 0.0, + "learning_rate": 4.740142089398132e-05, + "loss": 4.0181, + "step": 158720 + }, + { + "epoch": 0.0, + "learning_rate": 4.739303494647079e-05, + "loss": 4.0034, + "step": 159232 + }, + { + "epoch": 0.0, + "learning_rate": 4.738464899896027e-05, + "loss": 4.0095, + "step": 159744 + }, + { + "epoch": 0.0, + "learning_rate": 4.737626305144975e-05, + "loss": 4.0027, + "step": 160256 + }, + { + "epoch": 0.0, + "learning_rate": 4.736787710393923e-05, + "loss": 4.0068, + "step": 160768 + }, + { + "epoch": 0.0, + "learning_rate": 4.735950753523245e-05, + "loss": 4.0071, + "step": 161280 + }, + { + "epoch": 0.0, + "learning_rate": 4.735112158772193e-05, + "loss": 4.0009, + "step": 161792 + }, + { + "epoch": 0.0, + "learning_rate": 4.734273564021141e-05, + "loss": 4.0027, + "step": 162304 + }, + { + "epoch": 0.0, + "learning_rate": 4.733434969270089e-05, + "loss": 4.0225, + "step": 162816 + }, + { + "epoch": 0.0, + "learning_rate": 4.7325980123994096e-05, + "loss": 4.0066, + "step": 163328 + }, + { + "epoch": 0.0, + "learning_rate": 4.7317594176483576e-05, + "loss": 4.007, + "step": 163840 + }, + { + "epoch": 0.0, + "learning_rate": 4.7309208228973056e-05, + "loss": 4.0064, + "step": 164352 + }, + { + "epoch": 0.0, + "learning_rate": 4.7300822281462536e-05, + "loss": 4.0106, + "step": 164864 + }, + { + "epoch": 0.0, + "learning_rate": 4.7292452712755745e-05, + "loss": 3.9902, + "step": 165376 + }, + { + "epoch": 0.0, + "learning_rate": 4.7284066765245225e-05, + "loss": 4.0033, + "step": 165888 + }, + { + "epoch": 0.0, + "learning_rate": 4.7275680817734705e-05, + "loss": 4.0005, + "step": 166400 + }, + { + "epoch": 0.0, + "learning_rate": 4.726731124902792e-05, + "loss": 3.9952, + "step": 166912 + }, + { + "epoch": 0.0, + "learning_rate": 4.72589253015174e-05, + "loss": 3.9904, + "step": 167424 + }, + { + "epoch": 0.01, + "learning_rate": 4.725053935400688e-05, + "loss": 3.9887, + "step": 167936 + }, + { + "epoch": 0.01, + "learning_rate": 4.724215340649636e-05, + "loss": 3.9977, + "step": 168448 + }, + { + "epoch": 0.01, + "learning_rate": 4.723376745898584e-05, + "loss": 4.007, + "step": 168960 + }, + { + "epoch": 0.01, + "learning_rate": 4.722539789027905e-05, + "loss": 3.9956, + "step": 169472 + }, + { + "epoch": 0.01, + "learning_rate": 4.721701194276853e-05, + "loss": 3.9972, + "step": 169984 + }, + { + "epoch": 0.01, + "learning_rate": 4.720862599525801e-05, + "loss": 3.9883, + "step": 170496 + }, + { + "epoch": 0.01, + "learning_rate": 4.720024004774749e-05, + "loss": 3.9954, + "step": 171008 + }, + { + "epoch": 0.01, + "learning_rate": 4.719185410023697e-05, + "loss": 3.9848, + "step": 171520 + }, + { + "epoch": 0.01, + "learning_rate": 4.718346815272645e-05, + "loss": 3.9985, + "step": 172032 + }, + { + "epoch": 0.01, + "learning_rate": 4.717508220521593e-05, + "loss": 3.9734, + "step": 172544 + }, + { + "epoch": 0.01, + "learning_rate": 4.716669625770541e-05, + "loss": 3.9869, + "step": 173056 + }, + { + "epoch": 0.01, + "learning_rate": 4.715832668899862e-05, + "loss": 3.9817, + "step": 173568 + }, + { + "epoch": 0.01, + "learning_rate": 4.7149940741488105e-05, + "loss": 3.9989, + "step": 174080 + }, + { + "epoch": 0.01, + "learning_rate": 4.7141554793977585e-05, + "loss": 3.9769, + "step": 174592 + }, + { + "epoch": 0.01, + "learning_rate": 4.7133168846467065e-05, + "loss": 3.9841, + "step": 175104 + }, + { + "epoch": 0.01, + "learning_rate": 4.7124799277760274e-05, + "loss": 3.9878, + "step": 175616 + }, + { + "epoch": 0.01, + "learning_rate": 4.7116413330249754e-05, + "loss": 3.9799, + "step": 176128 + }, + { + "epoch": 0.01, + "learning_rate": 4.7108027382739234e-05, + "loss": 3.9891, + "step": 176640 + }, + { + "epoch": 0.01, + "learning_rate": 4.7099641435228714e-05, + "loss": 3.9689, + "step": 177152 + }, + { + "epoch": 0.01, + "learning_rate": 4.709128824532565e-05, + "loss": 3.9758, + "step": 177664 + }, + { + "epoch": 0.01, + "learning_rate": 4.708290229781513e-05, + "loss": 3.9874, + "step": 178176 + }, + { + "epoch": 0.01, + "learning_rate": 4.707451635030461e-05, + "loss": 3.9774, + "step": 178688 + }, + { + "epoch": 0.01, + "learning_rate": 4.706613040279409e-05, + "loss": 3.9744, + "step": 179200 + }, + { + "epoch": 0.01, + "learning_rate": 4.705774445528357e-05, + "loss": 3.9702, + "step": 179712 + }, + { + "epoch": 0.01, + "learning_rate": 4.704937488657679e-05, + "loss": 3.9779, + "step": 180224 + }, + { + "epoch": 0.01, + "learning_rate": 4.704098893906627e-05, + "loss": 3.9607, + "step": 180736 + }, + { + "epoch": 0.01, + "learning_rate": 4.703260299155575e-05, + "loss": 3.9785, + "step": 181248 + }, + { + "epoch": 0.01, + "learning_rate": 4.702421704404523e-05, + "loss": 3.958, + "step": 181760 + }, + { + "epoch": 0.01, + "learning_rate": 4.701583109653471e-05, + "loss": 3.9731, + "step": 182272 + }, + { + "epoch": 0.01, + "learning_rate": 4.700744514902419e-05, + "loss": 3.9759, + "step": 182784 + }, + { + "epoch": 0.01, + "learning_rate": 4.699905920151367e-05, + "loss": 3.9651, + "step": 183296 + }, + { + "epoch": 0.01, + "learning_rate": 4.699067325400315e-05, + "loss": 3.9715, + "step": 183808 + }, + { + "epoch": 0.01, + "learning_rate": 4.698230368529636e-05, + "loss": 3.9714, + "step": 184320 + }, + { + "epoch": 0.01, + "learning_rate": 4.697391773778584e-05, + "loss": 3.9521, + "step": 184832 + }, + { + "epoch": 0.01, + "learning_rate": 4.696553179027532e-05, + "loss": 3.96, + "step": 185344 + }, + { + "epoch": 0.01, + "learning_rate": 4.6957162221568526e-05, + "loss": 3.9707, + "step": 185856 + }, + { + "epoch": 0.01, + "learning_rate": 4.694877627405801e-05, + "loss": 3.9652, + "step": 186368 + }, + { + "epoch": 0.01, + "learning_rate": 4.694039032654749e-05, + "loss": 3.9618, + "step": 186880 + }, + { + "epoch": 0.01, + "learning_rate": 4.693200437903697e-05, + "loss": 3.9501, + "step": 187392 + }, + { + "epoch": 0.01, + "learning_rate": 4.692363481033018e-05, + "loss": 3.9549, + "step": 187904 + }, + { + "epoch": 0.01, + "learning_rate": 4.691524886281966e-05, + "loss": 3.9704, + "step": 188416 + }, + { + "epoch": 0.01, + "learning_rate": 4.690686291530914e-05, + "loss": 3.9687, + "step": 188928 + }, + { + "epoch": 0.01, + "learning_rate": 4.689847696779862e-05, + "loss": 3.9572, + "step": 189440 + }, + { + "epoch": 0.01, + "learning_rate": 4.6890091020288095e-05, + "loss": 3.9698, + "step": 189952 + }, + { + "epoch": 0.01, + "learning_rate": 4.6881705072777575e-05, + "loss": 3.9745, + "step": 190464 + }, + { + "epoch": 0.01, + "learning_rate": 4.6873319125267055e-05, + "loss": 3.9764, + "step": 190976 + }, + { + "epoch": 0.01, + "learning_rate": 4.686494955656027e-05, + "loss": 3.9466, + "step": 191488 + }, + { + "epoch": 0.01, + "learning_rate": 4.685656360904975e-05, + "loss": 3.9673, + "step": 192000 + }, + { + "epoch": 0.01, + "learning_rate": 4.684817766153923e-05, + "loss": 3.9566, + "step": 192512 + }, + { + "epoch": 0.01, + "learning_rate": 4.683979171402871e-05, + "loss": 3.9537, + "step": 193024 + }, + { + "epoch": 0.01, + "learning_rate": 4.683140576651819e-05, + "loss": 3.9584, + "step": 193536 + }, + { + "epoch": 0.01, + "learning_rate": 4.682301981900767e-05, + "loss": 3.962, + "step": 194048 + }, + { + "epoch": 0.01, + "learning_rate": 4.681463387149715e-05, + "loss": 3.963, + "step": 194560 + }, + { + "epoch": 0.01, + "learning_rate": 4.680624792398663e-05, + "loss": 3.9626, + "step": 195072 + }, + { + "epoch": 0.01, + "learning_rate": 4.679789473408357e-05, + "loss": 3.9399, + "step": 195584 + }, + { + "epoch": 0.01, + "learning_rate": 4.678950878657305e-05, + "loss": 3.9478, + "step": 196096 + }, + { + "epoch": 0.01, + "learning_rate": 4.678112283906253e-05, + "loss": 3.951, + "step": 196608 + }, + { + "epoch": 0.01, + "learning_rate": 4.677273689155201e-05, + "loss": 3.9554, + "step": 197120 + }, + { + "epoch": 0.01, + "learning_rate": 4.676435094404149e-05, + "loss": 3.9577, + "step": 197632 + }, + { + "epoch": 0.01, + "learning_rate": 4.675596499653097e-05, + "loss": 3.9431, + "step": 198144 + }, + { + "epoch": 0.02, + "learning_rate": 4.6747595427824184e-05, + "loss": 3.9559, + "step": 198656 + }, + { + "epoch": 0.02, + "learning_rate": 4.6739209480313664e-05, + "loss": 3.9374, + "step": 199168 + }, + { + "epoch": 0.02, + "learning_rate": 4.6730823532803144e-05, + "loss": 3.9507, + "step": 199680 + }, + { + "epoch": 0.02, + "learning_rate": 4.6722437585292624e-05, + "loss": 3.9487, + "step": 200192 + }, + { + "epoch": 0.02, + "learning_rate": 4.6714051637782104e-05, + "loss": 3.9486, + "step": 200704 + }, + { + "epoch": 0.02, + "learning_rate": 4.670568206907531e-05, + "loss": 3.9536, + "step": 201216 + }, + { + "epoch": 0.02, + "learning_rate": 4.669729612156479e-05, + "loss": 3.9488, + "step": 201728 + }, + { + "epoch": 0.02, + "learning_rate": 4.668891017405427e-05, + "loss": 3.9412, + "step": 202240 + }, + { + "epoch": 0.02, + "learning_rate": 4.668052422654375e-05, + "loss": 3.9503, + "step": 202752 + }, + { + "epoch": 0.02, + "learning_rate": 4.667213827903323e-05, + "loss": 3.9498, + "step": 203264 + }, + { + "epoch": 0.02, + "learning_rate": 4.666376871032644e-05, + "loss": 3.9468, + "step": 203776 + }, + { + "epoch": 0.02, + "learning_rate": 4.665538276281592e-05, + "loss": 3.9364, + "step": 204288 + }, + { + "epoch": 0.02, + "learning_rate": 4.66469968153054e-05, + "loss": 3.9427, + "step": 204800 + }, + { + "epoch": 0.02, + "learning_rate": 4.663861086779488e-05, + "loss": 3.9456, + "step": 205312 + }, + { + "epoch": 0.02, + "learning_rate": 4.663022492028437e-05, + "loss": 3.9466, + "step": 205824 + }, + { + "epoch": 0.02, + "learning_rate": 4.662183897277385e-05, + "loss": 3.9233, + "step": 206336 + }, + { + "epoch": 0.02, + "learning_rate": 4.661346940406706e-05, + "loss": 3.9374, + "step": 206848 + }, + { + "epoch": 0.02, + "learning_rate": 4.660508345655654e-05, + "loss": 3.9473, + "step": 207360 + }, + { + "epoch": 0.02, + "learning_rate": 4.659669750904602e-05, + "loss": 3.937, + "step": 207872 + }, + { + "epoch": 0.02, + "learning_rate": 4.65883115615355e-05, + "loss": 3.9295, + "step": 208384 + }, + { + "epoch": 0.02, + "learning_rate": 4.657992561402498e-05, + "loss": 3.9378, + "step": 208896 + }, + { + "epoch": 0.02, + "learning_rate": 4.657153966651446e-05, + "loss": 3.9303, + "step": 209408 + }, + { + "epoch": 0.02, + "learning_rate": 4.656315371900393e-05, + "loss": 3.9482, + "step": 209920 + }, + { + "epoch": 0.02, + "learning_rate": 4.655476777149341e-05, + "loss": 3.9313, + "step": 210432 + }, + { + "epoch": 0.02, + "learning_rate": 4.6546398202786626e-05, + "loss": 3.9395, + "step": 210944 + }, + { + "epoch": 0.02, + "learning_rate": 4.6538012255276106e-05, + "loss": 3.9391, + "step": 211456 + }, + { + "epoch": 0.02, + "learning_rate": 4.652964268656932e-05, + "loss": 3.9436, + "step": 211968 + }, + { + "epoch": 0.02, + "learning_rate": 4.652127311786253e-05, + "loss": 3.9335, + "step": 212480 + }, + { + "epoch": 0.02, + "learning_rate": 4.651288717035201e-05, + "loss": 3.9389, + "step": 212992 + }, + { + "epoch": 0.02, + "learning_rate": 4.650450122284149e-05, + "loss": 3.9435, + "step": 213504 + }, + { + "epoch": 0.02, + "learning_rate": 4.649611527533097e-05, + "loss": 3.9362, + "step": 214016 + }, + { + "epoch": 0.02, + "learning_rate": 4.648772932782045e-05, + "loss": 3.9266, + "step": 214528 + }, + { + "epoch": 0.02, + "learning_rate": 4.647934338030993e-05, + "loss": 3.9299, + "step": 215040 + }, + { + "epoch": 0.02, + "learning_rate": 4.6470957432799404e-05, + "loss": 3.9269, + "step": 215552 + }, + { + "epoch": 0.02, + "learning_rate": 4.6462571485288884e-05, + "loss": 3.9327, + "step": 216064 + }, + { + "epoch": 0.02, + "learning_rate": 4.64542019165821e-05, + "loss": 3.9276, + "step": 216576 + }, + { + "epoch": 0.02, + "learning_rate": 4.644581596907158e-05, + "loss": 3.9325, + "step": 217088 + }, + { + "epoch": 0.02, + "learning_rate": 4.643743002156106e-05, + "loss": 3.9198, + "step": 217600 + }, + { + "epoch": 0.02, + "learning_rate": 4.642904407405054e-05, + "loss": 3.9224, + "step": 218112 + }, + { + "epoch": 0.02, + "learning_rate": 4.6420674505343756e-05, + "loss": 3.9221, + "step": 218624 + }, + { + "epoch": 0.02, + "learning_rate": 4.6412288557833236e-05, + "loss": 3.9409, + "step": 219136 + }, + { + "epoch": 0.02, + "learning_rate": 4.640390261032271e-05, + "loss": 3.9378, + "step": 219648 + }, + { + "epoch": 0.02, + "learning_rate": 4.639551666281219e-05, + "loss": 3.9317, + "step": 220160 + }, + { + "epoch": 0.02, + "learning_rate": 4.638713071530167e-05, + "loss": 3.9165, + "step": 220672 + }, + { + "epoch": 0.02, + "learning_rate": 4.637876114659488e-05, + "loss": 3.9285, + "step": 221184 + }, + { + "epoch": 0.02, + "learning_rate": 4.637037519908436e-05, + "loss": 3.9326, + "step": 221696 + }, + { + "epoch": 0.02, + "learning_rate": 4.636198925157384e-05, + "loss": 3.9302, + "step": 222208 + }, + { + "epoch": 0.02, + "learning_rate": 4.635360330406332e-05, + "loss": 3.926, + "step": 222720 + }, + { + "epoch": 0.02, + "learning_rate": 4.63452173565528e-05, + "loss": 3.9312, + "step": 223232 + }, + { + "epoch": 0.02, + "learning_rate": 4.633683140904228e-05, + "loss": 3.9247, + "step": 223744 + }, + { + "epoch": 0.02, + "learning_rate": 4.632844546153176e-05, + "loss": 3.933, + "step": 224256 + }, + { + "epoch": 0.02, + "learning_rate": 4.6320059514021244e-05, + "loss": 3.9153, + "step": 224768 + }, + { + "epoch": 0.02, + "learning_rate": 4.6311689945314454e-05, + "loss": 3.9274, + "step": 225280 + }, + { + "epoch": 0.02, + "learning_rate": 4.6303303997803934e-05, + "loss": 3.9191, + "step": 225792 + }, + { + "epoch": 0.02, + "learning_rate": 4.6294918050293413e-05, + "loss": 3.9105, + "step": 226304 + }, + { + "epoch": 0.02, + "learning_rate": 4.628654848158662e-05, + "loss": 3.9267, + "step": 226816 + }, + { + "epoch": 0.02, + "learning_rate": 4.62781625340761e-05, + "loss": 3.9273, + "step": 227328 + }, + { + "epoch": 0.02, + "learning_rate": 4.626977658656558e-05, + "loss": 3.9222, + "step": 227840 + }, + { + "epoch": 0.02, + "learning_rate": 4.626139063905506e-05, + "loss": 3.9188, + "step": 228352 + }, + { + "epoch": 0.02, + "learning_rate": 4.625300469154454e-05, + "loss": 3.9144, + "step": 228864 + }, + { + "epoch": 0.03, + "eval_loss": 3.9534971714019775, + "eval_runtime": 305.1203, + "eval_samples_per_second": 1250.625, + "eval_steps_per_second": 39.083, + "step": 228960 + }, + { + "epoch": 1.0, + "learning_rate": 4.624461874403402e-05, + "loss": 3.9207, + "step": 229376 + }, + { + "epoch": 1.0, + "learning_rate": 4.623624917532723e-05, + "loss": 3.9078, + "step": 229888 + }, + { + "epoch": 1.0, + "learning_rate": 4.622786322781671e-05, + "loss": 3.9234, + "step": 230400 + }, + { + "epoch": 1.0, + "learning_rate": 4.62194772803062e-05, + "loss": 3.9097, + "step": 230912 + }, + { + "epoch": 1.0, + "learning_rate": 4.621109133279568e-05, + "loss": 3.9302, + "step": 231424 + }, + { + "epoch": 1.0, + "learning_rate": 4.620270538528516e-05, + "loss": 3.9022, + "step": 231936 + }, + { + "epoch": 1.0, + "learning_rate": 4.619433581657837e-05, + "loss": 3.9201, + "step": 232448 + }, + { + "epoch": 1.0, + "learning_rate": 4.618594986906785e-05, + "loss": 3.9038, + "step": 232960 + }, + { + "epoch": 1.0, + "learning_rate": 4.617756392155733e-05, + "loss": 3.9196, + "step": 233472 + }, + { + "epoch": 1.0, + "learning_rate": 4.616917797404681e-05, + "loss": 3.9125, + "step": 233984 + }, + { + "epoch": 1.0, + "learning_rate": 4.616079202653629e-05, + "loss": 3.9118, + "step": 234496 + }, + { + "epoch": 1.0, + "learning_rate": 4.615240607902577e-05, + "loss": 3.9182, + "step": 235008 + }, + { + "epoch": 1.0, + "learning_rate": 4.6144036510318976e-05, + "loss": 3.8984, + "step": 235520 + }, + { + "epoch": 1.0, + "learning_rate": 4.6135650562808456e-05, + "loss": 3.9062, + "step": 236032 + }, + { + "epoch": 1.0, + "learning_rate": 4.6127264615297936e-05, + "loss": 3.9027, + "step": 236544 + }, + { + "epoch": 1.0, + "learning_rate": 4.6118878667787416e-05, + "loss": 3.9037, + "step": 237056 + }, + { + "epoch": 1.0, + "learning_rate": 4.6110492720276896e-05, + "loss": 3.9062, + "step": 237568 + }, + { + "epoch": 1.0, + "learning_rate": 4.6102106772766376e-05, + "loss": 3.8998, + "step": 238080 + }, + { + "epoch": 1.0, + "learning_rate": 4.609373720405959e-05, + "loss": 3.9014, + "step": 238592 + }, + { + "epoch": 1.0, + "learning_rate": 4.6085351256549065e-05, + "loss": 3.9224, + "step": 239104 + }, + { + "epoch": 1.0, + "learning_rate": 4.6076965309038545e-05, + "loss": 3.9063, + "step": 239616 + }, + { + "epoch": 1.0, + "learning_rate": 4.6068579361528025e-05, + "loss": 3.9065, + "step": 240128 + }, + { + "epoch": 1.0, + "learning_rate": 4.6060193414017505e-05, + "loss": 3.9085, + "step": 240640 + }, + { + "epoch": 1.0, + "learning_rate": 4.6051807466506985e-05, + "loss": 3.9093, + "step": 241152 + }, + { + "epoch": 1.0, + "learning_rate": 4.6043437897800194e-05, + "loss": 3.8913, + "step": 241664 + }, + { + "epoch": 1.0, + "learning_rate": 4.6035051950289674e-05, + "loss": 3.91, + "step": 242176 + }, + { + "epoch": 1.0, + "learning_rate": 4.6026666002779154e-05, + "loss": 3.8973, + "step": 242688 + }, + { + "epoch": 1.0, + "learning_rate": 4.6018280055268634e-05, + "loss": 3.8968, + "step": 243200 + }, + { + "epoch": 1.0, + "learning_rate": 4.600991048656185e-05, + "loss": 3.8896, + "step": 243712 + }, + { + "epoch": 1.01, + "learning_rate": 4.600152453905133e-05, + "loss": 3.8958, + "step": 244224 + }, + { + "epoch": 1.01, + "learning_rate": 4.599313859154081e-05, + "loss": 3.8969, + "step": 244736 + }, + { + "epoch": 1.01, + "learning_rate": 4.598475264403029e-05, + "loss": 3.9114, + "step": 245248 + }, + { + "epoch": 1.01, + "learning_rate": 4.59763830753235e-05, + "loss": 3.9021, + "step": 245760 + }, + { + "epoch": 1.01, + "learning_rate": 4.596799712781298e-05, + "loss": 3.8988, + "step": 246272 + }, + { + "epoch": 1.01, + "learning_rate": 4.595961118030246e-05, + "loss": 3.89, + "step": 246784 + }, + { + "epoch": 1.01, + "learning_rate": 4.595122523279194e-05, + "loss": 3.9012, + "step": 247296 + }, + { + "epoch": 1.01, + "learning_rate": 4.594283928528142e-05, + "loss": 3.8855, + "step": 247808 + }, + { + "epoch": 1.01, + "learning_rate": 4.593446971657463e-05, + "loss": 3.9055, + "step": 248320 + }, + { + "epoch": 1.01, + "learning_rate": 4.592608376906411e-05, + "loss": 3.8795, + "step": 248832 + }, + { + "epoch": 1.01, + "learning_rate": 4.591769782155359e-05, + "loss": 3.8902, + "step": 249344 + }, + { + "epoch": 1.01, + "learning_rate": 4.590931187404307e-05, + "loss": 3.8883, + "step": 249856 + }, + { + "epoch": 1.01, + "learning_rate": 4.590094230533628e-05, + "loss": 3.9042, + "step": 250368 + }, + { + "epoch": 1.01, + "learning_rate": 4.589255635782576e-05, + "loss": 3.8878, + "step": 250880 + }, + { + "epoch": 1.01, + "learning_rate": 4.588417041031524e-05, + "loss": 3.8895, + "step": 251392 + }, + { + "epoch": 1.01, + "learning_rate": 4.587578446280472e-05, + "loss": 3.8931, + "step": 251904 + }, + { + "epoch": 1.01, + "learning_rate": 4.58673985152942e-05, + "loss": 3.8904, + "step": 252416 + }, + { + "epoch": 1.01, + "learning_rate": 4.585902894658741e-05, + "loss": 3.8951, + "step": 252928 + }, + { + "epoch": 1.01, + "learning_rate": 4.585064299907689e-05, + "loss": 3.8799, + "step": 253440 + }, + { + "epoch": 1.01, + "learning_rate": 4.584225705156637e-05, + "loss": 3.8789, + "step": 253952 + }, + { + "epoch": 1.01, + "learning_rate": 4.583387110405585e-05, + "loss": 3.8949, + "step": 254464 + }, + { + "epoch": 1.01, + "learning_rate": 4.582550153534906e-05, + "loss": 3.8862, + "step": 254976 + }, + { + "epoch": 1.01, + "learning_rate": 4.581711558783854e-05, + "loss": 3.8842, + "step": 255488 + }, + { + "epoch": 1.01, + "learning_rate": 4.580872964032802e-05, + "loss": 3.881, + "step": 256000 + }, + { + "epoch": 1.01, + "learning_rate": 4.580034369281751e-05, + "loss": 3.8854, + "step": 256512 + }, + { + "epoch": 1.01, + "learning_rate": 4.579195774530699e-05, + "loss": 3.8698, + "step": 257024 + }, + { + "epoch": 1.01, + "learning_rate": 4.578357179779647e-05, + "loss": 3.8902, + "step": 257536 + }, + { + "epoch": 1.01, + "learning_rate": 4.577520222908968e-05, + "loss": 3.8687, + "step": 258048 + }, + { + "epoch": 1.01, + "learning_rate": 4.576681628157916e-05, + "loss": 3.8841, + "step": 258560 + }, + { + "epoch": 1.01, + "learning_rate": 4.5758430334068637e-05, + "loss": 3.8813, + "step": 259072 + }, + { + "epoch": 1.01, + "learning_rate": 4.5750044386558117e-05, + "loss": 3.8825, + "step": 259584 + }, + { + "epoch": 1.01, + "learning_rate": 4.5741674817851326e-05, + "loss": 3.8769, + "step": 260096 + }, + { + "epoch": 1.01, + "learning_rate": 4.5733288870340806e-05, + "loss": 3.8847, + "step": 260608 + }, + { + "epoch": 1.01, + "learning_rate": 4.5724902922830286e-05, + "loss": 3.8617, + "step": 261120 + }, + { + "epoch": 1.01, + "learning_rate": 4.5716516975319765e-05, + "loss": 3.876, + "step": 261632 + }, + { + "epoch": 1.01, + "learning_rate": 4.5708131027809245e-05, + "loss": 3.8819, + "step": 262144 + }, + { + "epoch": 1.01, + "learning_rate": 4.569976145910246e-05, + "loss": 3.8795, + "step": 262656 + }, + { + "epoch": 1.01, + "learning_rate": 4.569137551159194e-05, + "loss": 3.8702, + "step": 263168 + }, + { + "epoch": 1.01, + "learning_rate": 4.568298956408142e-05, + "loss": 3.8689, + "step": 263680 + }, + { + "epoch": 1.01, + "learning_rate": 4.56746036165709e-05, + "loss": 3.8678, + "step": 264192 + }, + { + "epoch": 1.01, + "learning_rate": 4.5666217669060374e-05, + "loss": 3.8768, + "step": 264704 + }, + { + "epoch": 1.01, + "learning_rate": 4.565784810035359e-05, + "loss": 3.8822, + "step": 265216 + }, + { + "epoch": 1.01, + "learning_rate": 4.564946215284307e-05, + "loss": 3.8715, + "step": 265728 + }, + { + "epoch": 1.01, + "learning_rate": 4.564107620533255e-05, + "loss": 3.8843, + "step": 266240 + }, + { + "epoch": 1.01, + "learning_rate": 4.563269025782202e-05, + "loss": 3.8875, + "step": 266752 + }, + { + "epoch": 1.01, + "learning_rate": 4.56243043103115e-05, + "loss": 3.8933, + "step": 267264 + }, + { + "epoch": 1.01, + "learning_rate": 4.561593474160472e-05, + "loss": 3.8601, + "step": 267776 + }, + { + "epoch": 1.01, + "learning_rate": 4.56075487940942e-05, + "loss": 3.8778, + "step": 268288 + }, + { + "epoch": 1.01, + "learning_rate": 4.559916284658368e-05, + "loss": 3.8779, + "step": 268800 + }, + { + "epoch": 1.01, + "learning_rate": 4.559077689907316e-05, + "loss": 3.8638, + "step": 269312 + }, + { + "epoch": 1.01, + "learning_rate": 4.5582407330366375e-05, + "loss": 3.8754, + "step": 269824 + }, + { + "epoch": 1.01, + "learning_rate": 4.557402138285585e-05, + "loss": 3.8793, + "step": 270336 + }, + { + "epoch": 1.01, + "learning_rate": 4.556563543534533e-05, + "loss": 3.8807, + "step": 270848 + }, + { + "epoch": 1.01, + "learning_rate": 4.555724948783481e-05, + "loss": 3.8715, + "step": 271360 + }, + { + "epoch": 1.01, + "learning_rate": 4.5548879919128024e-05, + "loss": 3.8621, + "step": 271872 + }, + { + "epoch": 1.01, + "learning_rate": 4.55404939716175e-05, + "loss": 3.8616, + "step": 272384 + }, + { + "epoch": 1.01, + "learning_rate": 4.553210802410698e-05, + "loss": 3.8661, + "step": 272896 + }, + { + "epoch": 1.01, + "learning_rate": 4.552372207659646e-05, + "loss": 3.8726, + "step": 273408 + }, + { + "epoch": 1.01, + "learning_rate": 4.5515352507889666e-05, + "loss": 3.8764, + "step": 273920 + }, + { + "epoch": 1.01, + "learning_rate": 4.550696656037915e-05, + "loss": 3.8609, + "step": 274432 + }, + { + "epoch": 1.02, + "learning_rate": 4.549858061286863e-05, + "loss": 3.8708, + "step": 274944 + }, + { + "epoch": 1.02, + "learning_rate": 4.549019466535811e-05, + "loss": 3.8563, + "step": 275456 + }, + { + "epoch": 1.02, + "learning_rate": 4.548180871784759e-05, + "loss": 3.8662, + "step": 275968 + }, + { + "epoch": 1.02, + "learning_rate": 4.54734391491408e-05, + "loss": 3.8676, + "step": 276480 + }, + { + "epoch": 1.02, + "learning_rate": 4.546505320163028e-05, + "loss": 3.8662, + "step": 276992 + }, + { + "epoch": 1.02, + "learning_rate": 4.545666725411976e-05, + "loss": 3.8747, + "step": 277504 + }, + { + "epoch": 1.02, + "learning_rate": 4.544828130660924e-05, + "loss": 3.867, + "step": 278016 + }, + { + "epoch": 1.02, + "learning_rate": 4.543989535909872e-05, + "loss": 3.8612, + "step": 278528 + }, + { + "epoch": 1.02, + "learning_rate": 4.543152579039193e-05, + "loss": 3.8691, + "step": 279040 + }, + { + "epoch": 1.02, + "learning_rate": 4.542313984288141e-05, + "loss": 3.8695, + "step": 279552 + }, + { + "epoch": 1.02, + "learning_rate": 4.541477027417462e-05, + "loss": 3.8685, + "step": 280064 + }, + { + "epoch": 1.02, + "learning_rate": 4.5406384326664106e-05, + "loss": 3.8549, + "step": 280576 + }, + { + "epoch": 1.02, + "learning_rate": 4.5397998379153586e-05, + "loss": 3.8613, + "step": 281088 + }, + { + "epoch": 1.02, + "learning_rate": 4.5389612431643066e-05, + "loss": 3.8641, + "step": 281600 + }, + { + "epoch": 1.02, + "learning_rate": 4.5381226484132546e-05, + "loss": 3.8695, + "step": 282112 + }, + { + "epoch": 1.02, + "learning_rate": 4.5372840536622026e-05, + "loss": 3.8486, + "step": 282624 + }, + { + "epoch": 1.02, + "learning_rate": 4.5364470967915235e-05, + "loss": 3.8532, + "step": 283136 + }, + { + "epoch": 1.02, + "learning_rate": 4.5356085020404715e-05, + "loss": 3.8681, + "step": 283648 + }, + { + "epoch": 1.02, + "learning_rate": 4.5347699072894195e-05, + "loss": 3.8587, + "step": 284160 + }, + { + "epoch": 1.02, + "learning_rate": 4.5339313125383675e-05, + "loss": 3.851, + "step": 284672 + }, + { + "epoch": 1.02, + "learning_rate": 4.5330927177873155e-05, + "loss": 3.8549, + "step": 285184 + }, + { + "epoch": 1.02, + "learning_rate": 4.5322541230362635e-05, + "loss": 3.8569, + "step": 285696 + }, + { + "epoch": 1.02, + "learning_rate": 4.5314155282852115e-05, + "loss": 3.8661, + "step": 286208 + }, + { + "epoch": 1.02, + "learning_rate": 4.5305769335341595e-05, + "loss": 3.8562, + "step": 286720 + }, + { + "epoch": 1.02, + "learning_rate": 4.5297399766634804e-05, + "loss": 3.855, + "step": 287232 + }, + { + "epoch": 1.02, + "learning_rate": 4.5289013819124284e-05, + "loss": 3.8622, + "step": 287744 + }, + { + "epoch": 1.02, + "learning_rate": 4.528062787161377e-05, + "loss": 3.8657, + "step": 288256 + }, + { + "epoch": 1.02, + "learning_rate": 4.527224192410325e-05, + "loss": 3.8643, + "step": 288768 + }, + { + "epoch": 1.02, + "learning_rate": 4.526387235539646e-05, + "loss": 3.8546, + "step": 289280 + }, + { + "epoch": 1.02, + "learning_rate": 4.525548640788594e-05, + "loss": 3.8695, + "step": 289792 + }, + { + "epoch": 1.02, + "learning_rate": 4.524710046037542e-05, + "loss": 3.8575, + "step": 290304 + }, + { + "epoch": 1.02, + "learning_rate": 4.52387145128649e-05, + "loss": 3.8459, + "step": 290816 + }, + { + "epoch": 1.02, + "learning_rate": 4.523034494415811e-05, + "loss": 3.855, + "step": 291328 + }, + { + "epoch": 1.02, + "learning_rate": 4.522195899664759e-05, + "loss": 3.8501, + "step": 291840 + }, + { + "epoch": 1.02, + "learning_rate": 4.521357304913707e-05, + "loss": 3.8573, + "step": 292352 + }, + { + "epoch": 1.02, + "learning_rate": 4.520518710162655e-05, + "loss": 3.8498, + "step": 292864 + }, + { + "epoch": 1.02, + "learning_rate": 4.519681753291976e-05, + "loss": 3.8584, + "step": 293376 + }, + { + "epoch": 1.02, + "learning_rate": 4.518843158540924e-05, + "loss": 3.8453, + "step": 293888 + }, + { + "epoch": 1.02, + "learning_rate": 4.5180045637898725e-05, + "loss": 3.8465, + "step": 294400 + }, + { + "epoch": 1.02, + "learning_rate": 4.5171659690388204e-05, + "loss": 3.8477, + "step": 294912 + }, + { + "epoch": 1.02, + "learning_rate": 4.5163290121681414e-05, + "loss": 3.866, + "step": 295424 + }, + { + "epoch": 1.02, + "learning_rate": 4.5154904174170894e-05, + "loss": 3.8583, + "step": 295936 + }, + { + "epoch": 1.02, + "learning_rate": 4.5146518226660373e-05, + "loss": 3.8584, + "step": 296448 + }, + { + "epoch": 1.02, + "learning_rate": 4.5138132279149853e-05, + "loss": 3.842, + "step": 296960 + }, + { + "epoch": 1.02, + "learning_rate": 4.5129746331639333e-05, + "loss": 3.8565, + "step": 297472 + }, + { + "epoch": 1.02, + "learning_rate": 4.512137676293254e-05, + "loss": 3.8612, + "step": 297984 + }, + { + "epoch": 1.02, + "learning_rate": 4.511299081542202e-05, + "loss": 3.8515, + "step": 298496 + }, + { + "epoch": 1.02, + "learning_rate": 4.51046048679115e-05, + "loss": 3.8498, + "step": 299008 + }, + { + "epoch": 1.02, + "learning_rate": 4.5096218920400976e-05, + "loss": 3.8567, + "step": 299520 + }, + { + "epoch": 1.02, + "learning_rate": 4.508784935169419e-05, + "loss": 3.8508, + "step": 300032 + }, + { + "epoch": 1.02, + "learning_rate": 4.507946340418368e-05, + "loss": 3.8601, + "step": 300544 + }, + { + "epoch": 1.02, + "learning_rate": 4.507107745667316e-05, + "loss": 3.8402, + "step": 301056 + }, + { + "epoch": 1.02, + "learning_rate": 4.506269150916263e-05, + "loss": 3.8547, + "step": 301568 + }, + { + "epoch": 1.02, + "learning_rate": 4.505432194045585e-05, + "loss": 3.8437, + "step": 302080 + }, + { + "epoch": 1.02, + "learning_rate": 4.504593599294533e-05, + "loss": 3.8411, + "step": 302592 + }, + { + "epoch": 1.02, + "learning_rate": 4.50375500454348e-05, + "loss": 3.8514, + "step": 303104 + }, + { + "epoch": 1.02, + "learning_rate": 4.502916409792428e-05, + "loss": 3.8572, + "step": 303616 + }, + { + "epoch": 1.02, + "learning_rate": 4.502077815041376e-05, + "loss": 3.8501, + "step": 304128 + }, + { + "epoch": 1.02, + "learning_rate": 4.5012408581706976e-05, + "loss": 3.843, + "step": 304640 + }, + { + "epoch": 1.02, + "learning_rate": 4.500402263419645e-05, + "loss": 3.8471, + "step": 305152 + }, + { + "epoch": 1.03, + "eval_loss": 3.911478042602539, + "eval_runtime": 304.6439, + "eval_samples_per_second": 1252.581, + "eval_steps_per_second": 39.144, + "step": 305280 + }, + { + "epoch": 0.0, + "learning_rate": 4.499563668668593e-05, + "loss": 3.8416, + "step": 305664 + }, + { + "epoch": 0.0, + "learning_rate": 4.4987250739175416e-05, + "loss": 3.8381, + "step": 306176 + }, + { + "epoch": 0.0, + "learning_rate": 4.4978864791664896e-05, + "loss": 3.8472, + "step": 306688 + }, + { + "epoch": 0.0, + "learning_rate": 4.4970478844154376e-05, + "loss": 3.8409, + "step": 307200 + }, + { + "epoch": 0.0, + "learning_rate": 4.4962092896643856e-05, + "loss": 3.8565, + "step": 307712 + }, + { + "epoch": 0.0, + "learning_rate": 4.4953706949133336e-05, + "loss": 3.8374, + "step": 308224 + }, + { + "epoch": 0.0, + "learning_rate": 4.4945321001622816e-05, + "loss": 3.8442, + "step": 308736 + }, + { + "epoch": 0.0, + "learning_rate": 4.4936935054112296e-05, + "loss": 3.8327, + "step": 309248 + }, + { + "epoch": 0.0, + "learning_rate": 4.4928565485405505e-05, + "loss": 3.8469, + "step": 309760 + }, + { + "epoch": 0.0, + "learning_rate": 4.4920179537894985e-05, + "loss": 3.8464, + "step": 310272 + }, + { + "epoch": 0.0, + "learning_rate": 4.4911793590384465e-05, + "loss": 3.8404, + "step": 310784 + }, + { + "epoch": 0.0, + "learning_rate": 4.4903407642873945e-05, + "loss": 3.8459, + "step": 311296 + }, + { + "epoch": 0.0, + "learning_rate": 4.4895038074167154e-05, + "loss": 3.8296, + "step": 311808 + }, + { + "epoch": 0.0, + "learning_rate": 4.4886652126656634e-05, + "loss": 3.8352, + "step": 312320 + }, + { + "epoch": 0.0, + "learning_rate": 4.4878266179146114e-05, + "loss": 3.8334, + "step": 312832 + }, + { + "epoch": 0.0, + "learning_rate": 4.48698802316356e-05, + "loss": 3.8329, + "step": 313344 + }, + { + "epoch": 0.0, + "learning_rate": 4.486151066292881e-05, + "loss": 3.8376, + "step": 313856 + }, + { + "epoch": 0.0, + "learning_rate": 4.485312471541829e-05, + "loss": 3.8317, + "step": 314368 + }, + { + "epoch": 0.0, + "learning_rate": 4.484473876790777e-05, + "loss": 3.8343, + "step": 314880 + }, + { + "epoch": 0.0, + "learning_rate": 4.483635282039725e-05, + "loss": 3.8485, + "step": 315392 + }, + { + "epoch": 0.0, + "learning_rate": 4.482798325169046e-05, + "loss": 3.8407, + "step": 315904 + }, + { + "epoch": 0.0, + "learning_rate": 4.481959730417994e-05, + "loss": 3.8379, + "step": 316416 + }, + { + "epoch": 0.0, + "learning_rate": 4.481121135666942e-05, + "loss": 3.8397, + "step": 316928 + }, + { + "epoch": 0.0, + "learning_rate": 4.48028254091589e-05, + "loss": 3.8404, + "step": 317440 + }, + { + "epoch": 0.0, + "learning_rate": 4.479445584045211e-05, + "loss": 3.8234, + "step": 317952 + }, + { + "epoch": 0.0, + "learning_rate": 4.478606989294159e-05, + "loss": 3.8417, + "step": 318464 + }, + { + "epoch": 0.0, + "learning_rate": 4.477768394543107e-05, + "loss": 3.8308, + "step": 318976 + }, + { + "epoch": 0.0, + "learning_rate": 4.476929799792055e-05, + "loss": 3.8273, + "step": 319488 + }, + { + "epoch": 0.0, + "learning_rate": 4.476092842921376e-05, + "loss": 3.8226, + "step": 320000 + }, + { + "epoch": 0.0, + "learning_rate": 4.475254248170324e-05, + "loss": 3.8302, + "step": 320512 + }, + { + "epoch": 0.01, + "learning_rate": 4.474415653419272e-05, + "loss": 3.8289, + "step": 321024 + }, + { + "epoch": 0.01, + "learning_rate": 4.47357705866822e-05, + "loss": 3.8443, + "step": 321536 + }, + { + "epoch": 0.01, + "learning_rate": 4.472738463917168e-05, + "loss": 3.8343, + "step": 322048 + }, + { + "epoch": 0.01, + "learning_rate": 4.471901507046489e-05, + "loss": 3.831, + "step": 322560 + }, + { + "epoch": 0.01, + "learning_rate": 4.471062912295437e-05, + "loss": 3.8226, + "step": 323072 + }, + { + "epoch": 0.01, + "learning_rate": 4.470224317544385e-05, + "loss": 3.8348, + "step": 323584 + }, + { + "epoch": 0.01, + "learning_rate": 4.469385722793333e-05, + "loss": 3.8268, + "step": 324096 + }, + { + "epoch": 0.01, + "learning_rate": 4.468548765922654e-05, + "loss": 3.8349, + "step": 324608 + }, + { + "epoch": 0.01, + "learning_rate": 4.467710171171602e-05, + "loss": 3.8128, + "step": 325120 + }, + { + "epoch": 0.01, + "learning_rate": 4.46687157642055e-05, + "loss": 3.8234, + "step": 325632 + }, + { + "epoch": 0.01, + "learning_rate": 4.466034619549872e-05, + "loss": 3.8243, + "step": 326144 + }, + { + "epoch": 0.01, + "learning_rate": 4.46519602479882e-05, + "loss": 3.8388, + "step": 326656 + }, + { + "epoch": 0.01, + "learning_rate": 4.464357430047768e-05, + "loss": 3.8255, + "step": 327168 + }, + { + "epoch": 0.01, + "learning_rate": 4.463518835296716e-05, + "loss": 3.8256, + "step": 327680 + }, + { + "epoch": 0.01, + "learning_rate": 4.462680240545664e-05, + "loss": 3.8268, + "step": 328192 + }, + { + "epoch": 0.01, + "learning_rate": 4.4618432836749846e-05, + "loss": 3.823, + "step": 328704 + }, + { + "epoch": 0.01, + "learning_rate": 4.4610046889239326e-05, + "loss": 3.8307, + "step": 329216 + }, + { + "epoch": 0.01, + "learning_rate": 4.4601660941728806e-05, + "loss": 3.8159, + "step": 329728 + }, + { + "epoch": 0.01, + "learning_rate": 4.4593274994218286e-05, + "loss": 3.8116, + "step": 330240 + }, + { + "epoch": 0.01, + "learning_rate": 4.4584905425511495e-05, + "loss": 3.8304, + "step": 330752 + }, + { + "epoch": 0.01, + "learning_rate": 4.4576519478000975e-05, + "loss": 3.8206, + "step": 331264 + }, + { + "epoch": 0.01, + "learning_rate": 4.4568133530490455e-05, + "loss": 3.8207, + "step": 331776 + }, + { + "epoch": 0.01, + "learning_rate": 4.455974758297994e-05, + "loss": 3.8153, + "step": 332288 + }, + { + "epoch": 0.01, + "learning_rate": 4.455137801427315e-05, + "loss": 3.8224, + "step": 332800 + }, + { + "epoch": 0.01, + "learning_rate": 4.454299206676263e-05, + "loss": 3.8061, + "step": 333312 + }, + { + "epoch": 0.01, + "learning_rate": 4.453460611925211e-05, + "loss": 3.8218, + "step": 333824 + }, + { + "epoch": 0.01, + "learning_rate": 4.4526220171741584e-05, + "loss": 3.8097, + "step": 334336 + }, + { + "epoch": 0.01, + "learning_rate": 4.45178506030348e-05, + "loss": 3.8182, + "step": 334848 + }, + { + "epoch": 0.01, + "learning_rate": 4.450946465552428e-05, + "loss": 3.8218, + "step": 335360 + }, + { + "epoch": 0.01, + "learning_rate": 4.450107870801376e-05, + "loss": 3.818, + "step": 335872 + }, + { + "epoch": 0.01, + "learning_rate": 4.449269276050323e-05, + "loss": 3.815, + "step": 336384 + }, + { + "epoch": 0.01, + "learning_rate": 4.448432319179645e-05, + "loss": 3.8271, + "step": 336896 + }, + { + "epoch": 0.01, + "learning_rate": 4.447593724428593e-05, + "loss": 3.801, + "step": 337408 + }, + { + "epoch": 0.01, + "learning_rate": 4.446755129677541e-05, + "loss": 3.807, + "step": 337920 + }, + { + "epoch": 0.01, + "learning_rate": 4.445916534926489e-05, + "loss": 3.8198, + "step": 338432 + }, + { + "epoch": 0.01, + "learning_rate": 4.4450795780558104e-05, + "loss": 3.8148, + "step": 338944 + }, + { + "epoch": 0.01, + "learning_rate": 4.4442409833047584e-05, + "loss": 3.8111, + "step": 339456 + }, + { + "epoch": 0.01, + "learning_rate": 4.443402388553706e-05, + "loss": 3.8048, + "step": 339968 + }, + { + "epoch": 0.01, + "learning_rate": 4.442563793802654e-05, + "loss": 3.8051, + "step": 340480 + }, + { + "epoch": 0.01, + "learning_rate": 4.441726836931975e-05, + "loss": 3.8127, + "step": 340992 + }, + { + "epoch": 0.01, + "learning_rate": 4.440888242180923e-05, + "loss": 3.8203, + "step": 341504 + }, + { + "epoch": 0.01, + "learning_rate": 4.4400496474298706e-05, + "loss": 3.8126, + "step": 342016 + }, + { + "epoch": 0.01, + "learning_rate": 4.4392110526788186e-05, + "loss": 3.8209, + "step": 342528 + }, + { + "epoch": 0.01, + "learning_rate": 4.4383724579277666e-05, + "loss": 3.8232, + "step": 343040 + }, + { + "epoch": 0.01, + "learning_rate": 4.437535501057088e-05, + "loss": 3.8343, + "step": 343552 + }, + { + "epoch": 0.01, + "learning_rate": 4.436696906306036e-05, + "loss": 3.8034, + "step": 344064 + }, + { + "epoch": 0.01, + "learning_rate": 4.435858311554984e-05, + "loss": 3.8137, + "step": 344576 + }, + { + "epoch": 0.01, + "learning_rate": 4.435019716803932e-05, + "loss": 3.8208, + "step": 345088 + }, + { + "epoch": 0.01, + "learning_rate": 4.434182759933253e-05, + "loss": 3.7979, + "step": 345600 + }, + { + "epoch": 0.01, + "learning_rate": 4.433344165182201e-05, + "loss": 3.8189, + "step": 346112 + }, + { + "epoch": 0.01, + "learning_rate": 4.432505570431149e-05, + "loss": 3.8162, + "step": 346624 + }, + { + "epoch": 0.01, + "learning_rate": 4.431666975680097e-05, + "loss": 3.8196, + "step": 347136 + }, + { + "epoch": 0.01, + "learning_rate": 4.430830018809418e-05, + "loss": 3.8134, + "step": 347648 + }, + { + "epoch": 0.01, + "learning_rate": 4.429991424058366e-05, + "loss": 3.8015, + "step": 348160 + }, + { + "epoch": 0.01, + "learning_rate": 4.429152829307314e-05, + "loss": 3.8018, + "step": 348672 + }, + { + "epoch": 0.01, + "learning_rate": 4.428314234556262e-05, + "loss": 3.8066, + "step": 349184 + }, + { + "epoch": 0.01, + "learning_rate": 4.4274772776855836e-05, + "loss": 3.8114, + "step": 349696 + }, + { + "epoch": 0.01, + "learning_rate": 4.4266386829345316e-05, + "loss": 3.8171, + "step": 350208 + }, + { + "epoch": 0.01, + "learning_rate": 4.4258000881834796e-05, + "loss": 3.8007, + "step": 350720 + }, + { + "epoch": 0.02, + "learning_rate": 4.4249614934324276e-05, + "loss": 3.8088, + "step": 351232 + }, + { + "epoch": 0.02, + "learning_rate": 4.4241245365617485e-05, + "loss": 3.7974, + "step": 351744 + }, + { + "epoch": 0.02, + "learning_rate": 4.4232859418106965e-05, + "loss": 3.806, + "step": 352256 + }, + { + "epoch": 0.02, + "learning_rate": 4.4224473470596445e-05, + "loss": 3.8086, + "step": 352768 + }, + { + "epoch": 0.02, + "learning_rate": 4.4216087523085925e-05, + "loss": 3.8102, + "step": 353280 + }, + { + "epoch": 0.02, + "learning_rate": 4.4207717954379134e-05, + "loss": 3.8176, + "step": 353792 + }, + { + "epoch": 0.02, + "learning_rate": 4.4199332006868614e-05, + "loss": 3.8044, + "step": 354304 + }, + { + "epoch": 0.02, + "learning_rate": 4.4190946059358094e-05, + "loss": 3.8048, + "step": 354816 + }, + { + "epoch": 0.02, + "learning_rate": 4.4182560111847573e-05, + "loss": 3.8072, + "step": 355328 + }, + { + "epoch": 0.02, + "learning_rate": 4.417419054314079e-05, + "loss": 3.8071, + "step": 355840 + }, + { + "epoch": 0.02, + "learning_rate": 4.416580459563027e-05, + "loss": 3.8125, + "step": 356352 + }, + { + "epoch": 0.02, + "learning_rate": 4.415741864811975e-05, + "loss": 3.7961, + "step": 356864 + }, + { + "epoch": 0.02, + "learning_rate": 4.414903270060923e-05, + "loss": 3.8049, + "step": 357376 + }, + { + "epoch": 0.02, + "learning_rate": 4.414066313190244e-05, + "loss": 3.8035, + "step": 357888 + }, + { + "epoch": 0.02, + "learning_rate": 4.413227718439192e-05, + "loss": 3.8115, + "step": 358400 + }, + { + "epoch": 0.02, + "learning_rate": 4.41238912368814e-05, + "loss": 3.7911, + "step": 358912 + }, + { + "epoch": 0.02, + "learning_rate": 4.411550528937088e-05, + "loss": 3.7982, + "step": 359424 + }, + { + "epoch": 0.02, + "learning_rate": 4.410713572066409e-05, + "loss": 3.8096, + "step": 359936 + }, + { + "epoch": 0.02, + "learning_rate": 4.409874977315357e-05, + "loss": 3.8016, + "step": 360448 + }, + { + "epoch": 0.02, + "learning_rate": 4.409036382564305e-05, + "loss": 3.7899, + "step": 360960 + }, + { + "epoch": 0.02, + "learning_rate": 4.408197787813253e-05, + "loss": 3.8029, + "step": 361472 + }, + { + "epoch": 0.02, + "learning_rate": 4.407359193062201e-05, + "loss": 3.7952, + "step": 361984 + }, + { + "epoch": 0.02, + "learning_rate": 4.4065205983111494e-05, + "loss": 3.8114, + "step": 362496 + }, + { + "epoch": 0.02, + "learning_rate": 4.4056820035600974e-05, + "loss": 3.7983, + "step": 363008 + }, + { + "epoch": 0.02, + "learning_rate": 4.404845046689418e-05, + "loss": 3.7945, + "step": 363520 + }, + { + "epoch": 0.02, + "learning_rate": 4.404006451938366e-05, + "loss": 3.8081, + "step": 364032 + }, + { + "epoch": 0.02, + "learning_rate": 4.403167857187314e-05, + "loss": 3.8131, + "step": 364544 + }, + { + "epoch": 0.02, + "learning_rate": 4.402329262436262e-05, + "loss": 3.8047, + "step": 365056 + }, + { + "epoch": 0.02, + "learning_rate": 4.401492305565583e-05, + "loss": 3.8, + "step": 365568 + }, + { + "epoch": 0.02, + "learning_rate": 4.400653710814531e-05, + "loss": 3.8075, + "step": 366080 + }, + { + "epoch": 0.02, + "learning_rate": 4.399815116063479e-05, + "loss": 3.801, + "step": 366592 + }, + { + "epoch": 0.02, + "learning_rate": 4.398976521312427e-05, + "loss": 3.7938, + "step": 367104 + }, + { + "epoch": 0.02, + "learning_rate": 4.398139564441748e-05, + "loss": 3.804, + "step": 367616 + }, + { + "epoch": 0.02, + "learning_rate": 4.397300969690696e-05, + "loss": 3.7924, + "step": 368128 + }, + { + "epoch": 0.02, + "learning_rate": 4.396462374939645e-05, + "loss": 3.7997, + "step": 368640 + }, + { + "epoch": 0.02, + "learning_rate": 4.395623780188593e-05, + "loss": 3.7938, + "step": 369152 + }, + { + "epoch": 0.02, + "learning_rate": 4.3947868233179137e-05, + "loss": 3.8012, + "step": 369664 + }, + { + "epoch": 0.02, + "learning_rate": 4.3939482285668617e-05, + "loss": 3.7893, + "step": 370176 + }, + { + "epoch": 0.02, + "learning_rate": 4.3931096338158096e-05, + "loss": 3.7885, + "step": 370688 + }, + { + "epoch": 0.02, + "learning_rate": 4.3922710390647576e-05, + "loss": 3.7953, + "step": 371200 + }, + { + "epoch": 0.02, + "learning_rate": 4.3914340821940786e-05, + "loss": 3.8064, + "step": 371712 + }, + { + "epoch": 0.02, + "learning_rate": 4.3905954874430265e-05, + "loss": 3.806, + "step": 372224 + }, + { + "epoch": 0.02, + "learning_rate": 4.3897568926919745e-05, + "loss": 3.804, + "step": 372736 + }, + { + "epoch": 0.02, + "learning_rate": 4.3889182979409225e-05, + "loss": 3.7847, + "step": 373248 + }, + { + "epoch": 0.02, + "learning_rate": 4.3880813410702434e-05, + "loss": 3.8025, + "step": 373760 + }, + { + "epoch": 0.02, + "learning_rate": 4.3872427463191914e-05, + "loss": 3.8026, + "step": 374272 + }, + { + "epoch": 0.02, + "learning_rate": 4.3864041515681394e-05, + "loss": 3.8003, + "step": 374784 + }, + { + "epoch": 0.02, + "learning_rate": 4.385565556817088e-05, + "loss": 3.7918, + "step": 375296 + }, + { + "epoch": 0.02, + "learning_rate": 4.384728599946409e-05, + "loss": 3.805, + "step": 375808 + }, + { + "epoch": 0.02, + "learning_rate": 4.383890005195357e-05, + "loss": 3.793, + "step": 376320 + }, + { + "epoch": 0.02, + "learning_rate": 4.383051410444305e-05, + "loss": 3.8055, + "step": 376832 + }, + { + "epoch": 0.02, + "learning_rate": 4.382212815693253e-05, + "loss": 3.782, + "step": 377344 + }, + { + "epoch": 0.02, + "learning_rate": 4.381375858822574e-05, + "loss": 3.8051, + "step": 377856 + }, + { + "epoch": 0.02, + "learning_rate": 4.380537264071522e-05, + "loss": 3.7878, + "step": 378368 + }, + { + "epoch": 0.02, + "learning_rate": 4.37969866932047e-05, + "loss": 3.7875, + "step": 378880 + }, + { + "epoch": 0.02, + "learning_rate": 4.378860074569418e-05, + "loss": 3.7985, + "step": 379392 + }, + { + "epoch": 0.02, + "learning_rate": 4.378023117698739e-05, + "loss": 3.7962, + "step": 379904 + }, + { + "epoch": 0.02, + "learning_rate": 4.377184522947687e-05, + "loss": 3.8, + "step": 380416 + }, + { + "epoch": 0.02, + "learning_rate": 4.376345928196635e-05, + "loss": 3.7821, + "step": 380928 + }, + { + "epoch": 0.02, + "learning_rate": 4.3755073334455835e-05, + "loss": 3.7981, + "step": 381440 + }, + { + "epoch": 0.03, + "eval_loss": 3.8869946002960205, + "eval_runtime": 304.6963, + "eval_samples_per_second": 1252.365, + "eval_steps_per_second": 39.137, + "step": 381600 + }, + { + "epoch": 0.0, + "learning_rate": 4.3746687386945315e-05, + "loss": 3.788, + "step": 381952 + }, + { + "epoch": 0.0, + "learning_rate": 4.373830143943479e-05, + "loss": 3.7855, + "step": 382464 + }, + { + "epoch": 0.0, + "learning_rate": 4.372991549192427e-05, + "loss": 3.7967, + "step": 382976 + }, + { + "epoch": 0.0, + "learning_rate": 4.372152954441375e-05, + "loss": 3.7884, + "step": 383488 + }, + { + "epoch": 0.0, + "learning_rate": 4.371314359690323e-05, + "loss": 3.8014, + "step": 384000 + }, + { + "epoch": 0.0, + "learning_rate": 4.370475764939271e-05, + "loss": 3.7846, + "step": 384512 + }, + { + "epoch": 0.0, + "learning_rate": 4.369637170188219e-05, + "loss": 3.7848, + "step": 385024 + }, + { + "epoch": 0.0, + "learning_rate": 4.368798575437167e-05, + "loss": 3.7859, + "step": 385536 + }, + { + "epoch": 0.0, + "learning_rate": 4.367961618566488e-05, + "loss": 3.7922, + "step": 386048 + }, + { + "epoch": 0.0, + "learning_rate": 4.367123023815436e-05, + "loss": 3.7938, + "step": 386560 + }, + { + "epoch": 0.0, + "learning_rate": 4.366284429064384e-05, + "loss": 3.7851, + "step": 387072 + }, + { + "epoch": 0.0, + "learning_rate": 4.365445834313332e-05, + "loss": 3.7946, + "step": 387584 + }, + { + "epoch": 0.0, + "learning_rate": 4.364608877442653e-05, + "loss": 3.7785, + "step": 388096 + }, + { + "epoch": 0.0, + "learning_rate": 4.363770282691601e-05, + "loss": 3.7832, + "step": 388608 + }, + { + "epoch": 0.0, + "learning_rate": 4.362931687940549e-05, + "loss": 3.7848, + "step": 389120 + }, + { + "epoch": 0.0, + "learning_rate": 4.362093093189497e-05, + "loss": 3.7767, + "step": 389632 + }, + { + "epoch": 0.0, + "learning_rate": 4.361256136318818e-05, + "loss": 3.7848, + "step": 390144 + }, + { + "epoch": 0.0, + "learning_rate": 4.360417541567766e-05, + "loss": 3.7783, + "step": 390656 + }, + { + "epoch": 0.0, + "learning_rate": 4.359578946816714e-05, + "loss": 3.7784, + "step": 391168 + }, + { + "epoch": 0.0, + "learning_rate": 4.358740352065662e-05, + "loss": 3.7993, + "step": 391680 + }, + { + "epoch": 0.0, + "learning_rate": 4.357903395194983e-05, + "loss": 3.7871, + "step": 392192 + }, + { + "epoch": 0.0, + "learning_rate": 4.357064800443931e-05, + "loss": 3.7922, + "step": 392704 + }, + { + "epoch": 0.0, + "learning_rate": 4.356226205692879e-05, + "loss": 3.7857, + "step": 393216 + }, + { + "epoch": 0.0, + "learning_rate": 4.355387610941827e-05, + "loss": 3.7884, + "step": 393728 + }, + { + "epoch": 0.0, + "learning_rate": 4.3545506540711486e-05, + "loss": 3.7724, + "step": 394240 + }, + { + "epoch": 0.0, + "learning_rate": 4.3537120593200966e-05, + "loss": 3.791, + "step": 394752 + }, + { + "epoch": 0.0, + "learning_rate": 4.3528734645690446e-05, + "loss": 3.7813, + "step": 395264 + }, + { + "epoch": 0.0, + "learning_rate": 4.3520348698179926e-05, + "loss": 3.7708, + "step": 395776 + }, + { + "epoch": 0.0, + "learning_rate": 4.3511979129473135e-05, + "loss": 3.7729, + "step": 396288 + }, + { + "epoch": 0.0, + "learning_rate": 4.3503593181962615e-05, + "loss": 3.7767, + "step": 396800 + }, + { + "epoch": 0.01, + "learning_rate": 4.3495207234452095e-05, + "loss": 3.7804, + "step": 397312 + }, + { + "epoch": 0.01, + "learning_rate": 4.3486821286941575e-05, + "loss": 3.7907, + "step": 397824 + }, + { + "epoch": 0.01, + "learning_rate": 4.3478451718234784e-05, + "loss": 3.7876, + "step": 398336 + }, + { + "epoch": 0.01, + "learning_rate": 4.3470065770724264e-05, + "loss": 3.7797, + "step": 398848 + }, + { + "epoch": 0.01, + "learning_rate": 4.3461679823213744e-05, + "loss": 3.7751, + "step": 399360 + }, + { + "epoch": 0.01, + "learning_rate": 4.3453293875703224e-05, + "loss": 3.7806, + "step": 399872 + }, + { + "epoch": 0.01, + "learning_rate": 4.344492430699644e-05, + "loss": 3.7773, + "step": 400384 + }, + { + "epoch": 0.01, + "learning_rate": 4.343653835948592e-05, + "loss": 3.7818, + "step": 400896 + }, + { + "epoch": 0.01, + "learning_rate": 4.34281524119754e-05, + "loss": 3.7685, + "step": 401408 + }, + { + "epoch": 0.01, + "learning_rate": 4.341978284326861e-05, + "loss": 3.7702, + "step": 401920 + }, + { + "epoch": 0.01, + "learning_rate": 4.341141327456182e-05, + "loss": 3.7749, + "step": 402432 + }, + { + "epoch": 0.01, + "learning_rate": 4.34030273270513e-05, + "loss": 3.7857, + "step": 402944 + }, + { + "epoch": 0.01, + "learning_rate": 4.339464137954078e-05, + "loss": 3.7792, + "step": 403456 + }, + { + "epoch": 0.01, + "learning_rate": 4.338625543203026e-05, + "loss": 3.7735, + "step": 403968 + }, + { + "epoch": 0.01, + "learning_rate": 4.337786948451974e-05, + "loss": 3.7791, + "step": 404480 + }, + { + "epoch": 0.01, + "learning_rate": 4.336948353700922e-05, + "loss": 3.7725, + "step": 404992 + }, + { + "epoch": 0.01, + "learning_rate": 4.33610975894987e-05, + "loss": 3.7811, + "step": 405504 + }, + { + "epoch": 0.01, + "learning_rate": 4.335271164198818e-05, + "loss": 3.7672, + "step": 406016 + }, + { + "epoch": 0.01, + "learning_rate": 4.3344342073281393e-05, + "loss": 3.7601, + "step": 406528 + }, + { + "epoch": 0.01, + "learning_rate": 4.3335956125770873e-05, + "loss": 3.7807, + "step": 407040 + }, + { + "epoch": 0.01, + "learning_rate": 4.3327570178260353e-05, + "loss": 3.7737, + "step": 407552 + }, + { + "epoch": 0.01, + "learning_rate": 4.331918423074983e-05, + "loss": 3.7706, + "step": 408064 + }, + { + "epoch": 0.01, + "learning_rate": 4.331081466204304e-05, + "loss": 3.7688, + "step": 408576 + }, + { + "epoch": 0.01, + "learning_rate": 4.330242871453252e-05, + "loss": 3.7702, + "step": 409088 + }, + { + "epoch": 0.01, + "learning_rate": 4.3294042767022e-05, + "loss": 3.7597, + "step": 409600 + }, + { + "epoch": 0.01, + "learning_rate": 4.328565681951148e-05, + "loss": 3.7684, + "step": 410112 + }, + { + "epoch": 0.01, + "learning_rate": 4.327728725080469e-05, + "loss": 3.7623, + "step": 410624 + }, + { + "epoch": 0.01, + "learning_rate": 4.326890130329417e-05, + "loss": 3.7676, + "step": 411136 + }, + { + "epoch": 0.01, + "learning_rate": 4.326051535578365e-05, + "loss": 3.7757, + "step": 411648 + }, + { + "epoch": 0.01, + "learning_rate": 4.325212940827313e-05, + "loss": 3.766, + "step": 412160 + }, + { + "epoch": 0.01, + "learning_rate": 4.324375983956635e-05, + "loss": 3.768, + "step": 412672 + }, + { + "epoch": 0.01, + "learning_rate": 4.323537389205583e-05, + "loss": 3.7793, + "step": 413184 + }, + { + "epoch": 0.01, + "learning_rate": 4.322698794454531e-05, + "loss": 3.7547, + "step": 413696 + }, + { + "epoch": 0.01, + "learning_rate": 4.321860199703479e-05, + "loss": 3.7588, + "step": 414208 + }, + { + "epoch": 0.01, + "learning_rate": 4.3210232428327996e-05, + "loss": 3.7677, + "step": 414720 + }, + { + "epoch": 0.01, + "learning_rate": 4.3201846480817476e-05, + "loss": 3.7699, + "step": 415232 + }, + { + "epoch": 0.01, + "learning_rate": 4.3193460533306956e-05, + "loss": 3.7609, + "step": 415744 + }, + { + "epoch": 0.01, + "learning_rate": 4.3185074585796436e-05, + "loss": 3.7618, + "step": 416256 + }, + { + "epoch": 0.01, + "learning_rate": 4.3176705017089645e-05, + "loss": 3.756, + "step": 416768 + }, + { + "epoch": 0.01, + "learning_rate": 4.3168319069579125e-05, + "loss": 3.7656, + "step": 417280 + }, + { + "epoch": 0.01, + "learning_rate": 4.3159933122068605e-05, + "loss": 3.77, + "step": 417792 + }, + { + "epoch": 0.01, + "learning_rate": 4.3151547174558085e-05, + "loss": 3.7683, + "step": 418304 + }, + { + "epoch": 0.01, + "learning_rate": 4.31431776058513e-05, + "loss": 3.7747, + "step": 418816 + }, + { + "epoch": 0.01, + "learning_rate": 4.313479165834078e-05, + "loss": 3.7692, + "step": 419328 + }, + { + "epoch": 0.01, + "learning_rate": 4.312640571083026e-05, + "loss": 3.794, + "step": 419840 + }, + { + "epoch": 0.01, + "learning_rate": 4.311801976331974e-05, + "loss": 3.756, + "step": 420352 + }, + { + "epoch": 0.01, + "learning_rate": 4.310965019461295e-05, + "loss": 3.7652, + "step": 420864 + }, + { + "epoch": 0.01, + "learning_rate": 4.310126424710243e-05, + "loss": 3.7737, + "step": 421376 + }, + { + "epoch": 0.01, + "learning_rate": 4.309287829959191e-05, + "loss": 3.7502, + "step": 421888 + }, + { + "epoch": 0.01, + "learning_rate": 4.308449235208139e-05, + "loss": 3.7719, + "step": 422400 + }, + { + "epoch": 0.01, + "learning_rate": 4.30761227833746e-05, + "loss": 3.7706, + "step": 422912 + }, + { + "epoch": 0.01, + "learning_rate": 4.306773683586408e-05, + "loss": 3.7696, + "step": 423424 + }, + { + "epoch": 0.01, + "learning_rate": 4.305935088835356e-05, + "loss": 3.7666, + "step": 423936 + }, + { + "epoch": 0.01, + "learning_rate": 4.305096494084304e-05, + "loss": 3.7529, + "step": 424448 + }, + { + "epoch": 0.01, + "learning_rate": 4.3042595372136254e-05, + "loss": 3.7632, + "step": 424960 + }, + { + "epoch": 0.01, + "learning_rate": 4.3034209424625734e-05, + "loss": 3.7543, + "step": 425472 + }, + { + "epoch": 0.01, + "learning_rate": 4.3025823477115214e-05, + "loss": 3.7671, + "step": 425984 + }, + { + "epoch": 0.01, + "learning_rate": 4.301743752960469e-05, + "loss": 3.7696, + "step": 426496 + }, + { + "epoch": 0.01, + "learning_rate": 4.3009067960897903e-05, + "loss": 3.7494, + "step": 427008 + }, + { + "epoch": 0.02, + "learning_rate": 4.3000682013387383e-05, + "loss": 3.7655, + "step": 427520 + }, + { + "epoch": 0.02, + "learning_rate": 4.299229606587686e-05, + "loss": 3.7556, + "step": 428032 + }, + { + "epoch": 0.02, + "learning_rate": 4.2983910118366337e-05, + "loss": 3.7566, + "step": 428544 + }, + { + "epoch": 0.02, + "learning_rate": 4.297554054965955e-05, + "loss": 3.7584, + "step": 429056 + }, + { + "epoch": 0.02, + "learning_rate": 4.296715460214903e-05, + "loss": 3.7701, + "step": 429568 + }, + { + "epoch": 0.02, + "learning_rate": 4.295876865463851e-05, + "loss": 3.7739, + "step": 430080 + }, + { + "epoch": 0.02, + "learning_rate": 4.295038270712799e-05, + "loss": 3.7533, + "step": 430592 + }, + { + "epoch": 0.02, + "learning_rate": 4.294201313842121e-05, + "loss": 3.7569, + "step": 431104 + }, + { + "epoch": 0.02, + "learning_rate": 4.293362719091069e-05, + "loss": 3.7631, + "step": 431616 + }, + { + "epoch": 0.02, + "learning_rate": 4.292524124340016e-05, + "loss": 3.757, + "step": 432128 + }, + { + "epoch": 0.02, + "learning_rate": 4.291687167469338e-05, + "loss": 3.7694, + "step": 432640 + }, + { + "epoch": 0.02, + "learning_rate": 4.290848572718286e-05, + "loss": 3.7515, + "step": 433152 + }, + { + "epoch": 0.02, + "learning_rate": 4.290009977967234e-05, + "loss": 3.7567, + "step": 433664 + }, + { + "epoch": 0.02, + "learning_rate": 4.289171383216181e-05, + "loss": 3.7605, + "step": 434176 + }, + { + "epoch": 0.02, + "learning_rate": 4.288332788465129e-05, + "loss": 3.7661, + "step": 434688 + }, + { + "epoch": 0.02, + "learning_rate": 4.287494193714077e-05, + "loss": 3.7447, + "step": 435200 + }, + { + "epoch": 0.02, + "learning_rate": 4.286655598963025e-05, + "loss": 3.7528, + "step": 435712 + }, + { + "epoch": 0.02, + "learning_rate": 4.285817004211973e-05, + "loss": 3.7597, + "step": 436224 + }, + { + "epoch": 0.02, + "learning_rate": 4.2849800473412946e-05, + "loss": 3.7599, + "step": 436736 + }, + { + "epoch": 0.02, + "learning_rate": 4.2841414525902426e-05, + "loss": 3.744, + "step": 437248 + }, + { + "epoch": 0.02, + "learning_rate": 4.2833028578391906e-05, + "loss": 3.7647, + "step": 437760 + }, + { + "epoch": 0.02, + "learning_rate": 4.2824642630881386e-05, + "loss": 3.7481, + "step": 438272 + }, + { + "epoch": 0.02, + "learning_rate": 4.2816273062174595e-05, + "loss": 3.763, + "step": 438784 + }, + { + "epoch": 0.02, + "learning_rate": 4.2807887114664075e-05, + "loss": 3.7556, + "step": 439296 + }, + { + "epoch": 0.02, + "learning_rate": 4.2799501167153555e-05, + "loss": 3.7511, + "step": 439808 + }, + { + "epoch": 0.02, + "learning_rate": 4.2791115219643035e-05, + "loss": 3.7609, + "step": 440320 + }, + { + "epoch": 0.02, + "learning_rate": 4.2782745650936244e-05, + "loss": 3.7698, + "step": 440832 + }, + { + "epoch": 0.02, + "learning_rate": 4.277437608222945e-05, + "loss": 3.7576, + "step": 441344 + }, + { + "epoch": 0.02, + "learning_rate": 4.276599013471893e-05, + "loss": 3.7556, + "step": 441856 + }, + { + "epoch": 0.02, + "learning_rate": 4.275760418720842e-05, + "loss": 3.763, + "step": 442368 + }, + { + "epoch": 0.02, + "learning_rate": 4.27492182396979e-05, + "loss": 3.7567, + "step": 442880 + }, + { + "epoch": 0.02, + "learning_rate": 4.274083229218738e-05, + "loss": 3.7494, + "step": 443392 + }, + { + "epoch": 0.02, + "learning_rate": 4.273244634467686e-05, + "loss": 3.755, + "step": 443904 + }, + { + "epoch": 0.02, + "learning_rate": 4.272406039716634e-05, + "loss": 3.7505, + "step": 444416 + }, + { + "epoch": 0.02, + "learning_rate": 4.271567444965582e-05, + "loss": 3.7546, + "step": 444928 + }, + { + "epoch": 0.02, + "learning_rate": 4.270730488094903e-05, + "loss": 3.7539, + "step": 445440 + }, + { + "epoch": 0.02, + "learning_rate": 4.269891893343851e-05, + "loss": 3.7515, + "step": 445952 + }, + { + "epoch": 0.02, + "learning_rate": 4.269053298592799e-05, + "loss": 3.7513, + "step": 446464 + }, + { + "epoch": 0.02, + "learning_rate": 4.268214703841747e-05, + "loss": 3.7422, + "step": 446976 + }, + { + "epoch": 0.02, + "learning_rate": 4.267377746971068e-05, + "loss": 3.7541, + "step": 447488 + }, + { + "epoch": 0.02, + "learning_rate": 4.266539152220016e-05, + "loss": 3.7624, + "step": 448000 + }, + { + "epoch": 0.02, + "learning_rate": 4.265700557468964e-05, + "loss": 3.7594, + "step": 448512 + }, + { + "epoch": 0.02, + "learning_rate": 4.264861962717912e-05, + "loss": 3.7611, + "step": 449024 + }, + { + "epoch": 0.02, + "learning_rate": 4.264025005847233e-05, + "loss": 3.7402, + "step": 449536 + }, + { + "epoch": 0.02, + "learning_rate": 4.263186411096181e-05, + "loss": 3.762, + "step": 450048 + }, + { + "epoch": 0.02, + "learning_rate": 4.262347816345129e-05, + "loss": 3.7581, + "step": 450560 + }, + { + "epoch": 0.02, + "learning_rate": 4.261509221594077e-05, + "loss": 3.755, + "step": 451072 + }, + { + "epoch": 0.02, + "learning_rate": 4.260672264723398e-05, + "loss": 3.7529, + "step": 451584 + }, + { + "epoch": 0.02, + "learning_rate": 4.259833669972346e-05, + "loss": 3.7577, + "step": 452096 + }, + { + "epoch": 0.02, + "learning_rate": 4.258995075221294e-05, + "loss": 3.7499, + "step": 452608 + }, + { + "epoch": 0.02, + "learning_rate": 4.258156480470242e-05, + "loss": 3.7673, + "step": 453120 + }, + { + "epoch": 0.02, + "learning_rate": 4.257319523599563e-05, + "loss": 3.7375, + "step": 453632 + }, + { + "epoch": 0.02, + "learning_rate": 4.256480928848511e-05, + "loss": 3.7597, + "step": 454144 + }, + { + "epoch": 0.02, + "learning_rate": 4.255642334097459e-05, + "loss": 3.7439, + "step": 454656 + }, + { + "epoch": 0.02, + "learning_rate": 4.254805377226781e-05, + "loss": 3.7472, + "step": 455168 + }, + { + "epoch": 0.02, + "learning_rate": 4.253966782475729e-05, + "loss": 3.7557, + "step": 455680 + }, + { + "epoch": 0.02, + "learning_rate": 4.253128187724677e-05, + "loss": 3.7488, + "step": 456192 + }, + { + "epoch": 0.02, + "learning_rate": 4.252289592973625e-05, + "loss": 3.7633, + "step": 456704 + }, + { + "epoch": 0.02, + "learning_rate": 4.2514526361029456e-05, + "loss": 3.735, + "step": 457216 + }, + { + "epoch": 0.02, + "learning_rate": 4.2506140413518936e-05, + "loss": 3.7583, + "step": 457728 + }, + { + "epoch": 0.03, + "eval_loss": 3.869764566421509, + "eval_runtime": 309.915, + "eval_samples_per_second": 1231.276, + "eval_steps_per_second": 38.478, + "step": 457920 + }, + { + "epoch": 1.0, + "learning_rate": 4.2497754466008416e-05, + "loss": 3.7511, + "step": 458240 + }, + { + "epoch": 1.0, + "learning_rate": 4.2489368518497896e-05, + "loss": 3.7398, + "step": 458752 + }, + { + "epoch": 1.0, + "learning_rate": 4.2480998949791105e-05, + "loss": 3.7546, + "step": 459264 + }, + { + "epoch": 1.0, + "learning_rate": 4.2472613002280585e-05, + "loss": 3.7465, + "step": 459776 + }, + { + "epoch": 1.0, + "learning_rate": 4.2464227054770065e-05, + "loss": 3.7566, + "step": 460288 + }, + { + "epoch": 1.0, + "learning_rate": 4.2455841107259545e-05, + "loss": 3.7436, + "step": 460800 + }, + { + "epoch": 1.0, + "learning_rate": 4.244747153855276e-05, + "loss": 3.7408, + "step": 461312 + }, + { + "epoch": 1.0, + "learning_rate": 4.243908559104224e-05, + "loss": 3.744, + "step": 461824 + }, + { + "epoch": 1.0, + "learning_rate": 4.243069964353172e-05, + "loss": 3.7485, + "step": 462336 + }, + { + "epoch": 1.0, + "learning_rate": 4.24223136960212e-05, + "loss": 3.7495, + "step": 462848 + }, + { + "epoch": 1.0, + "learning_rate": 4.241394412731441e-05, + "loss": 3.7485, + "step": 463360 + }, + { + "epoch": 1.0, + "learning_rate": 4.240555817980389e-05, + "loss": 3.749, + "step": 463872 + }, + { + "epoch": 1.0, + "learning_rate": 4.239717223229337e-05, + "loss": 3.7407, + "step": 464384 + }, + { + "epoch": 1.0, + "learning_rate": 4.238878628478285e-05, + "loss": 3.7394, + "step": 464896 + }, + { + "epoch": 1.0, + "learning_rate": 4.238041671607606e-05, + "loss": 3.7437, + "step": 465408 + }, + { + "epoch": 1.0, + "learning_rate": 4.237203076856554e-05, + "loss": 3.7357, + "step": 465920 + }, + { + "epoch": 1.0, + "learning_rate": 4.236364482105502e-05, + "loss": 3.7432, + "step": 466432 + }, + { + "epoch": 1.0, + "learning_rate": 4.23552588735445e-05, + "loss": 3.7376, + "step": 466944 + }, + { + "epoch": 1.0, + "learning_rate": 4.2346889304837714e-05, + "loss": 3.737, + "step": 467456 + }, + { + "epoch": 1.0, + "learning_rate": 4.2338503357327194e-05, + "loss": 3.7546, + "step": 467968 + }, + { + "epoch": 1.0, + "learning_rate": 4.2330117409816674e-05, + "loss": 3.7475, + "step": 468480 + }, + { + "epoch": 1.0, + "learning_rate": 4.2321731462306154e-05, + "loss": 3.7508, + "step": 468992 + }, + { + "epoch": 1.0, + "learning_rate": 4.231336189359936e-05, + "loss": 3.7439, + "step": 469504 + }, + { + "epoch": 1.0, + "learning_rate": 4.230497594608884e-05, + "loss": 3.7471, + "step": 470016 + }, + { + "epoch": 1.0, + "learning_rate": 4.229658999857832e-05, + "loss": 3.7306, + "step": 470528 + }, + { + "epoch": 1.0, + "learning_rate": 4.22882040510678e-05, + "loss": 3.7509, + "step": 471040 + }, + { + "epoch": 1.0, + "learning_rate": 4.227983448236101e-05, + "loss": 3.7391, + "step": 471552 + }, + { + "epoch": 1.0, + "learning_rate": 4.227144853485049e-05, + "loss": 3.7314, + "step": 472064 + }, + { + "epoch": 1.0, + "learning_rate": 4.226306258733997e-05, + "loss": 3.7343, + "step": 472576 + }, + { + "epoch": 1.0, + "learning_rate": 4.225467663982945e-05, + "loss": 3.7377, + "step": 473088 + }, + { + "epoch": 1.01, + "learning_rate": 4.224630707112267e-05, + "loss": 3.7406, + "step": 473600 + }, + { + "epoch": 1.01, + "learning_rate": 4.223792112361215e-05, + "loss": 3.749, + "step": 474112 + }, + { + "epoch": 1.01, + "learning_rate": 4.222953517610163e-05, + "loss": 3.7454, + "step": 474624 + }, + { + "epoch": 1.01, + "learning_rate": 4.222114922859111e-05, + "loss": 3.7413, + "step": 475136 + }, + { + "epoch": 1.01, + "learning_rate": 4.221277965988432e-05, + "loss": 3.7287, + "step": 475648 + }, + { + "epoch": 1.01, + "learning_rate": 4.22043937123738e-05, + "loss": 3.7446, + "step": 476160 + }, + { + "epoch": 1.01, + "learning_rate": 4.219600776486328e-05, + "loss": 3.739, + "step": 476672 + }, + { + "epoch": 1.01, + "learning_rate": 4.218762181735276e-05, + "loss": 3.7332, + "step": 477184 + }, + { + "epoch": 1.01, + "learning_rate": 4.2179252248645966e-05, + "loss": 3.7325, + "step": 477696 + }, + { + "epoch": 1.01, + "learning_rate": 4.2170866301135446e-05, + "loss": 3.7289, + "step": 478208 + }, + { + "epoch": 1.01, + "learning_rate": 4.2162480353624926e-05, + "loss": 3.736, + "step": 478720 + }, + { + "epoch": 1.01, + "learning_rate": 4.2154110784918135e-05, + "loss": 3.7434, + "step": 479232 + }, + { + "epoch": 1.01, + "learning_rate": 4.214572483740762e-05, + "loss": 3.7387, + "step": 479744 + }, + { + "epoch": 1.01, + "learning_rate": 4.21373388898971e-05, + "loss": 3.735, + "step": 480256 + }, + { + "epoch": 1.01, + "learning_rate": 4.212895294238658e-05, + "loss": 3.7389, + "step": 480768 + }, + { + "epoch": 1.01, + "learning_rate": 4.212056699487606e-05, + "loss": 3.7313, + "step": 481280 + }, + { + "epoch": 1.01, + "learning_rate": 4.2112181047365535e-05, + "loss": 3.7419, + "step": 481792 + }, + { + "epoch": 1.01, + "learning_rate": 4.2103795099855015e-05, + "loss": 3.7281, + "step": 482304 + }, + { + "epoch": 1.01, + "learning_rate": 4.209542553114823e-05, + "loss": 3.7159, + "step": 482816 + }, + { + "epoch": 1.01, + "learning_rate": 4.208703958363771e-05, + "loss": 3.7403, + "step": 483328 + }, + { + "epoch": 1.01, + "learning_rate": 4.2078653636127184e-05, + "loss": 3.7346, + "step": 483840 + }, + { + "epoch": 1.01, + "learning_rate": 4.2070267688616664e-05, + "loss": 3.7339, + "step": 484352 + }, + { + "epoch": 1.01, + "learning_rate": 4.206189811990988e-05, + "loss": 3.7274, + "step": 484864 + }, + { + "epoch": 1.01, + "learning_rate": 4.205351217239936e-05, + "loss": 3.7273, + "step": 485376 + }, + { + "epoch": 1.01, + "learning_rate": 4.204512622488884e-05, + "loss": 3.7226, + "step": 485888 + }, + { + "epoch": 1.01, + "learning_rate": 4.203674027737832e-05, + "loss": 3.7266, + "step": 486400 + }, + { + "epoch": 1.01, + "learning_rate": 4.2028370708671535e-05, + "loss": 3.7237, + "step": 486912 + }, + { + "epoch": 1.01, + "learning_rate": 4.201998476116101e-05, + "loss": 3.7315, + "step": 487424 + }, + { + "epoch": 1.01, + "learning_rate": 4.201159881365049e-05, + "loss": 3.7335, + "step": 487936 + }, + { + "epoch": 1.01, + "learning_rate": 4.200321286613997e-05, + "loss": 3.7268, + "step": 488448 + }, + { + "epoch": 1.01, + "learning_rate": 4.1994843297433184e-05, + "loss": 3.7265, + "step": 488960 + }, + { + "epoch": 1.01, + "learning_rate": 4.198645734992266e-05, + "loss": 3.747, + "step": 489472 + }, + { + "epoch": 1.01, + "learning_rate": 4.197807140241214e-05, + "loss": 3.7115, + "step": 489984 + }, + { + "epoch": 1.01, + "learning_rate": 4.196968545490162e-05, + "loss": 3.7171, + "step": 490496 + }, + { + "epoch": 1.01, + "learning_rate": 4.196131588619483e-05, + "loss": 3.7292, + "step": 491008 + }, + { + "epoch": 1.01, + "learning_rate": 4.195292993868431e-05, + "loss": 3.7335, + "step": 491520 + }, + { + "epoch": 1.01, + "learning_rate": 4.194454399117379e-05, + "loss": 3.7197, + "step": 492032 + }, + { + "epoch": 1.01, + "learning_rate": 4.193615804366327e-05, + "loss": 3.7226, + "step": 492544 + }, + { + "epoch": 1.01, + "learning_rate": 4.192780485376022e-05, + "loss": 3.7167, + "step": 493056 + }, + { + "epoch": 1.01, + "learning_rate": 4.19194189062497e-05, + "loss": 3.7243, + "step": 493568 + }, + { + "epoch": 1.01, + "learning_rate": 4.191103295873918e-05, + "loss": 3.7347, + "step": 494080 + }, + { + "epoch": 1.01, + "learning_rate": 4.190264701122866e-05, + "loss": 3.7279, + "step": 494592 + }, + { + "epoch": 1.01, + "learning_rate": 4.189426106371813e-05, + "loss": 3.7301, + "step": 495104 + }, + { + "epoch": 1.01, + "learning_rate": 4.188587511620761e-05, + "loss": 3.7334, + "step": 495616 + }, + { + "epoch": 1.01, + "learning_rate": 4.187748916869709e-05, + "loss": 3.7491, + "step": 496128 + }, + { + "epoch": 1.01, + "learning_rate": 4.186910322118657e-05, + "loss": 3.7229, + "step": 496640 + }, + { + "epoch": 1.01, + "learning_rate": 4.186073365247978e-05, + "loss": 3.7306, + "step": 497152 + }, + { + "epoch": 1.01, + "learning_rate": 4.185234770496927e-05, + "loss": 3.7341, + "step": 497664 + }, + { + "epoch": 1.01, + "learning_rate": 4.184396175745875e-05, + "loss": 3.711, + "step": 498176 + }, + { + "epoch": 1.01, + "learning_rate": 4.183557580994823e-05, + "loss": 3.7332, + "step": 498688 + }, + { + "epoch": 1.01, + "learning_rate": 4.1827206241241436e-05, + "loss": 3.7324, + "step": 499200 + }, + { + "epoch": 1.01, + "learning_rate": 4.1818820293730916e-05, + "loss": 3.7315, + "step": 499712 + }, + { + "epoch": 1.01, + "learning_rate": 4.1810434346220396e-05, + "loss": 3.7307, + "step": 500224 + }, + { + "epoch": 1.01, + "learning_rate": 4.1802048398709876e-05, + "loss": 3.7149, + "step": 500736 + }, + { + "epoch": 1.01, + "learning_rate": 4.1793678830003085e-05, + "loss": 3.7224, + "step": 501248 + }, + { + "epoch": 1.01, + "learning_rate": 4.1785292882492565e-05, + "loss": 3.7195, + "step": 501760 + }, + { + "epoch": 1.01, + "learning_rate": 4.1776906934982045e-05, + "loss": 3.7271, + "step": 502272 + }, + { + "epoch": 1.01, + "learning_rate": 4.1768520987471525e-05, + "loss": 3.731, + "step": 502784 + }, + { + "epoch": 1.01, + "learning_rate": 4.1760151418764734e-05, + "loss": 3.7158, + "step": 503296 + }, + { + "epoch": 1.02, + "learning_rate": 4.175176547125422e-05, + "loss": 3.7264, + "step": 503808 + }, + { + "epoch": 1.02, + "learning_rate": 4.17433795237437e-05, + "loss": 3.7179, + "step": 504320 + }, + { + "epoch": 1.02, + "learning_rate": 4.173499357623318e-05, + "loss": 3.7164, + "step": 504832 + }, + { + "epoch": 1.02, + "learning_rate": 4.172662400752639e-05, + "loss": 3.7242, + "step": 505344 + }, + { + "epoch": 1.02, + "learning_rate": 4.171823806001587e-05, + "loss": 3.7271, + "step": 505856 + }, + { + "epoch": 1.02, + "learning_rate": 4.170985211250535e-05, + "loss": 3.7371, + "step": 506368 + }, + { + "epoch": 1.02, + "learning_rate": 4.170146616499483e-05, + "loss": 3.7212, + "step": 506880 + }, + { + "epoch": 1.02, + "learning_rate": 4.169309659628804e-05, + "loss": 3.7188, + "step": 507392 + }, + { + "epoch": 1.02, + "learning_rate": 4.168471064877752e-05, + "loss": 3.7251, + "step": 507904 + }, + { + "epoch": 1.02, + "learning_rate": 4.1676324701267e-05, + "loss": 3.7227, + "step": 508416 + }, + { + "epoch": 1.02, + "learning_rate": 4.166795513256021e-05, + "loss": 3.729, + "step": 508928 + }, + { + "epoch": 1.02, + "learning_rate": 4.165956918504969e-05, + "loss": 3.7187, + "step": 509440 + }, + { + "epoch": 1.02, + "learning_rate": 4.1651183237539174e-05, + "loss": 3.7192, + "step": 509952 + }, + { + "epoch": 1.02, + "learning_rate": 4.1642797290028654e-05, + "loss": 3.7227, + "step": 510464 + }, + { + "epoch": 1.02, + "learning_rate": 4.1634411342518134e-05, + "loss": 3.7303, + "step": 510976 + }, + { + "epoch": 1.02, + "learning_rate": 4.1626025395007614e-05, + "loss": 3.7025, + "step": 511488 + }, + { + "epoch": 1.02, + "learning_rate": 4.1617639447497094e-05, + "loss": 3.7201, + "step": 512000 + }, + { + "epoch": 1.02, + "learning_rate": 4.1609253499986574e-05, + "loss": 3.7209, + "step": 512512 + }, + { + "epoch": 1.02, + "learning_rate": 4.160088393127978e-05, + "loss": 3.7257, + "step": 513024 + }, + { + "epoch": 1.02, + "learning_rate": 4.159249798376926e-05, + "loss": 3.7055, + "step": 513536 + }, + { + "epoch": 1.02, + "learning_rate": 4.158411203625874e-05, + "loss": 3.724, + "step": 514048 + }, + { + "epoch": 1.02, + "learning_rate": 4.157572608874822e-05, + "loss": 3.7176, + "step": 514560 + }, + { + "epoch": 1.02, + "learning_rate": 4.156735652004143e-05, + "loss": 3.7195, + "step": 515072 + }, + { + "epoch": 1.02, + "learning_rate": 4.155897057253091e-05, + "loss": 3.7237, + "step": 515584 + }, + { + "epoch": 1.02, + "learning_rate": 4.155058462502039e-05, + "loss": 3.7159, + "step": 516096 + }, + { + "epoch": 1.02, + "learning_rate": 4.154219867750987e-05, + "loss": 3.7202, + "step": 516608 + }, + { + "epoch": 1.02, + "learning_rate": 4.153382910880309e-05, + "loss": 3.7341, + "step": 517120 + }, + { + "epoch": 1.02, + "learning_rate": 4.15254595400963e-05, + "loss": 3.7228, + "step": 517632 + }, + { + "epoch": 1.02, + "learning_rate": 4.151707359258578e-05, + "loss": 3.7149, + "step": 518144 + }, + { + "epoch": 1.02, + "learning_rate": 4.150868764507526e-05, + "loss": 3.7291, + "step": 518656 + }, + { + "epoch": 1.02, + "learning_rate": 4.150030169756474e-05, + "loss": 3.7198, + "step": 519168 + }, + { + "epoch": 1.02, + "learning_rate": 4.1491915750054217e-05, + "loss": 3.707, + "step": 519680 + }, + { + "epoch": 1.02, + "learning_rate": 4.1483529802543697e-05, + "loss": 3.7232, + "step": 520192 + }, + { + "epoch": 1.02, + "learning_rate": 4.1475143855033176e-05, + "loss": 3.7117, + "step": 520704 + }, + { + "epoch": 1.02, + "learning_rate": 4.1466757907522656e-05, + "loss": 3.7172, + "step": 521216 + }, + { + "epoch": 1.02, + "learning_rate": 4.1458388338815866e-05, + "loss": 3.7178, + "step": 521728 + }, + { + "epoch": 1.02, + "learning_rate": 4.1450002391305345e-05, + "loss": 3.715, + "step": 522240 + }, + { + "epoch": 1.02, + "learning_rate": 4.1441616443794825e-05, + "loss": 3.7153, + "step": 522752 + }, + { + "epoch": 1.02, + "learning_rate": 4.1433230496284305e-05, + "loss": 3.7103, + "step": 523264 + }, + { + "epoch": 1.02, + "learning_rate": 4.142486092757752e-05, + "loss": 3.713, + "step": 523776 + }, + { + "epoch": 1.02, + "learning_rate": 4.1416474980067e-05, + "loss": 3.7286, + "step": 524288 + }, + { + "epoch": 1.02, + "learning_rate": 4.140808903255648e-05, + "loss": 3.7202, + "step": 524800 + }, + { + "epoch": 1.02, + "learning_rate": 4.139970308504596e-05, + "loss": 3.7266, + "step": 525312 + }, + { + "epoch": 1.02, + "learning_rate": 4.139133351633917e-05, + "loss": 3.7051, + "step": 525824 + }, + { + "epoch": 1.02, + "learning_rate": 4.138294756882865e-05, + "loss": 3.7304, + "step": 526336 + }, + { + "epoch": 1.02, + "learning_rate": 4.137456162131813e-05, + "loss": 3.7191, + "step": 526848 + }, + { + "epoch": 1.02, + "learning_rate": 4.136619205261134e-05, + "loss": 3.7204, + "step": 527360 + }, + { + "epoch": 1.02, + "learning_rate": 4.135780610510082e-05, + "loss": 3.7174, + "step": 527872 + }, + { + "epoch": 1.02, + "learning_rate": 4.13494201575903e-05, + "loss": 3.7225, + "step": 528384 + }, + { + "epoch": 1.02, + "learning_rate": 4.134103421007978e-05, + "loss": 3.7122, + "step": 528896 + }, + { + "epoch": 1.02, + "learning_rate": 4.1332664641372995e-05, + "loss": 3.7351, + "step": 529408 + }, + { + "epoch": 1.02, + "learning_rate": 4.1324278693862475e-05, + "loss": 3.7034, + "step": 529920 + }, + { + "epoch": 1.02, + "learning_rate": 4.1315892746351955e-05, + "loss": 3.7206, + "step": 530432 + }, + { + "epoch": 1.02, + "learning_rate": 4.1307506798841435e-05, + "loss": 3.7111, + "step": 530944 + }, + { + "epoch": 1.02, + "learning_rate": 4.1299137230134644e-05, + "loss": 3.7119, + "step": 531456 + }, + { + "epoch": 1.02, + "learning_rate": 4.1290751282624124e-05, + "loss": 3.7165, + "step": 531968 + }, + { + "epoch": 1.02, + "learning_rate": 4.1282365335113604e-05, + "loss": 3.7115, + "step": 532480 + }, + { + "epoch": 1.02, + "learning_rate": 4.1273979387603084e-05, + "loss": 3.7289, + "step": 532992 + }, + { + "epoch": 1.02, + "learning_rate": 4.126560981889629e-05, + "loss": 3.7016, + "step": 533504 + }, + { + "epoch": 1.02, + "learning_rate": 4.125722387138577e-05, + "loss": 3.7222, + "step": 534016 + }, + { + "epoch": 1.03, + "eval_loss": 3.8603949546813965, + "eval_runtime": 310.4727, + "eval_samples_per_second": 1229.064, + "eval_steps_per_second": 38.409, + "step": 534240 + }, + { + "epoch": 0.0, + "learning_rate": 4.124883792387525e-05, + "loss": 3.7157, + "step": 534528 + }, + { + "epoch": 0.0, + "learning_rate": 4.124045197636473e-05, + "loss": 3.7062, + "step": 535040 + }, + { + "epoch": 0.0, + "learning_rate": 4.123206602885421e-05, + "loss": 3.7179, + "step": 535552 + }, + { + "epoch": 0.0, + "learning_rate": 4.122368008134369e-05, + "loss": 3.7118, + "step": 536064 + }, + { + "epoch": 0.0, + "learning_rate": 4.121529413383317e-05, + "loss": 3.7197, + "step": 536576 + }, + { + "epoch": 0.0, + "learning_rate": 4.120690818632265e-05, + "loss": 3.7078, + "step": 537088 + }, + { + "epoch": 0.0, + "learning_rate": 4.119852223881213e-05, + "loss": 3.7105, + "step": 537600 + }, + { + "epoch": 0.0, + "learning_rate": 4.119013629130161e-05, + "loss": 3.7064, + "step": 538112 + }, + { + "epoch": 0.0, + "learning_rate": 4.118176672259482e-05, + "loss": 3.7153, + "step": 538624 + }, + { + "epoch": 0.0, + "learning_rate": 4.11733807750843e-05, + "loss": 3.7141, + "step": 539136 + }, + { + "epoch": 0.0, + "learning_rate": 4.116499482757378e-05, + "loss": 3.7135, + "step": 539648 + }, + { + "epoch": 0.0, + "learning_rate": 4.115660888006326e-05, + "loss": 3.7102, + "step": 540160 + }, + { + "epoch": 0.0, + "learning_rate": 4.114823931135647e-05, + "loss": 3.7052, + "step": 540672 + }, + { + "epoch": 0.0, + "learning_rate": 4.113985336384595e-05, + "loss": 3.7052, + "step": 541184 + }, + { + "epoch": 0.0, + "learning_rate": 4.113146741633544e-05, + "loss": 3.7111, + "step": 541696 + }, + { + "epoch": 0.0, + "learning_rate": 4.112308146882492e-05, + "loss": 3.6989, + "step": 542208 + }, + { + "epoch": 0.0, + "learning_rate": 4.1114711900118126e-05, + "loss": 3.7079, + "step": 542720 + }, + { + "epoch": 0.0, + "learning_rate": 4.1106325952607606e-05, + "loss": 3.7044, + "step": 543232 + }, + { + "epoch": 0.0, + "learning_rate": 4.1097940005097086e-05, + "loss": 3.7022, + "step": 543744 + }, + { + "epoch": 0.0, + "learning_rate": 4.1089554057586566e-05, + "loss": 3.7162, + "step": 544256 + }, + { + "epoch": 0.0, + "learning_rate": 4.1081184488879775e-05, + "loss": 3.716, + "step": 544768 + }, + { + "epoch": 0.0, + "learning_rate": 4.1072798541369255e-05, + "loss": 3.7174, + "step": 545280 + }, + { + "epoch": 0.0, + "learning_rate": 4.1064412593858735e-05, + "loss": 3.7094, + "step": 545792 + }, + { + "epoch": 0.0, + "learning_rate": 4.1056043025151944e-05, + "loss": 3.7115, + "step": 546304 + }, + { + "epoch": 0.0, + "learning_rate": 4.1047657077641424e-05, + "loss": 3.6978, + "step": 546816 + }, + { + "epoch": 0.0, + "learning_rate": 4.1039271130130904e-05, + "loss": 3.7168, + "step": 547328 + }, + { + "epoch": 0.0, + "learning_rate": 4.103088518262039e-05, + "loss": 3.7057, + "step": 547840 + }, + { + "epoch": 0.0, + "learning_rate": 4.10225156139136e-05, + "loss": 3.699, + "step": 548352 + }, + { + "epoch": 0.0, + "learning_rate": 4.101412966640308e-05, + "loss": 3.7003, + "step": 548864 + }, + { + "epoch": 0.0, + "learning_rate": 4.100574371889256e-05, + "loss": 3.7037, + "step": 549376 + }, + { + "epoch": 0.01, + "learning_rate": 4.099735777138204e-05, + "loss": 3.7049, + "step": 549888 + }, + { + "epoch": 0.01, + "learning_rate": 4.098898820267525e-05, + "loss": 3.7171, + "step": 550400 + }, + { + "epoch": 0.01, + "learning_rate": 4.098060225516473e-05, + "loss": 3.7128, + "step": 550912 + }, + { + "epoch": 0.01, + "learning_rate": 4.097221630765421e-05, + "loss": 3.7017, + "step": 551424 + }, + { + "epoch": 0.01, + "learning_rate": 4.096383036014369e-05, + "loss": 3.6992, + "step": 551936 + }, + { + "epoch": 0.01, + "learning_rate": 4.09554607914369e-05, + "loss": 3.7132, + "step": 552448 + }, + { + "epoch": 0.01, + "learning_rate": 4.094707484392638e-05, + "loss": 3.708, + "step": 552960 + }, + { + "epoch": 0.01, + "learning_rate": 4.093868889641586e-05, + "loss": 3.6987, + "step": 553472 + }, + { + "epoch": 0.01, + "learning_rate": 4.0930302948905345e-05, + "loss": 3.6998, + "step": 553984 + }, + { + "epoch": 0.01, + "learning_rate": 4.0921933380198554e-05, + "loss": 3.6947, + "step": 554496 + }, + { + "epoch": 0.01, + "learning_rate": 4.0913547432688034e-05, + "loss": 3.7003, + "step": 555008 + }, + { + "epoch": 0.01, + "learning_rate": 4.090517786398124e-05, + "loss": 3.7107, + "step": 555520 + }, + { + "epoch": 0.01, + "learning_rate": 4.089679191647072e-05, + "loss": 3.7063, + "step": 556032 + }, + { + "epoch": 0.01, + "learning_rate": 4.08884059689602e-05, + "loss": 3.7002, + "step": 556544 + }, + { + "epoch": 0.01, + "learning_rate": 4.088002002144968e-05, + "loss": 3.7051, + "step": 557056 + }, + { + "epoch": 0.01, + "learning_rate": 4.087163407393916e-05, + "loss": 3.698, + "step": 557568 + }, + { + "epoch": 0.01, + "learning_rate": 4.086324812642864e-05, + "loss": 3.7087, + "step": 558080 + }, + { + "epoch": 0.01, + "learning_rate": 4.085486217891812e-05, + "loss": 3.6966, + "step": 558592 + }, + { + "epoch": 0.01, + "learning_rate": 4.084649261021133e-05, + "loss": 3.6803, + "step": 559104 + }, + { + "epoch": 0.01, + "learning_rate": 4.083810666270081e-05, + "loss": 3.7092, + "step": 559616 + }, + { + "epoch": 0.01, + "learning_rate": 4.08297207151903e-05, + "loss": 3.7011, + "step": 560128 + }, + { + "epoch": 0.01, + "learning_rate": 4.082133476767978e-05, + "loss": 3.7037, + "step": 560640 + }, + { + "epoch": 0.01, + "learning_rate": 4.081296519897299e-05, + "loss": 3.6938, + "step": 561152 + }, + { + "epoch": 0.01, + "learning_rate": 4.080457925146247e-05, + "loss": 3.6926, + "step": 561664 + }, + { + "epoch": 0.01, + "learning_rate": 4.079619330395195e-05, + "loss": 3.6893, + "step": 562176 + }, + { + "epoch": 0.01, + "learning_rate": 4.078780735644143e-05, + "loss": 3.6932, + "step": 562688 + }, + { + "epoch": 0.01, + "learning_rate": 4.0779437787734636e-05, + "loss": 3.6949, + "step": 563200 + }, + { + "epoch": 0.01, + "learning_rate": 4.0771051840224116e-05, + "loss": 3.6953, + "step": 563712 + }, + { + "epoch": 0.01, + "learning_rate": 4.0762665892713596e-05, + "loss": 3.7073, + "step": 564224 + }, + { + "epoch": 0.01, + "learning_rate": 4.0754279945203076e-05, + "loss": 3.6897, + "step": 564736 + }, + { + "epoch": 0.01, + "learning_rate": 4.0745910376496285e-05, + "loss": 3.6958, + "step": 565248 + }, + { + "epoch": 0.01, + "learning_rate": 4.0737524428985765e-05, + "loss": 3.708, + "step": 565760 + }, + { + "epoch": 0.01, + "learning_rate": 4.072913848147525e-05, + "loss": 3.6815, + "step": 566272 + }, + { + "epoch": 0.01, + "learning_rate": 4.072075253396473e-05, + "loss": 3.6848, + "step": 566784 + }, + { + "epoch": 0.01, + "learning_rate": 4.071238296525794e-05, + "loss": 3.7005, + "step": 567296 + }, + { + "epoch": 0.01, + "learning_rate": 4.070399701774742e-05, + "loss": 3.6993, + "step": 567808 + }, + { + "epoch": 0.01, + "learning_rate": 4.06956110702369e-05, + "loss": 3.6879, + "step": 568320 + }, + { + "epoch": 0.01, + "learning_rate": 4.068722512272638e-05, + "loss": 3.6931, + "step": 568832 + }, + { + "epoch": 0.01, + "learning_rate": 4.067885555401959e-05, + "loss": 3.6828, + "step": 569344 + }, + { + "epoch": 0.01, + "learning_rate": 4.06704859853128e-05, + "loss": 3.6965, + "step": 569856 + }, + { + "epoch": 0.01, + "learning_rate": 4.066210003780228e-05, + "loss": 3.6979, + "step": 570368 + }, + { + "epoch": 0.01, + "learning_rate": 4.065371409029176e-05, + "loss": 3.6952, + "step": 570880 + }, + { + "epoch": 0.01, + "learning_rate": 4.064532814278124e-05, + "loss": 3.6965, + "step": 571392 + }, + { + "epoch": 0.01, + "learning_rate": 4.063694219527072e-05, + "loss": 3.7029, + "step": 571904 + }, + { + "epoch": 0.01, + "learning_rate": 4.06285562477602e-05, + "loss": 3.7149, + "step": 572416 + }, + { + "epoch": 0.01, + "learning_rate": 4.0620170300249686e-05, + "loss": 3.6906, + "step": 572928 + }, + { + "epoch": 0.01, + "learning_rate": 4.0611817110346624e-05, + "loss": 3.695, + "step": 573440 + }, + { + "epoch": 0.01, + "learning_rate": 4.0603431162836104e-05, + "loss": 3.7036, + "step": 573952 + }, + { + "epoch": 0.01, + "learning_rate": 4.0595045215325584e-05, + "loss": 3.6831, + "step": 574464 + }, + { + "epoch": 0.01, + "learning_rate": 4.0586659267815064e-05, + "loss": 3.6972, + "step": 574976 + }, + { + "epoch": 0.01, + "learning_rate": 4.0578273320304544e-05, + "loss": 3.7006, + "step": 575488 + }, + { + "epoch": 0.01, + "learning_rate": 4.0569887372794024e-05, + "loss": 3.6994, + "step": 576000 + }, + { + "epoch": 0.01, + "learning_rate": 4.056151780408723e-05, + "loss": 3.7027, + "step": 576512 + }, + { + "epoch": 0.01, + "learning_rate": 4.055313185657671e-05, + "loss": 3.6849, + "step": 577024 + }, + { + "epoch": 0.01, + "learning_rate": 4.054474590906619e-05, + "loss": 3.6876, + "step": 577536 + }, + { + "epoch": 0.01, + "learning_rate": 4.053635996155567e-05, + "loss": 3.6896, + "step": 578048 + }, + { + "epoch": 0.01, + "learning_rate": 4.052797401404515e-05, + "loss": 3.6948, + "step": 578560 + }, + { + "epoch": 0.01, + "learning_rate": 4.051958806653464e-05, + "loss": 3.6965, + "step": 579072 + }, + { + "epoch": 0.01, + "learning_rate": 4.051120211902411e-05, + "loss": 3.6853, + "step": 579584 + }, + { + "epoch": 0.02, + "learning_rate": 4.050283255031733e-05, + "loss": 3.6919, + "step": 580096 + }, + { + "epoch": 0.02, + "learning_rate": 4.049444660280681e-05, + "loss": 3.6882, + "step": 580608 + }, + { + "epoch": 0.02, + "learning_rate": 4.048606065529629e-05, + "loss": 3.6836, + "step": 581120 + }, + { + "epoch": 0.02, + "learning_rate": 4.047767470778576e-05, + "loss": 3.6929, + "step": 581632 + }, + { + "epoch": 0.02, + "learning_rate": 4.046930513907898e-05, + "loss": 3.6948, + "step": 582144 + }, + { + "epoch": 0.02, + "learning_rate": 4.046091919156846e-05, + "loss": 3.7035, + "step": 582656 + }, + { + "epoch": 0.02, + "learning_rate": 4.045253324405793e-05, + "loss": 3.6935, + "step": 583168 + }, + { + "epoch": 0.02, + "learning_rate": 4.044414729654741e-05, + "loss": 3.6829, + "step": 583680 + }, + { + "epoch": 0.02, + "learning_rate": 4.0435777727840626e-05, + "loss": 3.6958, + "step": 584192 + }, + { + "epoch": 0.02, + "learning_rate": 4.0427391780330106e-05, + "loss": 3.6923, + "step": 584704 + }, + { + "epoch": 0.02, + "learning_rate": 4.041902221162332e-05, + "loss": 3.6927, + "step": 585216 + }, + { + "epoch": 0.02, + "learning_rate": 4.04106362641128e-05, + "loss": 3.6886, + "step": 585728 + }, + { + "epoch": 0.02, + "learning_rate": 4.040225031660228e-05, + "loss": 3.6891, + "step": 586240 + }, + { + "epoch": 0.02, + "learning_rate": 4.039386436909176e-05, + "loss": 3.6877, + "step": 586752 + }, + { + "epoch": 0.02, + "learning_rate": 4.0385478421581235e-05, + "loss": 3.6987, + "step": 587264 + }, + { + "epoch": 0.02, + "learning_rate": 4.0377092474070715e-05, + "loss": 3.6768, + "step": 587776 + }, + { + "epoch": 0.02, + "learning_rate": 4.0368706526560195e-05, + "loss": 3.6879, + "step": 588288 + }, + { + "epoch": 0.02, + "learning_rate": 4.0360320579049675e-05, + "loss": 3.6842, + "step": 588800 + }, + { + "epoch": 0.02, + "learning_rate": 4.0351951010342884e-05, + "loss": 3.6941, + "step": 589312 + }, + { + "epoch": 0.02, + "learning_rate": 4.0343565062832364e-05, + "loss": 3.6787, + "step": 589824 + }, + { + "epoch": 0.02, + "learning_rate": 4.0335179115321844e-05, + "loss": 3.6938, + "step": 590336 + }, + { + "epoch": 0.02, + "learning_rate": 4.032679316781133e-05, + "loss": 3.6824, + "step": 590848 + }, + { + "epoch": 0.02, + "learning_rate": 4.031842359910454e-05, + "loss": 3.6922, + "step": 591360 + }, + { + "epoch": 0.02, + "learning_rate": 4.031003765159402e-05, + "loss": 3.6938, + "step": 591872 + }, + { + "epoch": 0.02, + "learning_rate": 4.03016517040835e-05, + "loss": 3.6819, + "step": 592384 + }, + { + "epoch": 0.02, + "learning_rate": 4.029326575657298e-05, + "loss": 3.6887, + "step": 592896 + }, + { + "epoch": 0.02, + "learning_rate": 4.028489618786619e-05, + "loss": 3.7028, + "step": 593408 + }, + { + "epoch": 0.02, + "learning_rate": 4.027651024035567e-05, + "loss": 3.6919, + "step": 593920 + }, + { + "epoch": 0.02, + "learning_rate": 4.026812429284515e-05, + "loss": 3.6853, + "step": 594432 + }, + { + "epoch": 0.02, + "learning_rate": 4.025973834533463e-05, + "loss": 3.6937, + "step": 594944 + }, + { + "epoch": 0.02, + "learning_rate": 4.025136877662784e-05, + "loss": 3.6947, + "step": 595456 + }, + { + "epoch": 0.02, + "learning_rate": 4.024298282911732e-05, + "loss": 3.6808, + "step": 595968 + }, + { + "epoch": 0.02, + "learning_rate": 4.02345968816068e-05, + "loss": 3.69, + "step": 596480 + }, + { + "epoch": 0.02, + "learning_rate": 4.0226210934096284e-05, + "loss": 3.6767, + "step": 596992 + }, + { + "epoch": 0.02, + "learning_rate": 4.0217841365389493e-05, + "loss": 3.6877, + "step": 597504 + }, + { + "epoch": 0.02, + "learning_rate": 4.0209455417878973e-05, + "loss": 3.6848, + "step": 598016 + }, + { + "epoch": 0.02, + "learning_rate": 4.020106947036845e-05, + "loss": 3.6866, + "step": 598528 + }, + { + "epoch": 0.02, + "learning_rate": 4.019268352285793e-05, + "loss": 3.6852, + "step": 599040 + }, + { + "epoch": 0.02, + "learning_rate": 4.018431395415114e-05, + "loss": 3.6794, + "step": 599552 + }, + { + "epoch": 0.02, + "learning_rate": 4.017592800664062e-05, + "loss": 3.6793, + "step": 600064 + }, + { + "epoch": 0.02, + "learning_rate": 4.01675420591301e-05, + "loss": 3.6969, + "step": 600576 + }, + { + "epoch": 0.02, + "learning_rate": 4.015915611161958e-05, + "loss": 3.692, + "step": 601088 + }, + { + "epoch": 0.02, + "learning_rate": 4.015078654291279e-05, + "loss": 3.6957, + "step": 601600 + }, + { + "epoch": 0.02, + "learning_rate": 4.014240059540227e-05, + "loss": 3.6746, + "step": 602112 + }, + { + "epoch": 0.02, + "learning_rate": 4.013401464789175e-05, + "loss": 3.7004, + "step": 602624 + }, + { + "epoch": 0.02, + "learning_rate": 4.012562870038124e-05, + "loss": 3.6855, + "step": 603136 + }, + { + "epoch": 0.02, + "learning_rate": 4.011725913167445e-05, + "loss": 3.689, + "step": 603648 + }, + { + "epoch": 0.02, + "learning_rate": 4.010887318416393e-05, + "loss": 3.6917, + "step": 604160 + }, + { + "epoch": 0.02, + "learning_rate": 4.010048723665341e-05, + "loss": 3.6874, + "step": 604672 + }, + { + "epoch": 0.02, + "learning_rate": 4.0092117667946616e-05, + "loss": 3.6805, + "step": 605184 + }, + { + "epoch": 0.02, + "learning_rate": 4.0083731720436096e-05, + "loss": 3.7039, + "step": 605696 + }, + { + "epoch": 0.02, + "learning_rate": 4.0075345772925576e-05, + "loss": 3.6748, + "step": 606208 + }, + { + "epoch": 0.02, + "learning_rate": 4.0066959825415056e-05, + "loss": 3.6927, + "step": 606720 + }, + { + "epoch": 0.02, + "learning_rate": 4.0058590256708265e-05, + "loss": 3.6821, + "step": 607232 + }, + { + "epoch": 0.02, + "learning_rate": 4.0050204309197745e-05, + "loss": 3.683, + "step": 607744 + }, + { + "epoch": 0.02, + "learning_rate": 4.0041818361687225e-05, + "loss": 3.6822, + "step": 608256 + }, + { + "epoch": 0.02, + "learning_rate": 4.0033432414176705e-05, + "loss": 3.6844, + "step": 608768 + }, + { + "epoch": 0.02, + "learning_rate": 4.002506284546992e-05, + "loss": 3.696, + "step": 609280 + }, + { + "epoch": 0.02, + "learning_rate": 4.00166768979594e-05, + "loss": 3.6766, + "step": 609792 + }, + { + "epoch": 0.02, + "learning_rate": 4.000829095044888e-05, + "loss": 3.6882, + "step": 610304 + }, + { + "epoch": 0.03, + "eval_loss": 3.8529491424560547, + "eval_runtime": 303.7522, + "eval_samples_per_second": 1256.258, + "eval_steps_per_second": 39.259, + "step": 610560 + }, + { + "epoch": 1.0, + "learning_rate": 3.999990500293836e-05, + "loss": 3.6808, + "step": 610816 + }, + { + "epoch": 1.0, + "learning_rate": 3.999153543423157e-05, + "loss": 3.6796, + "step": 611328 + }, + { + "epoch": 1.0, + "learning_rate": 3.998314948672105e-05, + "loss": 3.6847, + "step": 611840 + }, + { + "epoch": 1.0, + "learning_rate": 3.997476353921053e-05, + "loss": 3.6812, + "step": 612352 + }, + { + "epoch": 1.0, + "learning_rate": 3.996637759170001e-05, + "loss": 3.6931, + "step": 612864 + }, + { + "epoch": 1.0, + "learning_rate": 3.995800802299322e-05, + "loss": 3.6769, + "step": 613376 + }, + { + "epoch": 1.0, + "learning_rate": 3.99496220754827e-05, + "loss": 3.6841, + "step": 613888 + }, + { + "epoch": 1.0, + "learning_rate": 3.994123612797218e-05, + "loss": 3.6785, + "step": 614400 + }, + { + "epoch": 1.0, + "learning_rate": 3.993285018046166e-05, + "loss": 3.6815, + "step": 614912 + }, + { + "epoch": 1.0, + "learning_rate": 3.9924480611754874e-05, + "loss": 3.6887, + "step": 615424 + }, + { + "epoch": 1.0, + "learning_rate": 3.9916094664244354e-05, + "loss": 3.6813, + "step": 615936 + }, + { + "epoch": 1.0, + "learning_rate": 3.9907708716733834e-05, + "loss": 3.6784, + "step": 616448 + }, + { + "epoch": 1.0, + "learning_rate": 3.9899339148027044e-05, + "loss": 3.6821, + "step": 616960 + }, + { + "epoch": 1.0, + "learning_rate": 3.9890953200516523e-05, + "loss": 3.6694, + "step": 617472 + }, + { + "epoch": 1.0, + "learning_rate": 3.9882567253006003e-05, + "loss": 3.6794, + "step": 617984 + }, + { + "epoch": 1.0, + "learning_rate": 3.987418130549548e-05, + "loss": 3.6732, + "step": 618496 + }, + { + "epoch": 1.0, + "learning_rate": 3.986581173678869e-05, + "loss": 3.6786, + "step": 619008 + }, + { + "epoch": 1.0, + "learning_rate": 3.985742578927817e-05, + "loss": 3.6742, + "step": 619520 + }, + { + "epoch": 1.0, + "learning_rate": 3.984903984176765e-05, + "loss": 3.6716, + "step": 620032 + }, + { + "epoch": 1.0, + "learning_rate": 3.984065389425713e-05, + "loss": 3.6881, + "step": 620544 + }, + { + "epoch": 1.0, + "learning_rate": 3.983226794674661e-05, + "loss": 3.6903, + "step": 621056 + }, + { + "epoch": 1.0, + "learning_rate": 3.98238819992361e-05, + "loss": 3.6894, + "step": 621568 + }, + { + "epoch": 1.0, + "learning_rate": 3.981549605172558e-05, + "loss": 3.6769, + "step": 622080 + }, + { + "epoch": 1.0, + "learning_rate": 3.980711010421506e-05, + "loss": 3.6771, + "step": 622592 + }, + { + "epoch": 1.0, + "learning_rate": 3.979874053550827e-05, + "loss": 3.674, + "step": 623104 + }, + { + "epoch": 1.0, + "learning_rate": 3.979035458799775e-05, + "loss": 3.6846, + "step": 623616 + }, + { + "epoch": 1.0, + "learning_rate": 3.978196864048723e-05, + "loss": 3.6782, + "step": 624128 + }, + { + "epoch": 1.0, + "learning_rate": 3.977358269297671e-05, + "loss": 3.6752, + "step": 624640 + }, + { + "epoch": 1.0, + "learning_rate": 3.976521312426992e-05, + "loss": 3.6689, + "step": 625152 + }, + { + "epoch": 1.0, + "learning_rate": 3.97568271767594e-05, + "loss": 3.6741, + "step": 625664 + }, + { + "epoch": 1.01, + "learning_rate": 3.974844122924888e-05, + "loss": 3.6691, + "step": 626176 + }, + { + "epoch": 1.01, + "learning_rate": 3.974005528173836e-05, + "loss": 3.6883, + "step": 626688 + }, + { + "epoch": 1.01, + "learning_rate": 3.9731685713031566e-05, + "loss": 3.6847, + "step": 627200 + }, + { + "epoch": 1.01, + "learning_rate": 3.9723299765521046e-05, + "loss": 3.6742, + "step": 627712 + }, + { + "epoch": 1.01, + "learning_rate": 3.971491381801053e-05, + "loss": 3.6687, + "step": 628224 + }, + { + "epoch": 1.01, + "learning_rate": 3.970652787050001e-05, + "loss": 3.6818, + "step": 628736 + }, + { + "epoch": 1.01, + "learning_rate": 3.969815830179322e-05, + "loss": 3.6787, + "step": 629248 + }, + { + "epoch": 1.01, + "learning_rate": 3.96897723542827e-05, + "loss": 3.6738, + "step": 629760 + }, + { + "epoch": 1.01, + "learning_rate": 3.968138640677218e-05, + "loss": 3.6719, + "step": 630272 + }, + { + "epoch": 1.01, + "learning_rate": 3.967301683806539e-05, + "loss": 3.6642, + "step": 630784 + }, + { + "epoch": 1.01, + "learning_rate": 3.966463089055487e-05, + "loss": 3.6667, + "step": 631296 + }, + { + "epoch": 1.01, + "learning_rate": 3.965626132184808e-05, + "loss": 3.682, + "step": 631808 + }, + { + "epoch": 1.01, + "learning_rate": 3.964787537433756e-05, + "loss": 3.6796, + "step": 632320 + }, + { + "epoch": 1.01, + "learning_rate": 3.963948942682704e-05, + "loss": 3.6713, + "step": 632832 + }, + { + "epoch": 1.01, + "learning_rate": 3.963110347931652e-05, + "loss": 3.6765, + "step": 633344 + }, + { + "epoch": 1.01, + "learning_rate": 3.9622717531806e-05, + "loss": 3.6693, + "step": 633856 + }, + { + "epoch": 1.01, + "learning_rate": 3.9614331584295486e-05, + "loss": 3.6791, + "step": 634368 + }, + { + "epoch": 1.01, + "learning_rate": 3.960594563678496e-05, + "loss": 3.6701, + "step": 634880 + }, + { + "epoch": 1.01, + "learning_rate": 3.9597576068078175e-05, + "loss": 3.6471, + "step": 635392 + }, + { + "epoch": 1.01, + "learning_rate": 3.9589190120567655e-05, + "loss": 3.6833, + "step": 635904 + }, + { + "epoch": 1.01, + "learning_rate": 3.9580804173057135e-05, + "loss": 3.6706, + "step": 636416 + }, + { + "epoch": 1.01, + "learning_rate": 3.957241822554661e-05, + "loss": 3.6758, + "step": 636928 + }, + { + "epoch": 1.01, + "learning_rate": 3.9564048656839824e-05, + "loss": 3.6673, + "step": 637440 + }, + { + "epoch": 1.01, + "learning_rate": 3.9555662709329304e-05, + "loss": 3.6655, + "step": 637952 + }, + { + "epoch": 1.01, + "learning_rate": 3.9547276761818784e-05, + "loss": 3.6584, + "step": 638464 + }, + { + "epoch": 1.01, + "learning_rate": 3.953889081430826e-05, + "loss": 3.6644, + "step": 638976 + }, + { + "epoch": 1.01, + "learning_rate": 3.953052124560147e-05, + "loss": 3.6677, + "step": 639488 + }, + { + "epoch": 1.01, + "learning_rate": 3.952215167689469e-05, + "loss": 3.6643, + "step": 640000 + }, + { + "epoch": 1.01, + "learning_rate": 3.951376572938417e-05, + "loss": 3.6797, + "step": 640512 + }, + { + "epoch": 1.01, + "learning_rate": 3.950537978187365e-05, + "loss": 3.6627, + "step": 641024 + }, + { + "epoch": 1.01, + "learning_rate": 3.949699383436313e-05, + "loss": 3.6684, + "step": 641536 + }, + { + "epoch": 1.01, + "learning_rate": 3.948860788685261e-05, + "loss": 3.6816, + "step": 642048 + }, + { + "epoch": 1.01, + "learning_rate": 3.948022193934208e-05, + "loss": 3.6537, + "step": 642560 + }, + { + "epoch": 1.01, + "learning_rate": 3.947183599183156e-05, + "loss": 3.6566, + "step": 643072 + }, + { + "epoch": 1.01, + "learning_rate": 3.946345004432104e-05, + "loss": 3.6703, + "step": 643584 + }, + { + "epoch": 1.01, + "learning_rate": 3.945509685441799e-05, + "loss": 3.6713, + "step": 644096 + }, + { + "epoch": 1.01, + "learning_rate": 3.944671090690747e-05, + "loss": 3.6597, + "step": 644608 + }, + { + "epoch": 1.01, + "learning_rate": 3.943832495939695e-05, + "loss": 3.6632, + "step": 645120 + }, + { + "epoch": 1.01, + "learning_rate": 3.942993901188643e-05, + "loss": 3.6601, + "step": 645632 + }, + { + "epoch": 1.01, + "learning_rate": 3.942155306437591e-05, + "loss": 3.6649, + "step": 646144 + }, + { + "epoch": 1.01, + "learning_rate": 3.941316711686539e-05, + "loss": 3.67, + "step": 646656 + }, + { + "epoch": 1.01, + "learning_rate": 3.940478116935487e-05, + "loss": 3.6659, + "step": 647168 + }, + { + "epoch": 1.01, + "learning_rate": 3.939639522184435e-05, + "loss": 3.6711, + "step": 647680 + }, + { + "epoch": 1.01, + "learning_rate": 3.9388025653137556e-05, + "loss": 3.675, + "step": 648192 + }, + { + "epoch": 1.01, + "learning_rate": 3.9379639705627036e-05, + "loss": 3.6881, + "step": 648704 + }, + { + "epoch": 1.01, + "learning_rate": 3.9371253758116516e-05, + "loss": 3.6598, + "step": 649216 + }, + { + "epoch": 1.01, + "learning_rate": 3.936288418940973e-05, + "loss": 3.6724, + "step": 649728 + }, + { + "epoch": 1.01, + "learning_rate": 3.9354498241899205e-05, + "loss": 3.6727, + "step": 650240 + }, + { + "epoch": 1.01, + "learning_rate": 3.9346112294388685e-05, + "loss": 3.6568, + "step": 650752 + }, + { + "epoch": 1.01, + "learning_rate": 3.9337726346878165e-05, + "loss": 3.6646, + "step": 651264 + }, + { + "epoch": 1.01, + "learning_rate": 3.9329340399367645e-05, + "loss": 3.6771, + "step": 651776 + }, + { + "epoch": 1.01, + "learning_rate": 3.932095445185713e-05, + "loss": 3.6709, + "step": 652288 + }, + { + "epoch": 1.01, + "learning_rate": 3.931258488315034e-05, + "loss": 3.6751, + "step": 652800 + }, + { + "epoch": 1.01, + "learning_rate": 3.930419893563982e-05, + "loss": 3.6551, + "step": 653312 + }, + { + "epoch": 1.01, + "learning_rate": 3.92958129881293e-05, + "loss": 3.658, + "step": 653824 + }, + { + "epoch": 1.01, + "learning_rate": 3.928742704061878e-05, + "loss": 3.6625, + "step": 654336 + }, + { + "epoch": 1.01, + "learning_rate": 3.927904109310826e-05, + "loss": 3.6685, + "step": 654848 + }, + { + "epoch": 1.01, + "learning_rate": 3.927065514559774e-05, + "loss": 3.6676, + "step": 655360 + }, + { + "epoch": 1.01, + "learning_rate": 3.926226919808722e-05, + "loss": 3.6608, + "step": 655872 + }, + { + "epoch": 1.02, + "learning_rate": 3.92538832505767e-05, + "loss": 3.6588, + "step": 656384 + }, + { + "epoch": 1.02, + "learning_rate": 3.924551368186991e-05, + "loss": 3.6666, + "step": 656896 + }, + { + "epoch": 1.02, + "learning_rate": 3.923712773435939e-05, + "loss": 3.6554, + "step": 657408 + }, + { + "epoch": 1.02, + "learning_rate": 3.922874178684887e-05, + "loss": 3.6657, + "step": 657920 + }, + { + "epoch": 1.02, + "learning_rate": 3.9220372218142085e-05, + "loss": 3.6715, + "step": 658432 + }, + { + "epoch": 1.02, + "learning_rate": 3.9211986270631565e-05, + "loss": 3.6753, + "step": 658944 + }, + { + "epoch": 1.02, + "learning_rate": 3.9203600323121045e-05, + "loss": 3.6643, + "step": 659456 + }, + { + "epoch": 1.02, + "learning_rate": 3.9195214375610525e-05, + "loss": 3.6582, + "step": 659968 + }, + { + "epoch": 1.02, + "learning_rate": 3.9186828428100005e-05, + "loss": 3.6681, + "step": 660480 + }, + { + "epoch": 1.02, + "learning_rate": 3.9178442480589485e-05, + "loss": 3.6616, + "step": 660992 + }, + { + "epoch": 1.02, + "learning_rate": 3.917008929068642e-05, + "loss": 3.6688, + "step": 661504 + }, + { + "epoch": 1.02, + "learning_rate": 3.91617033431759e-05, + "loss": 3.662, + "step": 662016 + }, + { + "epoch": 1.02, + "learning_rate": 3.915331739566538e-05, + "loss": 3.6564, + "step": 662528 + }, + { + "epoch": 1.02, + "learning_rate": 3.914493144815486e-05, + "loss": 3.668, + "step": 663040 + }, + { + "epoch": 1.02, + "learning_rate": 3.913654550064434e-05, + "loss": 3.6677, + "step": 663552 + }, + { + "epoch": 1.02, + "learning_rate": 3.912815955313382e-05, + "loss": 3.6495, + "step": 664064 + }, + { + "epoch": 1.02, + "learning_rate": 3.91197736056233e-05, + "loss": 3.66, + "step": 664576 + }, + { + "epoch": 1.02, + "learning_rate": 3.911138765811278e-05, + "loss": 3.654, + "step": 665088 + }, + { + "epoch": 1.02, + "learning_rate": 3.910300171060226e-05, + "loss": 3.6728, + "step": 665600 + }, + { + "epoch": 1.02, + "learning_rate": 3.909461576309174e-05, + "loss": 3.6497, + "step": 666112 + }, + { + "epoch": 1.02, + "learning_rate": 3.908622981558122e-05, + "loss": 3.6677, + "step": 666624 + }, + { + "epoch": 1.02, + "learning_rate": 3.90778438680707e-05, + "loss": 3.6553, + "step": 667136 + }, + { + "epoch": 1.02, + "learning_rate": 3.906947429936392e-05, + "loss": 3.6638, + "step": 667648 + }, + { + "epoch": 1.02, + "learning_rate": 3.906108835185339e-05, + "loss": 3.6651, + "step": 668160 + }, + { + "epoch": 1.02, + "learning_rate": 3.905270240434287e-05, + "loss": 3.6572, + "step": 668672 + }, + { + "epoch": 1.02, + "learning_rate": 3.904431645683235e-05, + "loss": 3.661, + "step": 669184 + }, + { + "epoch": 1.02, + "learning_rate": 3.9035963266929297e-05, + "loss": 3.6772, + "step": 669696 + }, + { + "epoch": 1.02, + "learning_rate": 3.9027577319418777e-05, + "loss": 3.6628, + "step": 670208 + }, + { + "epoch": 1.02, + "learning_rate": 3.9019191371908257e-05, + "loss": 3.6638, + "step": 670720 + }, + { + "epoch": 1.02, + "learning_rate": 3.9010805424397736e-05, + "loss": 3.6641, + "step": 671232 + }, + { + "epoch": 1.02, + "learning_rate": 3.9002419476887216e-05, + "loss": 3.6649, + "step": 671744 + }, + { + "epoch": 1.02, + "learning_rate": 3.8994033529376696e-05, + "loss": 3.659, + "step": 672256 + }, + { + "epoch": 1.02, + "learning_rate": 3.8985647581866176e-05, + "loss": 3.6619, + "step": 672768 + }, + { + "epoch": 1.02, + "learning_rate": 3.8977261634355656e-05, + "loss": 3.6491, + "step": 673280 + }, + { + "epoch": 1.02, + "learning_rate": 3.8968892065648865e-05, + "loss": 3.6628, + "step": 673792 + }, + { + "epoch": 1.02, + "learning_rate": 3.8960506118138345e-05, + "loss": 3.6556, + "step": 674304 + }, + { + "epoch": 1.02, + "learning_rate": 3.8952120170627825e-05, + "loss": 3.6588, + "step": 674816 + }, + { + "epoch": 1.02, + "learning_rate": 3.8943734223117305e-05, + "loss": 3.6579, + "step": 675328 + }, + { + "epoch": 1.02, + "learning_rate": 3.8935364654410514e-05, + "loss": 3.6561, + "step": 675840 + }, + { + "epoch": 1.02, + "learning_rate": 3.8926978706899994e-05, + "loss": 3.6523, + "step": 676352 + }, + { + "epoch": 1.02, + "learning_rate": 3.8918592759389474e-05, + "loss": 3.6721, + "step": 676864 + }, + { + "epoch": 1.02, + "learning_rate": 3.8910206811878954e-05, + "loss": 3.6595, + "step": 677376 + }, + { + "epoch": 1.02, + "learning_rate": 3.890183724317217e-05, + "loss": 3.6724, + "step": 677888 + }, + { + "epoch": 1.02, + "learning_rate": 3.889345129566165e-05, + "loss": 3.6454, + "step": 678400 + }, + { + "epoch": 1.02, + "learning_rate": 3.888506534815113e-05, + "loss": 3.6748, + "step": 678912 + }, + { + "epoch": 1.02, + "learning_rate": 3.887667940064061e-05, + "loss": 3.6576, + "step": 679424 + }, + { + "epoch": 1.02, + "learning_rate": 3.886830983193382e-05, + "loss": 3.6661, + "step": 679936 + }, + { + "epoch": 1.02, + "learning_rate": 3.88599238844233e-05, + "loss": 3.665, + "step": 680448 + }, + { + "epoch": 1.02, + "learning_rate": 3.885153793691278e-05, + "loss": 3.6596, + "step": 680960 + }, + { + "epoch": 1.02, + "learning_rate": 3.884315198940226e-05, + "loss": 3.6605, + "step": 681472 + }, + { + "epoch": 1.02, + "learning_rate": 3.883478242069547e-05, + "loss": 3.6778, + "step": 681984 + }, + { + "epoch": 1.02, + "learning_rate": 3.882639647318495e-05, + "loss": 3.6436, + "step": 682496 + }, + { + "epoch": 1.02, + "learning_rate": 3.881801052567443e-05, + "loss": 3.6683, + "step": 683008 + }, + { + "epoch": 1.02, + "learning_rate": 3.8809640956967644e-05, + "loss": 3.6579, + "step": 683520 + }, + { + "epoch": 1.02, + "learning_rate": 3.8801255009457124e-05, + "loss": 3.65, + "step": 684032 + }, + { + "epoch": 1.02, + "learning_rate": 3.8792869061946604e-05, + "loss": 3.655, + "step": 684544 + }, + { + "epoch": 1.02, + "learning_rate": 3.8784483114436084e-05, + "loss": 3.6567, + "step": 685056 + }, + { + "epoch": 1.02, + "learning_rate": 3.8776097166925564e-05, + "loss": 3.6716, + "step": 685568 + }, + { + "epoch": 1.02, + "learning_rate": 3.8767711219415044e-05, + "loss": 3.6532, + "step": 686080 + }, + { + "epoch": 1.02, + "learning_rate": 3.875934165070825e-05, + "loss": 3.6599, + "step": 686592 + }, + { + "epoch": 1.03, + "eval_loss": 3.8484747409820557, + "eval_runtime": 303.6968, + "eval_samples_per_second": 1256.487, + "eval_steps_per_second": 39.266, + "step": 686880 + }, + { + "epoch": 0.0, + "learning_rate": 3.875095570319773e-05, + "loss": 3.6573, + "step": 687104 + }, + { + "epoch": 0.0, + "learning_rate": 3.874256975568721e-05, + "loss": 3.6531, + "step": 687616 + }, + { + "epoch": 0.0, + "learning_rate": 3.873418380817669e-05, + "loss": 3.657, + "step": 688128 + }, + { + "epoch": 0.0, + "learning_rate": 3.872579786066617e-05, + "loss": 3.6578, + "step": 688640 + }, + { + "epoch": 0.0, + "learning_rate": 3.871741191315565e-05, + "loss": 3.6695, + "step": 689152 + }, + { + "epoch": 0.0, + "learning_rate": 3.870902596564513e-05, + "loss": 3.6481, + "step": 689664 + }, + { + "epoch": 0.0, + "learning_rate": 3.870065639693835e-05, + "loss": 3.6584, + "step": 690176 + }, + { + "epoch": 0.0, + "learning_rate": 3.869227044942783e-05, + "loss": 3.6519, + "step": 690688 + }, + { + "epoch": 0.0, + "learning_rate": 3.868388450191731e-05, + "loss": 3.6569, + "step": 691200 + }, + { + "epoch": 0.0, + "learning_rate": 3.867549855440679e-05, + "loss": 3.6656, + "step": 691712 + }, + { + "epoch": 0.0, + "learning_rate": 3.866711260689627e-05, + "loss": 3.6544, + "step": 692224 + }, + { + "epoch": 0.0, + "learning_rate": 3.865872665938575e-05, + "loss": 3.6547, + "step": 692736 + }, + { + "epoch": 0.0, + "learning_rate": 3.865035709067896e-05, + "loss": 3.661, + "step": 693248 + }, + { + "epoch": 0.0, + "learning_rate": 3.864197114316844e-05, + "loss": 3.6417, + "step": 693760 + }, + { + "epoch": 0.0, + "learning_rate": 3.863358519565792e-05, + "loss": 3.6529, + "step": 694272 + }, + { + "epoch": 0.0, + "learning_rate": 3.86251992481474e-05, + "loss": 3.6443, + "step": 694784 + }, + { + "epoch": 0.0, + "learning_rate": 3.861681330063687e-05, + "loss": 3.6535, + "step": 695296 + }, + { + "epoch": 0.0, + "learning_rate": 3.860842735312635e-05, + "loss": 3.649, + "step": 695808 + }, + { + "epoch": 0.0, + "learning_rate": 3.860004140561583e-05, + "loss": 3.6461, + "step": 696320 + }, + { + "epoch": 0.0, + "learning_rate": 3.8591671836909046e-05, + "loss": 3.6602, + "step": 696832 + }, + { + "epoch": 0.0, + "learning_rate": 3.8583285889398526e-05, + "loss": 3.6617, + "step": 697344 + }, + { + "epoch": 0.0, + "learning_rate": 3.857491632069174e-05, + "loss": 3.663, + "step": 697856 + }, + { + "epoch": 0.0, + "learning_rate": 3.856653037318122e-05, + "loss": 3.6553, + "step": 698368 + }, + { + "epoch": 0.0, + "learning_rate": 3.8558144425670695e-05, + "loss": 3.6513, + "step": 698880 + }, + { + "epoch": 0.0, + "learning_rate": 3.8549758478160175e-05, + "loss": 3.649, + "step": 699392 + }, + { + "epoch": 0.0, + "learning_rate": 3.8541372530649655e-05, + "loss": 3.6601, + "step": 699904 + }, + { + "epoch": 0.0, + "learning_rate": 3.8532986583139135e-05, + "loss": 3.6547, + "step": 700416 + }, + { + "epoch": 0.0, + "learning_rate": 3.8524600635628615e-05, + "loss": 3.6506, + "step": 700928 + }, + { + "epoch": 0.0, + "learning_rate": 3.8516214688118095e-05, + "loss": 3.6443, + "step": 701440 + }, + { + "epoch": 0.0, + "learning_rate": 3.8507845119411304e-05, + "loss": 3.6476, + "step": 701952 + }, + { + "epoch": 0.01, + "learning_rate": 3.849947555070452e-05, + "loss": 3.6438, + "step": 702464 + }, + { + "epoch": 0.01, + "learning_rate": 3.8491089603194e-05, + "loss": 3.6617, + "step": 702976 + }, + { + "epoch": 0.01, + "learning_rate": 3.848270365568348e-05, + "loss": 3.6611, + "step": 703488 + }, + { + "epoch": 0.01, + "learning_rate": 3.847431770817296e-05, + "loss": 3.6486, + "step": 704000 + }, + { + "epoch": 0.01, + "learning_rate": 3.846593176066244e-05, + "loss": 3.6456, + "step": 704512 + }, + { + "epoch": 0.01, + "learning_rate": 3.845754581315192e-05, + "loss": 3.6515, + "step": 705024 + }, + { + "epoch": 0.01, + "learning_rate": 3.84491598656414e-05, + "loss": 3.6557, + "step": 705536 + }, + { + "epoch": 0.01, + "learning_rate": 3.844077391813088e-05, + "loss": 3.6507, + "step": 706048 + }, + { + "epoch": 0.01, + "learning_rate": 3.843240434942409e-05, + "loss": 3.6459, + "step": 706560 + }, + { + "epoch": 0.01, + "learning_rate": 3.842401840191357e-05, + "loss": 3.6393, + "step": 707072 + }, + { + "epoch": 0.01, + "learning_rate": 3.841563245440305e-05, + "loss": 3.6461, + "step": 707584 + }, + { + "epoch": 0.01, + "learning_rate": 3.840726288569626e-05, + "loss": 3.6511, + "step": 708096 + }, + { + "epoch": 0.01, + "learning_rate": 3.839887693818574e-05, + "loss": 3.6545, + "step": 708608 + }, + { + "epoch": 0.01, + "learning_rate": 3.8390490990675224e-05, + "loss": 3.6488, + "step": 709120 + }, + { + "epoch": 0.01, + "learning_rate": 3.8382105043164704e-05, + "loss": 3.6482, + "step": 709632 + }, + { + "epoch": 0.01, + "learning_rate": 3.8373719095654184e-05, + "loss": 3.65, + "step": 710144 + }, + { + "epoch": 0.01, + "learning_rate": 3.8365333148143664e-05, + "loss": 3.6508, + "step": 710656 + }, + { + "epoch": 0.01, + "learning_rate": 3.8356947200633144e-05, + "loss": 3.6472, + "step": 711168 + }, + { + "epoch": 0.01, + "learning_rate": 3.834857763192635e-05, + "loss": 3.6213, + "step": 711680 + }, + { + "epoch": 0.01, + "learning_rate": 3.834019168441583e-05, + "loss": 3.6577, + "step": 712192 + }, + { + "epoch": 0.01, + "learning_rate": 3.833180573690531e-05, + "loss": 3.6455, + "step": 712704 + }, + { + "epoch": 0.01, + "learning_rate": 3.832341978939479e-05, + "loss": 3.6522, + "step": 713216 + }, + { + "epoch": 0.01, + "learning_rate": 3.831503384188427e-05, + "loss": 3.6434, + "step": 713728 + }, + { + "epoch": 0.01, + "learning_rate": 3.830664789437375e-05, + "loss": 3.641, + "step": 714240 + }, + { + "epoch": 0.01, + "learning_rate": 3.829826194686323e-05, + "loss": 3.6346, + "step": 714752 + }, + { + "epoch": 0.01, + "learning_rate": 3.828989237815644e-05, + "loss": 3.6369, + "step": 715264 + }, + { + "epoch": 0.01, + "learning_rate": 3.828150643064592e-05, + "loss": 3.6478, + "step": 715776 + }, + { + "epoch": 0.01, + "learning_rate": 3.827313686193914e-05, + "loss": 3.6336, + "step": 716288 + }, + { + "epoch": 0.01, + "learning_rate": 3.826475091442862e-05, + "loss": 3.6549, + "step": 716800 + }, + { + "epoch": 0.01, + "learning_rate": 3.82563649669181e-05, + "loss": 3.6408, + "step": 717312 + }, + { + "epoch": 0.01, + "learning_rate": 3.824797901940758e-05, + "loss": 3.639, + "step": 717824 + }, + { + "epoch": 0.01, + "learning_rate": 3.823959307189706e-05, + "loss": 3.6588, + "step": 718336 + }, + { + "epoch": 0.01, + "learning_rate": 3.823122350319027e-05, + "loss": 3.6315, + "step": 718848 + }, + { + "epoch": 0.01, + "learning_rate": 3.822283755567975e-05, + "loss": 3.6306, + "step": 719360 + }, + { + "epoch": 0.01, + "learning_rate": 3.8214451608169227e-05, + "loss": 3.6474, + "step": 719872 + }, + { + "epoch": 0.01, + "learning_rate": 3.8206065660658707e-05, + "loss": 3.6461, + "step": 720384 + }, + { + "epoch": 0.01, + "learning_rate": 3.819767971314818e-05, + "loss": 3.6357, + "step": 720896 + }, + { + "epoch": 0.01, + "learning_rate": 3.818929376563766e-05, + "loss": 3.6392, + "step": 721408 + }, + { + "epoch": 0.01, + "learning_rate": 3.818090781812714e-05, + "loss": 3.6282, + "step": 721920 + }, + { + "epoch": 0.01, + "learning_rate": 3.8172538249420356e-05, + "loss": 3.6433, + "step": 722432 + }, + { + "epoch": 0.01, + "learning_rate": 3.8164152301909835e-05, + "loss": 3.6418, + "step": 722944 + }, + { + "epoch": 0.01, + "learning_rate": 3.8155766354399315e-05, + "loss": 3.6448, + "step": 723456 + }, + { + "epoch": 0.01, + "learning_rate": 3.8147380406888795e-05, + "loss": 3.6431, + "step": 723968 + }, + { + "epoch": 0.01, + "learning_rate": 3.8138994459378275e-05, + "loss": 3.6481, + "step": 724480 + }, + { + "epoch": 0.01, + "learning_rate": 3.8130608511867755e-05, + "loss": 3.663, + "step": 724992 + }, + { + "epoch": 0.01, + "learning_rate": 3.8122222564357235e-05, + "loss": 3.6378, + "step": 725504 + }, + { + "epoch": 0.01, + "learning_rate": 3.8113852995650444e-05, + "loss": 3.6528, + "step": 726016 + }, + { + "epoch": 0.01, + "learning_rate": 3.8105467048139924e-05, + "loss": 3.644, + "step": 726528 + }, + { + "epoch": 0.01, + "learning_rate": 3.8097081100629404e-05, + "loss": 3.6356, + "step": 727040 + }, + { + "epoch": 0.01, + "learning_rate": 3.8088695153118884e-05, + "loss": 3.6376, + "step": 727552 + }, + { + "epoch": 0.01, + "learning_rate": 3.8080309205608364e-05, + "loss": 3.6546, + "step": 728064 + }, + { + "epoch": 0.01, + "learning_rate": 3.8071923258097844e-05, + "loss": 3.6453, + "step": 728576 + }, + { + "epoch": 0.01, + "learning_rate": 3.806355368939106e-05, + "loss": 3.6514, + "step": 729088 + }, + { + "epoch": 0.01, + "learning_rate": 3.805516774188054e-05, + "loss": 3.6295, + "step": 729600 + }, + { + "epoch": 0.01, + "learning_rate": 3.804678179437002e-05, + "loss": 3.635, + "step": 730112 + }, + { + "epoch": 0.01, + "learning_rate": 3.80383958468595e-05, + "loss": 3.6393, + "step": 730624 + }, + { + "epoch": 0.01, + "learning_rate": 3.803000989934898e-05, + "loss": 3.6419, + "step": 731136 + }, + { + "epoch": 0.01, + "learning_rate": 3.802164033064219e-05, + "loss": 3.6438, + "step": 731648 + }, + { + "epoch": 0.01, + "learning_rate": 3.801325438313167e-05, + "loss": 3.6402, + "step": 732160 + }, + { + "epoch": 0.02, + "learning_rate": 3.800486843562115e-05, + "loss": 3.6309, + "step": 732672 + }, + { + "epoch": 0.02, + "learning_rate": 3.799648248811063e-05, + "loss": 3.6455, + "step": 733184 + }, + { + "epoch": 0.02, + "learning_rate": 3.798809654060011e-05, + "loss": 3.6324, + "step": 733696 + }, + { + "epoch": 0.02, + "learning_rate": 3.797971059308959e-05, + "loss": 3.6399, + "step": 734208 + }, + { + "epoch": 0.02, + "learning_rate": 3.79713410243828e-05, + "loss": 3.6472, + "step": 734720 + }, + { + "epoch": 0.02, + "learning_rate": 3.796295507687228e-05, + "loss": 3.6497, + "step": 735232 + }, + { + "epoch": 0.02, + "learning_rate": 3.7954569129361764e-05, + "loss": 3.6398, + "step": 735744 + }, + { + "epoch": 0.02, + "learning_rate": 3.7946183181851244e-05, + "loss": 3.635, + "step": 736256 + }, + { + "epoch": 0.02, + "learning_rate": 3.793779723434072e-05, + "loss": 3.6395, + "step": 736768 + }, + { + "epoch": 0.02, + "learning_rate": 3.79294112868302e-05, + "loss": 3.6373, + "step": 737280 + }, + { + "epoch": 0.02, + "learning_rate": 3.792105809692714e-05, + "loss": 3.6476, + "step": 737792 + }, + { + "epoch": 0.02, + "learning_rate": 3.791267214941662e-05, + "loss": 3.6368, + "step": 738304 + }, + { + "epoch": 0.02, + "learning_rate": 3.79042862019061e-05, + "loss": 3.6377, + "step": 738816 + }, + { + "epoch": 0.02, + "learning_rate": 3.789590025439558e-05, + "loss": 3.6398, + "step": 739328 + }, + { + "epoch": 0.02, + "learning_rate": 3.788751430688506e-05, + "loss": 3.6439, + "step": 739840 + }, + { + "epoch": 0.02, + "learning_rate": 3.787912835937454e-05, + "loss": 3.6272, + "step": 740352 + }, + { + "epoch": 0.02, + "learning_rate": 3.7870742411864016e-05, + "loss": 3.6379, + "step": 740864 + }, + { + "epoch": 0.02, + "learning_rate": 3.78623564643535e-05, + "loss": 3.6289, + "step": 741376 + }, + { + "epoch": 0.02, + "learning_rate": 3.785397051684298e-05, + "loss": 3.6475, + "step": 741888 + }, + { + "epoch": 0.02, + "learning_rate": 3.784558456933246e-05, + "loss": 3.6257, + "step": 742400 + }, + { + "epoch": 0.02, + "learning_rate": 3.783719862182194e-05, + "loss": 3.6414, + "step": 742912 + }, + { + "epoch": 0.02, + "learning_rate": 3.782881267431142e-05, + "loss": 3.6292, + "step": 743424 + }, + { + "epoch": 0.02, + "learning_rate": 3.782044310560463e-05, + "loss": 3.6457, + "step": 743936 + }, + { + "epoch": 0.02, + "learning_rate": 3.781205715809411e-05, + "loss": 3.6385, + "step": 744448 + }, + { + "epoch": 0.02, + "learning_rate": 3.780367121058359e-05, + "loss": 3.6323, + "step": 744960 + }, + { + "epoch": 0.02, + "learning_rate": 3.779528526307307e-05, + "loss": 3.6394, + "step": 745472 + }, + { + "epoch": 0.02, + "learning_rate": 3.7786932073170016e-05, + "loss": 3.6545, + "step": 745984 + }, + { + "epoch": 0.02, + "learning_rate": 3.777854612565949e-05, + "loss": 3.6392, + "step": 746496 + }, + { + "epoch": 0.02, + "learning_rate": 3.777016017814897e-05, + "loss": 3.6443, + "step": 747008 + }, + { + "epoch": 0.02, + "learning_rate": 3.776177423063845e-05, + "loss": 3.6382, + "step": 747520 + }, + { + "epoch": 0.02, + "learning_rate": 3.7753388283127936e-05, + "loss": 3.6398, + "step": 748032 + }, + { + "epoch": 0.02, + "learning_rate": 3.7745002335617416e-05, + "loss": 3.6399, + "step": 748544 + }, + { + "epoch": 0.02, + "learning_rate": 3.7736616388106896e-05, + "loss": 3.6326, + "step": 749056 + }, + { + "epoch": 0.02, + "learning_rate": 3.7728230440596376e-05, + "loss": 3.6254, + "step": 749568 + }, + { + "epoch": 0.02, + "learning_rate": 3.7719860871889585e-05, + "loss": 3.6393, + "step": 750080 + }, + { + "epoch": 0.02, + "learning_rate": 3.7711474924379065e-05, + "loss": 3.6377, + "step": 750592 + }, + { + "epoch": 0.02, + "learning_rate": 3.7703105355672274e-05, + "loss": 3.6359, + "step": 751104 + }, + { + "epoch": 0.02, + "learning_rate": 3.7694719408161754e-05, + "loss": 3.6359, + "step": 751616 + }, + { + "epoch": 0.02, + "learning_rate": 3.7686333460651234e-05, + "loss": 3.6329, + "step": 752128 + }, + { + "epoch": 0.02, + "learning_rate": 3.7677947513140714e-05, + "loss": 3.6288, + "step": 752640 + }, + { + "epoch": 0.02, + "learning_rate": 3.7669561565630194e-05, + "loss": 3.6474, + "step": 753152 + }, + { + "epoch": 0.02, + "learning_rate": 3.76611919969234e-05, + "loss": 3.6375, + "step": 753664 + }, + { + "epoch": 0.02, + "learning_rate": 3.765280604941289e-05, + "loss": 3.6486, + "step": 754176 + }, + { + "epoch": 0.02, + "learning_rate": 3.764442010190237e-05, + "loss": 3.6223, + "step": 754688 + }, + { + "epoch": 0.02, + "learning_rate": 3.763603415439185e-05, + "loss": 3.6491, + "step": 755200 + }, + { + "epoch": 0.02, + "learning_rate": 3.762764820688133e-05, + "loss": 3.6379, + "step": 755712 + }, + { + "epoch": 0.02, + "learning_rate": 3.761927863817454e-05, + "loss": 3.6414, + "step": 756224 + }, + { + "epoch": 0.02, + "learning_rate": 3.761089269066402e-05, + "loss": 3.637, + "step": 756736 + }, + { + "epoch": 0.02, + "learning_rate": 3.76025067431535e-05, + "loss": 3.635, + "step": 757248 + }, + { + "epoch": 0.02, + "learning_rate": 3.759412079564298e-05, + "loss": 3.6372, + "step": 757760 + }, + { + "epoch": 0.02, + "learning_rate": 3.758573484813246e-05, + "loss": 3.6533, + "step": 758272 + }, + { + "epoch": 0.02, + "learning_rate": 3.757734890062194e-05, + "loss": 3.6236, + "step": 758784 + }, + { + "epoch": 0.02, + "learning_rate": 3.756896295311142e-05, + "loss": 3.6435, + "step": 759296 + }, + { + "epoch": 0.02, + "learning_rate": 3.756059338440463e-05, + "loss": 3.633, + "step": 759808 + }, + { + "epoch": 0.02, + "learning_rate": 3.755220743689411e-05, + "loss": 3.6301, + "step": 760320 + }, + { + "epoch": 0.02, + "learning_rate": 3.754382148938359e-05, + "loss": 3.6335, + "step": 760832 + }, + { + "epoch": 0.02, + "learning_rate": 3.7535435541873074e-05, + "loss": 3.6338, + "step": 761344 + }, + { + "epoch": 0.02, + "learning_rate": 3.7527049594362554e-05, + "loss": 3.6436, + "step": 761856 + }, + { + "epoch": 0.02, + "learning_rate": 3.751866364685203e-05, + "loss": 3.6361, + "step": 762368 + }, + { + "epoch": 0.02, + "learning_rate": 3.751027769934151e-05, + "loss": 3.6343, + "step": 762880 + }, + { + "epoch": 0.03, + "eval_loss": 3.8456995487213135, + "eval_runtime": 310.0215, + "eval_samples_per_second": 1230.853, + "eval_steps_per_second": 38.465, + "step": 763200 + }, + { + "epoch": 1.0, + "learning_rate": 3.750189175183099e-05, + "loss": 3.6319, + "step": 763392 + }, + { + "epoch": 1.0, + "learning_rate": 3.749353856192793e-05, + "loss": 3.6287, + "step": 763904 + }, + { + "epoch": 1.0, + "learning_rate": 3.748515261441741e-05, + "loss": 3.6328, + "step": 764416 + }, + { + "epoch": 1.0, + "learning_rate": 3.747676666690689e-05, + "loss": 3.6365, + "step": 764928 + }, + { + "epoch": 1.0, + "learning_rate": 3.746838071939637e-05, + "loss": 3.6452, + "step": 765440 + }, + { + "epoch": 1.0, + "learning_rate": 3.7459994771885845e-05, + "loss": 3.627, + "step": 765952 + }, + { + "epoch": 1.0, + "learning_rate": 3.7451608824375325e-05, + "loss": 3.6325, + "step": 766464 + }, + { + "epoch": 1.0, + "learning_rate": 3.744323925566854e-05, + "loss": 3.6323, + "step": 766976 + }, + { + "epoch": 1.0, + "learning_rate": 3.743485330815803e-05, + "loss": 3.6314, + "step": 767488 + }, + { + "epoch": 1.0, + "learning_rate": 3.74264673606475e-05, + "loss": 3.6376, + "step": 768000 + }, + { + "epoch": 1.0, + "learning_rate": 3.741808141313698e-05, + "loss": 3.6312, + "step": 768512 + }, + { + "epoch": 1.0, + "learning_rate": 3.740969546562646e-05, + "loss": 3.634, + "step": 769024 + }, + { + "epoch": 1.0, + "learning_rate": 3.740132589691968e-05, + "loss": 3.6358, + "step": 769536 + }, + { + "epoch": 1.0, + "learning_rate": 3.739293994940915e-05, + "loss": 3.6199, + "step": 770048 + }, + { + "epoch": 1.0, + "learning_rate": 3.738455400189863e-05, + "loss": 3.6304, + "step": 770560 + }, + { + "epoch": 1.0, + "learning_rate": 3.737616805438811e-05, + "loss": 3.6199, + "step": 771072 + }, + { + "epoch": 1.0, + "learning_rate": 3.736778210687759e-05, + "loss": 3.6319, + "step": 771584 + }, + { + "epoch": 1.0, + "learning_rate": 3.735939615936707e-05, + "loss": 3.6266, + "step": 772096 + }, + { + "epoch": 1.0, + "learning_rate": 3.735101021185655e-05, + "loss": 3.6225, + "step": 772608 + }, + { + "epoch": 1.0, + "learning_rate": 3.734262426434603e-05, + "loss": 3.6375, + "step": 773120 + }, + { + "epoch": 1.0, + "learning_rate": 3.7334254695639245e-05, + "loss": 3.636, + "step": 773632 + }, + { + "epoch": 1.0, + "learning_rate": 3.7325885126932455e-05, + "loss": 3.6387, + "step": 774144 + }, + { + "epoch": 1.0, + "learning_rate": 3.7317499179421934e-05, + "loss": 3.6331, + "step": 774656 + }, + { + "epoch": 1.0, + "learning_rate": 3.7309113231911414e-05, + "loss": 3.6288, + "step": 775168 + }, + { + "epoch": 1.0, + "learning_rate": 3.7300727284400894e-05, + "loss": 3.6299, + "step": 775680 + }, + { + "epoch": 1.0, + "learning_rate": 3.7292341336890374e-05, + "loss": 3.6324, + "step": 776192 + }, + { + "epoch": 1.0, + "learning_rate": 3.7283955389379854e-05, + "loss": 3.6307, + "step": 776704 + }, + { + "epoch": 1.0, + "learning_rate": 3.7275569441869334e-05, + "loss": 3.6298, + "step": 777216 + }, + { + "epoch": 1.0, + "learning_rate": 3.7267183494358814e-05, + "loss": 3.6239, + "step": 777728 + }, + { + "epoch": 1.0, + "learning_rate": 3.725881392565202e-05, + "loss": 3.6236, + "step": 778240 + }, + { + "epoch": 1.01, + "learning_rate": 3.725044435694523e-05, + "loss": 3.6231, + "step": 778752 + }, + { + "epoch": 1.01, + "learning_rate": 3.724205840943472e-05, + "loss": 3.633, + "step": 779264 + }, + { + "epoch": 1.01, + "learning_rate": 3.72336724619242e-05, + "loss": 3.6405, + "step": 779776 + }, + { + "epoch": 1.01, + "learning_rate": 3.722528651441368e-05, + "loss": 3.6267, + "step": 780288 + }, + { + "epoch": 1.01, + "learning_rate": 3.721691694570689e-05, + "loss": 3.6249, + "step": 780800 + }, + { + "epoch": 1.01, + "learning_rate": 3.720853099819637e-05, + "loss": 3.6244, + "step": 781312 + }, + { + "epoch": 1.01, + "learning_rate": 3.720014505068585e-05, + "loss": 3.6352, + "step": 781824 + }, + { + "epoch": 1.01, + "learning_rate": 3.719175910317533e-05, + "loss": 3.6281, + "step": 782336 + }, + { + "epoch": 1.01, + "learning_rate": 3.718337315566481e-05, + "loss": 3.6239, + "step": 782848 + }, + { + "epoch": 1.01, + "learning_rate": 3.717498720815429e-05, + "loss": 3.6164, + "step": 783360 + }, + { + "epoch": 1.01, + "learning_rate": 3.716660126064377e-05, + "loss": 3.6248, + "step": 783872 + }, + { + "epoch": 1.01, + "learning_rate": 3.715823169193698e-05, + "loss": 3.6252, + "step": 784384 + }, + { + "epoch": 1.01, + "learning_rate": 3.714984574442646e-05, + "loss": 3.6322, + "step": 784896 + }, + { + "epoch": 1.01, + "learning_rate": 3.714145979691594e-05, + "loss": 3.6294, + "step": 785408 + }, + { + "epoch": 1.01, + "learning_rate": 3.713307384940542e-05, + "loss": 3.6244, + "step": 785920 + }, + { + "epoch": 1.01, + "learning_rate": 3.71246879018949e-05, + "loss": 3.6258, + "step": 786432 + }, + { + "epoch": 1.01, + "learning_rate": 3.7116301954384384e-05, + "loss": 3.6274, + "step": 786944 + }, + { + "epoch": 1.01, + "learning_rate": 3.7107916006873863e-05, + "loss": 3.6242, + "step": 787456 + }, + { + "epoch": 1.01, + "learning_rate": 3.709953005936334e-05, + "loss": 3.5983, + "step": 787968 + }, + { + "epoch": 1.01, + "learning_rate": 3.709117686946028e-05, + "loss": 3.6321, + "step": 788480 + }, + { + "epoch": 1.01, + "learning_rate": 3.708279092194976e-05, + "loss": 3.6218, + "step": 788992 + }, + { + "epoch": 1.01, + "learning_rate": 3.707440497443924e-05, + "loss": 3.6318, + "step": 789504 + }, + { + "epoch": 1.01, + "learning_rate": 3.706601902692872e-05, + "loss": 3.6193, + "step": 790016 + }, + { + "epoch": 1.01, + "learning_rate": 3.70576330794182e-05, + "loss": 3.6196, + "step": 790528 + }, + { + "epoch": 1.01, + "learning_rate": 3.704924713190768e-05, + "loss": 3.6124, + "step": 791040 + }, + { + "epoch": 1.01, + "learning_rate": 3.7040861184397155e-05, + "loss": 3.6148, + "step": 791552 + }, + { + "epoch": 1.01, + "learning_rate": 3.7032475236886635e-05, + "loss": 3.6285, + "step": 792064 + }, + { + "epoch": 1.01, + "learning_rate": 3.702410566817985e-05, + "loss": 3.6063, + "step": 792576 + }, + { + "epoch": 1.01, + "learning_rate": 3.701571972066934e-05, + "loss": 3.6342, + "step": 793088 + }, + { + "epoch": 1.01, + "learning_rate": 3.700733377315881e-05, + "loss": 3.619, + "step": 793600 + }, + { + "epoch": 1.01, + "learning_rate": 3.699894782564829e-05, + "loss": 3.6145, + "step": 794112 + }, + { + "epoch": 1.01, + "learning_rate": 3.6990578256941506e-05, + "loss": 3.6382, + "step": 794624 + }, + { + "epoch": 1.01, + "learning_rate": 3.6982208688234715e-05, + "loss": 3.6145, + "step": 795136 + }, + { + "epoch": 1.01, + "learning_rate": 3.6973822740724195e-05, + "loss": 3.6091, + "step": 795648 + }, + { + "epoch": 1.01, + "learning_rate": 3.6965436793213675e-05, + "loss": 3.6246, + "step": 796160 + }, + { + "epoch": 1.01, + "learning_rate": 3.6957050845703155e-05, + "loss": 3.6238, + "step": 796672 + }, + { + "epoch": 1.01, + "learning_rate": 3.694866489819263e-05, + "loss": 3.6106, + "step": 797184 + }, + { + "epoch": 1.01, + "learning_rate": 3.694027895068211e-05, + "loss": 3.6191, + "step": 797696 + }, + { + "epoch": 1.01, + "learning_rate": 3.693189300317159e-05, + "loss": 3.6073, + "step": 798208 + }, + { + "epoch": 1.01, + "learning_rate": 3.6923523434464804e-05, + "loss": 3.6166, + "step": 798720 + }, + { + "epoch": 1.01, + "learning_rate": 3.6915137486954284e-05, + "loss": 3.624, + "step": 799232 + }, + { + "epoch": 1.01, + "learning_rate": 3.6906751539443764e-05, + "loss": 3.6225, + "step": 799744 + }, + { + "epoch": 1.01, + "learning_rate": 3.6898365591933244e-05, + "loss": 3.622, + "step": 800256 + }, + { + "epoch": 1.01, + "learning_rate": 3.6889979644422724e-05, + "loss": 3.6255, + "step": 800768 + }, + { + "epoch": 1.01, + "learning_rate": 3.6881593696912204e-05, + "loss": 3.6402, + "step": 801280 + }, + { + "epoch": 1.01, + "learning_rate": 3.6873207749401684e-05, + "loss": 3.6209, + "step": 801792 + }, + { + "epoch": 1.01, + "learning_rate": 3.686483818069489e-05, + "loss": 3.6275, + "step": 802304 + }, + { + "epoch": 1.01, + "learning_rate": 3.685645223318437e-05, + "loss": 3.6207, + "step": 802816 + }, + { + "epoch": 1.01, + "learning_rate": 3.684806628567385e-05, + "loss": 3.6168, + "step": 803328 + }, + { + "epoch": 1.01, + "learning_rate": 3.683968033816333e-05, + "loss": 3.6151, + "step": 803840 + }, + { + "epoch": 1.01, + "learning_rate": 3.683129439065281e-05, + "loss": 3.6351, + "step": 804352 + }, + { + "epoch": 1.01, + "learning_rate": 3.682290844314229e-05, + "loss": 3.6227, + "step": 804864 + }, + { + "epoch": 1.01, + "learning_rate": 3.681453887443551e-05, + "loss": 3.6303, + "step": 805376 + }, + { + "epoch": 1.01, + "learning_rate": 3.680615292692499e-05, + "loss": 3.6053, + "step": 805888 + }, + { + "epoch": 1.01, + "learning_rate": 3.679776697941447e-05, + "loss": 3.6131, + "step": 806400 + }, + { + "epoch": 1.01, + "learning_rate": 3.678938103190395e-05, + "loss": 3.6172, + "step": 806912 + }, + { + "epoch": 1.01, + "learning_rate": 3.678099508439343e-05, + "loss": 3.6208, + "step": 807424 + }, + { + "epoch": 1.01, + "learning_rate": 3.677262551568664e-05, + "loss": 3.6204, + "step": 807936 + }, + { + "epoch": 1.01, + "learning_rate": 3.676423956817612e-05, + "loss": 3.6196, + "step": 808448 + }, + { + "epoch": 1.01, + "learning_rate": 3.67558536206656e-05, + "loss": 3.6089, + "step": 808960 + }, + { + "epoch": 1.02, + "learning_rate": 3.674746767315508e-05, + "loss": 3.6218, + "step": 809472 + }, + { + "epoch": 1.02, + "learning_rate": 3.673908172564456e-05, + "loss": 3.614, + "step": 809984 + }, + { + "epoch": 1.02, + "learning_rate": 3.6730712156937766e-05, + "loss": 3.6154, + "step": 810496 + }, + { + "epoch": 1.02, + "learning_rate": 3.6722326209427246e-05, + "loss": 3.6249, + "step": 811008 + }, + { + "epoch": 1.02, + "learning_rate": 3.6713940261916726e-05, + "loss": 3.6302, + "step": 811520 + }, + { + "epoch": 1.02, + "learning_rate": 3.670555431440621e-05, + "loss": 3.6173, + "step": 812032 + }, + { + "epoch": 1.02, + "learning_rate": 3.669716836689569e-05, + "loss": 3.6149, + "step": 812544 + }, + { + "epoch": 1.02, + "learning_rate": 3.668878241938517e-05, + "loss": 3.6205, + "step": 813056 + }, + { + "epoch": 1.02, + "learning_rate": 3.6680396471874646e-05, + "loss": 3.6111, + "step": 813568 + }, + { + "epoch": 1.02, + "learning_rate": 3.667202690316786e-05, + "loss": 3.6255, + "step": 814080 + }, + { + "epoch": 1.02, + "learning_rate": 3.666365733446107e-05, + "loss": 3.6149, + "step": 814592 + }, + { + "epoch": 1.02, + "learning_rate": 3.665527138695055e-05, + "loss": 3.6196, + "step": 815104 + }, + { + "epoch": 1.02, + "learning_rate": 3.664688543944003e-05, + "loss": 3.618, + "step": 815616 + }, + { + "epoch": 1.02, + "learning_rate": 3.663849949192951e-05, + "loss": 3.617, + "step": 816128 + }, + { + "epoch": 1.02, + "learning_rate": 3.663011354441899e-05, + "loss": 3.6137, + "step": 816640 + }, + { + "epoch": 1.02, + "learning_rate": 3.6621727596908464e-05, + "loss": 3.6138, + "step": 817152 + }, + { + "epoch": 1.02, + "learning_rate": 3.661334164939795e-05, + "loss": 3.6034, + "step": 817664 + }, + { + "epoch": 1.02, + "learning_rate": 3.660495570188743e-05, + "loss": 3.6251, + "step": 818176 + }, + { + "epoch": 1.02, + "learning_rate": 3.659658613318065e-05, + "loss": 3.6142, + "step": 818688 + }, + { + "epoch": 1.02, + "learning_rate": 3.658820018567012e-05, + "loss": 3.6146, + "step": 819200 + }, + { + "epoch": 1.02, + "learning_rate": 3.65798142381596e-05, + "loss": 3.6054, + "step": 819712 + }, + { + "epoch": 1.02, + "learning_rate": 3.657142829064908e-05, + "loss": 3.6227, + "step": 820224 + }, + { + "epoch": 1.02, + "learning_rate": 3.656304234313856e-05, + "loss": 3.6219, + "step": 820736 + }, + { + "epoch": 1.02, + "learning_rate": 3.655465639562804e-05, + "loss": 3.6101, + "step": 821248 + }, + { + "epoch": 1.02, + "learning_rate": 3.654627044811752e-05, + "loss": 3.6187, + "step": 821760 + }, + { + "epoch": 1.02, + "learning_rate": 3.653790087941073e-05, + "loss": 3.6308, + "step": 822272 + }, + { + "epoch": 1.02, + "learning_rate": 3.652951493190021e-05, + "loss": 3.6153, + "step": 822784 + }, + { + "epoch": 1.02, + "learning_rate": 3.652112898438969e-05, + "loss": 3.6213, + "step": 823296 + }, + { + "epoch": 1.02, + "learning_rate": 3.651274303687917e-05, + "loss": 3.6182, + "step": 823808 + }, + { + "epoch": 1.02, + "learning_rate": 3.650435708936865e-05, + "loss": 3.6186, + "step": 824320 + }, + { + "epoch": 1.02, + "learning_rate": 3.649597114185813e-05, + "loss": 3.6189, + "step": 824832 + }, + { + "epoch": 1.02, + "learning_rate": 3.6487585194347615e-05, + "loss": 3.61, + "step": 825344 + }, + { + "epoch": 1.02, + "learning_rate": 3.6479199246837095e-05, + "loss": 3.609, + "step": 825856 + }, + { + "epoch": 1.02, + "learning_rate": 3.6470829678130304e-05, + "loss": 3.6168, + "step": 826368 + }, + { + "epoch": 1.02, + "learning_rate": 3.6462460109423513e-05, + "loss": 3.6188, + "step": 826880 + }, + { + "epoch": 1.02, + "learning_rate": 3.645407416191299e-05, + "loss": 3.6171, + "step": 827392 + }, + { + "epoch": 1.02, + "learning_rate": 3.644568821440247e-05, + "loss": 3.6097, + "step": 827904 + }, + { + "epoch": 1.02, + "learning_rate": 3.643730226689195e-05, + "loss": 3.6131, + "step": 828416 + }, + { + "epoch": 1.02, + "learning_rate": 3.642891631938143e-05, + "loss": 3.6064, + "step": 828928 + }, + { + "epoch": 1.02, + "learning_rate": 3.642053037187091e-05, + "loss": 3.6257, + "step": 829440 + }, + { + "epoch": 1.02, + "learning_rate": 3.641214442436039e-05, + "loss": 3.6169, + "step": 829952 + }, + { + "epoch": 1.02, + "learning_rate": 3.64037748556536e-05, + "loss": 3.6292, + "step": 830464 + }, + { + "epoch": 1.02, + "learning_rate": 3.639538890814308e-05, + "loss": 3.6045, + "step": 830976 + }, + { + "epoch": 1.02, + "learning_rate": 3.638700296063257e-05, + "loss": 3.6241, + "step": 831488 + }, + { + "epoch": 1.02, + "learning_rate": 3.637861701312205e-05, + "loss": 3.6188, + "step": 832000 + }, + { + "epoch": 1.02, + "learning_rate": 3.637023106561153e-05, + "loss": 3.6185, + "step": 832512 + }, + { + "epoch": 1.02, + "learning_rate": 3.636184511810101e-05, + "loss": 3.619, + "step": 833024 + }, + { + "epoch": 1.02, + "learning_rate": 3.635345917059048e-05, + "loss": 3.6156, + "step": 833536 + }, + { + "epoch": 1.02, + "learning_rate": 3.634507322307996e-05, + "loss": 3.6164, + "step": 834048 + }, + { + "epoch": 1.02, + "learning_rate": 3.633670365437318e-05, + "loss": 3.6305, + "step": 834560 + }, + { + "epoch": 1.02, + "learning_rate": 3.632831770686265e-05, + "loss": 3.6069, + "step": 835072 + }, + { + "epoch": 1.02, + "learning_rate": 3.631993175935213e-05, + "loss": 3.6211, + "step": 835584 + }, + { + "epoch": 1.02, + "learning_rate": 3.631154581184161e-05, + "loss": 3.6102, + "step": 836096 + }, + { + "epoch": 1.02, + "learning_rate": 3.630317624313483e-05, + "loss": 3.6092, + "step": 836608 + }, + { + "epoch": 1.02, + "learning_rate": 3.629479029562431e-05, + "loss": 3.6126, + "step": 837120 + }, + { + "epoch": 1.02, + "learning_rate": 3.628640434811379e-05, + "loss": 3.61, + "step": 837632 + }, + { + "epoch": 1.02, + "learning_rate": 3.627801840060327e-05, + "loss": 3.6223, + "step": 838144 + }, + { + "epoch": 1.02, + "learning_rate": 3.6269632453092747e-05, + "loss": 3.6172, + "step": 838656 + }, + { + "epoch": 1.02, + "learning_rate": 3.6261262884385956e-05, + "loss": 3.6135, + "step": 839168 + }, + { + "epoch": 1.03, + "eval_loss": 3.8439671993255615, + "eval_runtime": 420.7181, + "eval_samples_per_second": 906.999, + "eval_steps_per_second": 28.344, + "step": 839520 + }, + { + "epoch": 0.0, + "learning_rate": 3.6252876936875436e-05, + "loss": 3.6093, + "step": 839680 + }, + { + "epoch": 0.0, + "learning_rate": 3.6244490989364916e-05, + "loss": 3.6087, + "step": 840192 + }, + { + "epoch": 0.0, + "learning_rate": 3.6236105041854396e-05, + "loss": 3.6153, + "step": 840704 + }, + { + "epoch": 0.0, + "learning_rate": 3.6227719094343876e-05, + "loss": 3.6143, + "step": 841216 + }, + { + "epoch": 0.0, + "learning_rate": 3.6219333146833355e-05, + "loss": 3.6242, + "step": 841728 + }, + { + "epoch": 0.0, + "learning_rate": 3.6210947199322835e-05, + "loss": 3.6063, + "step": 842240 + }, + { + "epoch": 0.0, + "learning_rate": 3.6202577630616045e-05, + "loss": 3.6137, + "step": 842752 + }, + { + "epoch": 0.0, + "learning_rate": 3.6194191683105524e-05, + "loss": 3.6116, + "step": 843264 + }, + { + "epoch": 0.0, + "learning_rate": 3.6185805735595004e-05, + "loss": 3.6092, + "step": 843776 + }, + { + "epoch": 0.0, + "learning_rate": 3.617741978808449e-05, + "loss": 3.6133, + "step": 844288 + }, + { + "epoch": 0.0, + "learning_rate": 3.616903384057397e-05, + "loss": 3.6161, + "step": 844800 + }, + { + "epoch": 0.0, + "learning_rate": 3.616066427186718e-05, + "loss": 3.6107, + "step": 845312 + }, + { + "epoch": 0.0, + "learning_rate": 3.615227832435666e-05, + "loss": 3.6146, + "step": 845824 + }, + { + "epoch": 0.0, + "learning_rate": 3.614389237684614e-05, + "loss": 3.5986, + "step": 846336 + }, + { + "epoch": 0.0, + "learning_rate": 3.613550642933562e-05, + "loss": 3.6065, + "step": 846848 + }, + { + "epoch": 0.0, + "learning_rate": 3.612713686062883e-05, + "loss": 3.5975, + "step": 847360 + }, + { + "epoch": 0.0, + "learning_rate": 3.611875091311831e-05, + "loss": 3.6168, + "step": 847872 + }, + { + "epoch": 0.0, + "learning_rate": 3.611036496560779e-05, + "loss": 3.6062, + "step": 848384 + }, + { + "epoch": 0.0, + "learning_rate": 3.610197901809727e-05, + "loss": 3.6026, + "step": 848896 + }, + { + "epoch": 0.0, + "learning_rate": 3.609359307058675e-05, + "loss": 3.6167, + "step": 849408 + }, + { + "epoch": 0.0, + "learning_rate": 3.608520712307623e-05, + "loss": 3.6182, + "step": 849920 + }, + { + "epoch": 0.0, + "learning_rate": 3.6076837554369445e-05, + "loss": 3.6146, + "step": 850432 + }, + { + "epoch": 0.0, + "learning_rate": 3.6068451606858925e-05, + "loss": 3.6154, + "step": 850944 + }, + { + "epoch": 0.0, + "learning_rate": 3.6060065659348405e-05, + "loss": 3.6074, + "step": 851456 + }, + { + "epoch": 0.0, + "learning_rate": 3.6051679711837885e-05, + "loss": 3.6097, + "step": 851968 + }, + { + "epoch": 0.0, + "learning_rate": 3.6043293764327365e-05, + "loss": 3.6158, + "step": 852480 + }, + { + "epoch": 0.0, + "learning_rate": 3.603490781681684e-05, + "loss": 3.6031, + "step": 852992 + }, + { + "epoch": 0.0, + "learning_rate": 3.6026538248110054e-05, + "loss": 3.6119, + "step": 853504 + }, + { + "epoch": 0.0, + "learning_rate": 3.6018152300599534e-05, + "loss": 3.6051, + "step": 854016 + }, + { + "epoch": 0.0, + "learning_rate": 3.6009766353089014e-05, + "loss": 3.6031, + "step": 854528 + }, + { + "epoch": 0.01, + "learning_rate": 3.600138040557849e-05, + "loss": 3.6044, + "step": 855040 + }, + { + "epoch": 0.01, + "learning_rate": 3.599299445806797e-05, + "loss": 3.6134, + "step": 855552 + }, + { + "epoch": 0.01, + "learning_rate": 3.598460851055745e-05, + "loss": 3.6142, + "step": 856064 + }, + { + "epoch": 0.01, + "learning_rate": 3.597622256304693e-05, + "loss": 3.6151, + "step": 856576 + }, + { + "epoch": 0.01, + "learning_rate": 3.596785299434014e-05, + "loss": 3.6031, + "step": 857088 + }, + { + "epoch": 0.01, + "learning_rate": 3.595946704682962e-05, + "loss": 3.6037, + "step": 857600 + }, + { + "epoch": 0.01, + "learning_rate": 3.59510810993191e-05, + "loss": 3.6171, + "step": 858112 + }, + { + "epoch": 0.01, + "learning_rate": 3.594269515180858e-05, + "loss": 3.6047, + "step": 858624 + }, + { + "epoch": 0.01, + "learning_rate": 3.593430920429806e-05, + "loss": 3.6071, + "step": 859136 + }, + { + "epoch": 0.01, + "learning_rate": 3.592592325678754e-05, + "loss": 3.5961, + "step": 859648 + }, + { + "epoch": 0.01, + "learning_rate": 3.591753730927702e-05, + "loss": 3.6025, + "step": 860160 + }, + { + "epoch": 0.01, + "learning_rate": 3.590916774057023e-05, + "loss": 3.6052, + "step": 860672 + }, + { + "epoch": 0.01, + "learning_rate": 3.590078179305971e-05, + "loss": 3.6128, + "step": 861184 + }, + { + "epoch": 0.01, + "learning_rate": 3.589239584554919e-05, + "loss": 3.6083, + "step": 861696 + }, + { + "epoch": 0.01, + "learning_rate": 3.588400989803867e-05, + "loss": 3.6021, + "step": 862208 + }, + { + "epoch": 0.01, + "learning_rate": 3.587562395052815e-05, + "loss": 3.6097, + "step": 862720 + }, + { + "epoch": 0.01, + "learning_rate": 3.586725438182136e-05, + "loss": 3.6056, + "step": 863232 + }, + { + "epoch": 0.01, + "learning_rate": 3.585886843431085e-05, + "loss": 3.6063, + "step": 863744 + }, + { + "epoch": 0.01, + "learning_rate": 3.585048248680033e-05, + "loss": 3.5822, + "step": 864256 + }, + { + "epoch": 0.01, + "learning_rate": 3.584209653928981e-05, + "loss": 3.6107, + "step": 864768 + }, + { + "epoch": 0.01, + "learning_rate": 3.583371059177929e-05, + "loss": 3.6017, + "step": 865280 + }, + { + "epoch": 0.01, + "learning_rate": 3.5825341023072496e-05, + "loss": 3.6101, + "step": 865792 + }, + { + "epoch": 0.01, + "learning_rate": 3.5816955075561976e-05, + "loss": 3.6016, + "step": 866304 + }, + { + "epoch": 0.01, + "learning_rate": 3.5808569128051456e-05, + "loss": 3.5953, + "step": 866816 + }, + { + "epoch": 0.01, + "learning_rate": 3.5800183180540936e-05, + "loss": 3.5946, + "step": 867328 + }, + { + "epoch": 0.01, + "learning_rate": 3.5791797233030416e-05, + "loss": 3.594, + "step": 867840 + }, + { + "epoch": 0.01, + "learning_rate": 3.5783411285519896e-05, + "loss": 3.6081, + "step": 868352 + }, + { + "epoch": 0.01, + "learning_rate": 3.5775041716813105e-05, + "loss": 3.5875, + "step": 868864 + }, + { + "epoch": 0.01, + "learning_rate": 3.5766655769302585e-05, + "loss": 3.6146, + "step": 869376 + }, + { + "epoch": 0.01, + "learning_rate": 3.5758269821792065e-05, + "loss": 3.6001, + "step": 869888 + }, + { + "epoch": 0.01, + "learning_rate": 3.5749883874281545e-05, + "loss": 3.5919, + "step": 870400 + }, + { + "epoch": 0.01, + "learning_rate": 3.574149792677103e-05, + "loss": 3.6167, + "step": 870912 + }, + { + "epoch": 0.01, + "learning_rate": 3.573312835806424e-05, + "loss": 3.5945, + "step": 871424 + }, + { + "epoch": 0.01, + "learning_rate": 3.572474241055372e-05, + "loss": 3.591, + "step": 871936 + }, + { + "epoch": 0.01, + "learning_rate": 3.57163564630432e-05, + "loss": 3.6035, + "step": 872448 + }, + { + "epoch": 0.01, + "learning_rate": 3.5707970515532674e-05, + "loss": 3.6034, + "step": 872960 + }, + { + "epoch": 0.01, + "learning_rate": 3.5699584568022154e-05, + "loss": 3.5895, + "step": 873472 + }, + { + "epoch": 0.01, + "learning_rate": 3.5691198620511634e-05, + "loss": 3.6018, + "step": 873984 + }, + { + "epoch": 0.01, + "learning_rate": 3.5682812673001114e-05, + "loss": 3.5865, + "step": 874496 + }, + { + "epoch": 0.01, + "learning_rate": 3.567444310429432e-05, + "loss": 3.5962, + "step": 875008 + }, + { + "epoch": 0.01, + "learning_rate": 3.56660571567838e-05, + "loss": 3.6063, + "step": 875520 + }, + { + "epoch": 0.01, + "learning_rate": 3.565767120927328e-05, + "loss": 3.6034, + "step": 876032 + }, + { + "epoch": 0.01, + "learning_rate": 3.564928526176277e-05, + "loss": 3.6046, + "step": 876544 + }, + { + "epoch": 0.01, + "learning_rate": 3.564091569305598e-05, + "loss": 3.6048, + "step": 877056 + }, + { + "epoch": 0.01, + "learning_rate": 3.563252974554546e-05, + "loss": 3.6209, + "step": 877568 + }, + { + "epoch": 0.01, + "learning_rate": 3.562414379803494e-05, + "loss": 3.601, + "step": 878080 + }, + { + "epoch": 0.01, + "learning_rate": 3.561575785052442e-05, + "loss": 3.6088, + "step": 878592 + }, + { + "epoch": 0.01, + "learning_rate": 3.56073719030139e-05, + "loss": 3.6003, + "step": 879104 + }, + { + "epoch": 0.01, + "learning_rate": 3.559898595550338e-05, + "loss": 3.5958, + "step": 879616 + }, + { + "epoch": 0.01, + "learning_rate": 3.559060000799286e-05, + "loss": 3.597, + "step": 880128 + }, + { + "epoch": 0.01, + "learning_rate": 3.558221406048234e-05, + "loss": 3.6113, + "step": 880640 + }, + { + "epoch": 0.01, + "learning_rate": 3.5573860870579276e-05, + "loss": 3.6047, + "step": 881152 + }, + { + "epoch": 0.01, + "learning_rate": 3.5565474923068756e-05, + "loss": 3.61, + "step": 881664 + }, + { + "epoch": 0.01, + "learning_rate": 3.5557088975558236e-05, + "loss": 3.5892, + "step": 882176 + }, + { + "epoch": 0.01, + "learning_rate": 3.554870302804772e-05, + "loss": 3.5944, + "step": 882688 + }, + { + "epoch": 0.01, + "learning_rate": 3.554033345934093e-05, + "loss": 3.5977, + "step": 883200 + }, + { + "epoch": 0.01, + "learning_rate": 3.553194751183041e-05, + "loss": 3.598, + "step": 883712 + }, + { + "epoch": 0.01, + "learning_rate": 3.552356156431989e-05, + "loss": 3.5991, + "step": 884224 + }, + { + "epoch": 0.01, + "learning_rate": 3.551517561680937e-05, + "loss": 3.6, + "step": 884736 + }, + { + "epoch": 0.01, + "learning_rate": 3.550678966929885e-05, + "loss": 3.593, + "step": 885248 + }, + { + "epoch": 0.02, + "learning_rate": 3.549842010059206e-05, + "loss": 3.599, + "step": 885760 + }, + { + "epoch": 0.02, + "learning_rate": 3.549003415308154e-05, + "loss": 3.5955, + "step": 886272 + }, + { + "epoch": 0.02, + "learning_rate": 3.548164820557102e-05, + "loss": 3.5943, + "step": 886784 + }, + { + "epoch": 0.02, + "learning_rate": 3.54732622580605e-05, + "loss": 3.6059, + "step": 887296 + }, + { + "epoch": 0.02, + "learning_rate": 3.546487631054998e-05, + "loss": 3.6114, + "step": 887808 + }, + { + "epoch": 0.02, + "learning_rate": 3.545650674184319e-05, + "loss": 3.6032, + "step": 888320 + }, + { + "epoch": 0.02, + "learning_rate": 3.5448120794332677e-05, + "loss": 3.5906, + "step": 888832 + }, + { + "epoch": 0.02, + "learning_rate": 3.5439734846822157e-05, + "loss": 3.6012, + "step": 889344 + }, + { + "epoch": 0.02, + "learning_rate": 3.5431348899311636e-05, + "loss": 3.5925, + "step": 889856 + }, + { + "epoch": 0.02, + "learning_rate": 3.5422962951801116e-05, + "loss": 3.6016, + "step": 890368 + }, + { + "epoch": 0.02, + "learning_rate": 3.5414609761898055e-05, + "loss": 3.6015, + "step": 890880 + }, + { + "epoch": 0.02, + "learning_rate": 3.5406223814387535e-05, + "loss": 3.5947, + "step": 891392 + }, + { + "epoch": 0.02, + "learning_rate": 3.5397837866877015e-05, + "loss": 3.601, + "step": 891904 + }, + { + "epoch": 0.02, + "learning_rate": 3.5389451919366495e-05, + "loss": 3.5947, + "step": 892416 + }, + { + "epoch": 0.02, + "learning_rate": 3.5381065971855975e-05, + "loss": 3.5966, + "step": 892928 + }, + { + "epoch": 0.02, + "learning_rate": 3.5372680024345454e-05, + "loss": 3.5896, + "step": 893440 + }, + { + "epoch": 0.02, + "learning_rate": 3.5364294076834934e-05, + "loss": 3.5892, + "step": 893952 + }, + { + "epoch": 0.02, + "learning_rate": 3.5355908129324414e-05, + "loss": 3.6049, + "step": 894464 + }, + { + "epoch": 0.02, + "learning_rate": 3.5347538560617623e-05, + "loss": 3.5958, + "step": 894976 + }, + { + "epoch": 0.02, + "learning_rate": 3.533915261310711e-05, + "loss": 3.593, + "step": 895488 + }, + { + "epoch": 0.02, + "learning_rate": 3.533076666559659e-05, + "loss": 3.5857, + "step": 896000 + }, + { + "epoch": 0.02, + "learning_rate": 3.532238071808607e-05, + "loss": 3.5972, + "step": 896512 + }, + { + "epoch": 0.02, + "learning_rate": 3.531399477057555e-05, + "loss": 3.6039, + "step": 897024 + }, + { + "epoch": 0.02, + "learning_rate": 3.530560882306503e-05, + "loss": 3.5923, + "step": 897536 + }, + { + "epoch": 0.02, + "learning_rate": 3.529723925435824e-05, + "loss": 3.6008, + "step": 898048 + }, + { + "epoch": 0.02, + "learning_rate": 3.528885330684772e-05, + "loss": 3.6091, + "step": 898560 + }, + { + "epoch": 0.02, + "learning_rate": 3.52804673593372e-05, + "loss": 3.601, + "step": 899072 + }, + { + "epoch": 0.02, + "learning_rate": 3.527208141182668e-05, + "loss": 3.5989, + "step": 899584 + }, + { + "epoch": 0.02, + "learning_rate": 3.526369546431616e-05, + "loss": 3.6009, + "step": 900096 + }, + { + "epoch": 0.02, + "learning_rate": 3.525532589560937e-05, + "loss": 3.5989, + "step": 900608 + }, + { + "epoch": 0.02, + "learning_rate": 3.524693994809885e-05, + "loss": 3.5972, + "step": 901120 + }, + { + "epoch": 0.02, + "learning_rate": 3.523855400058833e-05, + "loss": 3.5896, + "step": 901632 + }, + { + "epoch": 0.02, + "learning_rate": 3.523016805307781e-05, + "loss": 3.59, + "step": 902144 + }, + { + "epoch": 0.02, + "learning_rate": 3.5221798484371024e-05, + "loss": 3.5984, + "step": 902656 + }, + { + "epoch": 0.02, + "learning_rate": 3.5213412536860504e-05, + "loss": 3.5984, + "step": 903168 + }, + { + "epoch": 0.02, + "learning_rate": 3.5205026589349984e-05, + "loss": 3.5945, + "step": 903680 + }, + { + "epoch": 0.02, + "learning_rate": 3.519664064183946e-05, + "loss": 3.5964, + "step": 904192 + }, + { + "epoch": 0.02, + "learning_rate": 3.518825469432894e-05, + "loss": 3.5882, + "step": 904704 + }, + { + "epoch": 0.02, + "learning_rate": 3.517988512562215e-05, + "loss": 3.5872, + "step": 905216 + }, + { + "epoch": 0.02, + "learning_rate": 3.517149917811163e-05, + "loss": 3.6052, + "step": 905728 + }, + { + "epoch": 0.02, + "learning_rate": 3.5163113230601106e-05, + "loss": 3.5943, + "step": 906240 + }, + { + "epoch": 0.02, + "learning_rate": 3.5154727283090586e-05, + "loss": 3.6129, + "step": 906752 + }, + { + "epoch": 0.02, + "learning_rate": 3.51463577143838e-05, + "loss": 3.5832, + "step": 907264 + }, + { + "epoch": 0.02, + "learning_rate": 3.513797176687328e-05, + "loss": 3.6049, + "step": 907776 + }, + { + "epoch": 0.02, + "learning_rate": 3.512958581936276e-05, + "loss": 3.6014, + "step": 908288 + }, + { + "epoch": 0.02, + "learning_rate": 3.512119987185224e-05, + "loss": 3.5999, + "step": 908800 + }, + { + "epoch": 0.02, + "learning_rate": 3.511281392434172e-05, + "loss": 3.5938, + "step": 909312 + }, + { + "epoch": 0.02, + "learning_rate": 3.51044279768312e-05, + "loss": 3.5996, + "step": 909824 + }, + { + "epoch": 0.02, + "learning_rate": 3.509605840812441e-05, + "loss": 3.6002, + "step": 910336 + }, + { + "epoch": 0.02, + "learning_rate": 3.508767246061389e-05, + "loss": 3.6065, + "step": 910848 + }, + { + "epoch": 0.02, + "learning_rate": 3.507928651310337e-05, + "loss": 3.5894, + "step": 911360 + }, + { + "epoch": 0.02, + "learning_rate": 3.507090056559285e-05, + "loss": 3.5948, + "step": 911872 + }, + { + "epoch": 0.02, + "learning_rate": 3.506253099688606e-05, + "loss": 3.5962, + "step": 912384 + }, + { + "epoch": 0.02, + "learning_rate": 3.505414504937554e-05, + "loss": 3.5905, + "step": 912896 + }, + { + "epoch": 0.02, + "learning_rate": 3.504575910186502e-05, + "loss": 3.5894, + "step": 913408 + }, + { + "epoch": 0.02, + "learning_rate": 3.50373731543545e-05, + "loss": 3.5975, + "step": 913920 + }, + { + "epoch": 0.02, + "learning_rate": 3.5028987206843986e-05, + "loss": 3.6004, + "step": 914432 + }, + { + "epoch": 0.02, + "learning_rate": 3.5020601259333466e-05, + "loss": 3.5987, + "step": 914944 + }, + { + "epoch": 0.02, + "learning_rate": 3.5012231690626675e-05, + "loss": 3.5948, + "step": 915456 + }, + { + "epoch": 0.03, + "eval_loss": 3.8424651622772217, + "eval_runtime": 305.8796, + "eval_samples_per_second": 1247.52, + "eval_steps_per_second": 38.986, + "step": 915840 + }, + { + "epoch": 1.0, + "learning_rate": 3.5003845743116155e-05, + "loss": 3.5924, + "step": 915968 + }, + { + "epoch": 1.0, + "learning_rate": 3.4995459795605635e-05, + "loss": 3.5848, + "step": 916480 + }, + { + "epoch": 1.0, + "learning_rate": 3.4987073848095115e-05, + "loss": 3.5986, + "step": 916992 + }, + { + "epoch": 1.0, + "learning_rate": 3.4978687900584595e-05, + "loss": 3.5964, + "step": 917504 + }, + { + "epoch": 1.0, + "learning_rate": 3.4970318331877804e-05, + "loss": 3.6, + "step": 918016 + }, + { + "epoch": 1.0, + "learning_rate": 3.4961932384367284e-05, + "loss": 3.5924, + "step": 918528 + }, + { + "epoch": 1.0, + "learning_rate": 3.4953546436856764e-05, + "loss": 3.5956, + "step": 919040 + }, + { + "epoch": 1.0, + "learning_rate": 3.4945160489346244e-05, + "loss": 3.592, + "step": 919552 + }, + { + "epoch": 1.0, + "learning_rate": 3.4936774541835724e-05, + "loss": 3.5882, + "step": 920064 + }, + { + "epoch": 1.0, + "learning_rate": 3.4928388594325204e-05, + "loss": 3.5943, + "step": 920576 + }, + { + "epoch": 1.0, + "learning_rate": 3.492001902561842e-05, + "loss": 3.5993, + "step": 921088 + }, + { + "epoch": 1.0, + "learning_rate": 3.49116330781079e-05, + "loss": 3.5906, + "step": 921600 + }, + { + "epoch": 1.0, + "learning_rate": 3.490324713059738e-05, + "loss": 3.602, + "step": 922112 + }, + { + "epoch": 1.0, + "learning_rate": 3.489486118308686e-05, + "loss": 3.5752, + "step": 922624 + }, + { + "epoch": 1.0, + "learning_rate": 3.488649161438007e-05, + "loss": 3.5866, + "step": 923136 + }, + { + "epoch": 1.0, + "learning_rate": 3.487810566686955e-05, + "loss": 3.5826, + "step": 923648 + }, + { + "epoch": 1.0, + "learning_rate": 3.486971971935903e-05, + "loss": 3.5985, + "step": 924160 + }, + { + "epoch": 1.0, + "learning_rate": 3.486133377184851e-05, + "loss": 3.5857, + "step": 924672 + }, + { + "epoch": 1.0, + "learning_rate": 3.485294782433799e-05, + "loss": 3.584, + "step": 925184 + }, + { + "epoch": 1.0, + "learning_rate": 3.484456187682746e-05, + "loss": 3.5923, + "step": 925696 + }, + { + "epoch": 1.0, + "learning_rate": 3.483619230812068e-05, + "loss": 3.5993, + "step": 926208 + }, + { + "epoch": 1.0, + "learning_rate": 3.482780636061016e-05, + "loss": 3.5968, + "step": 926720 + }, + { + "epoch": 1.0, + "learning_rate": 3.481942041309964e-05, + "loss": 3.5951, + "step": 927232 + }, + { + "epoch": 1.0, + "learning_rate": 3.481103446558912e-05, + "loss": 3.59, + "step": 927744 + }, + { + "epoch": 1.0, + "learning_rate": 3.48026485180786e-05, + "loss": 3.5956, + "step": 928256 + }, + { + "epoch": 1.0, + "learning_rate": 3.479426257056808e-05, + "loss": 3.5929, + "step": 928768 + }, + { + "epoch": 1.0, + "learning_rate": 3.478587662305756e-05, + "loss": 3.5837, + "step": 929280 + }, + { + "epoch": 1.0, + "learning_rate": 3.477749067554704e-05, + "loss": 3.5944, + "step": 929792 + }, + { + "epoch": 1.0, + "learning_rate": 3.4769121106840246e-05, + "loss": 3.5852, + "step": 930304 + }, + { + "epoch": 1.0, + "learning_rate": 3.4760735159329726e-05, + "loss": 3.5832, + "step": 930816 + }, + { + "epoch": 1.01, + "learning_rate": 3.4752365590622935e-05, + "loss": 3.5868, + "step": 931328 + }, + { + "epoch": 1.01, + "learning_rate": 3.4743979643112415e-05, + "loss": 3.5928, + "step": 931840 + }, + { + "epoch": 1.01, + "learning_rate": 3.4735593695601895e-05, + "loss": 3.5971, + "step": 932352 + }, + { + "epoch": 1.01, + "learning_rate": 3.4727207748091375e-05, + "loss": 3.5971, + "step": 932864 + }, + { + "epoch": 1.01, + "learning_rate": 3.471883817938459e-05, + "loss": 3.5851, + "step": 933376 + }, + { + "epoch": 1.01, + "learning_rate": 3.471045223187407e-05, + "loss": 3.5837, + "step": 933888 + }, + { + "epoch": 1.01, + "learning_rate": 3.470206628436355e-05, + "loss": 3.5982, + "step": 934400 + }, + { + "epoch": 1.01, + "learning_rate": 3.469368033685303e-05, + "loss": 3.5831, + "step": 934912 + }, + { + "epoch": 1.01, + "learning_rate": 3.468529438934251e-05, + "loss": 3.5875, + "step": 935424 + }, + { + "epoch": 1.01, + "learning_rate": 3.467690844183199e-05, + "loss": 3.582, + "step": 935936 + }, + { + "epoch": 1.01, + "learning_rate": 3.466852249432147e-05, + "loss": 3.5823, + "step": 936448 + }, + { + "epoch": 1.01, + "learning_rate": 3.466015292561468e-05, + "loss": 3.5859, + "step": 936960 + }, + { + "epoch": 1.01, + "learning_rate": 3.465176697810416e-05, + "loss": 3.595, + "step": 937472 + }, + { + "epoch": 1.01, + "learning_rate": 3.464338103059364e-05, + "loss": 3.5904, + "step": 937984 + }, + { + "epoch": 1.01, + "learning_rate": 3.463499508308312e-05, + "loss": 3.5827, + "step": 938496 + }, + { + "epoch": 1.01, + "learning_rate": 3.46266091355726e-05, + "loss": 3.5898, + "step": 939008 + }, + { + "epoch": 1.01, + "learning_rate": 3.461823956686581e-05, + "loss": 3.5854, + "step": 939520 + }, + { + "epoch": 1.01, + "learning_rate": 3.4609853619355296e-05, + "loss": 3.5893, + "step": 940032 + }, + { + "epoch": 1.01, + "learning_rate": 3.4601467671844776e-05, + "loss": 3.5653, + "step": 940544 + }, + { + "epoch": 1.01, + "learning_rate": 3.4593081724334256e-05, + "loss": 3.5921, + "step": 941056 + }, + { + "epoch": 1.01, + "learning_rate": 3.4584695776823735e-05, + "loss": 3.5813, + "step": 941568 + }, + { + "epoch": 1.01, + "learning_rate": 3.4576326208116945e-05, + "loss": 3.5939, + "step": 942080 + }, + { + "epoch": 1.01, + "learning_rate": 3.4567940260606425e-05, + "loss": 3.5833, + "step": 942592 + }, + { + "epoch": 1.01, + "learning_rate": 3.4559554313095904e-05, + "loss": 3.5801, + "step": 943104 + }, + { + "epoch": 1.01, + "learning_rate": 3.4551168365585384e-05, + "loss": 3.5731, + "step": 943616 + }, + { + "epoch": 1.01, + "learning_rate": 3.4542782418074864e-05, + "loss": 3.5757, + "step": 944128 + }, + { + "epoch": 1.01, + "learning_rate": 3.4534396470564344e-05, + "loss": 3.5912, + "step": 944640 + }, + { + "epoch": 1.01, + "learning_rate": 3.4526026901857553e-05, + "loss": 3.5672, + "step": 945152 + }, + { + "epoch": 1.01, + "learning_rate": 3.4517640954347033e-05, + "loss": 3.5972, + "step": 945664 + }, + { + "epoch": 1.01, + "learning_rate": 3.450925500683651e-05, + "loss": 3.5806, + "step": 946176 + }, + { + "epoch": 1.01, + "learning_rate": 3.450086905932599e-05, + "loss": 3.5726, + "step": 946688 + }, + { + "epoch": 1.01, + "learning_rate": 3.449248311181548e-05, + "loss": 3.6019, + "step": 947200 + }, + { + "epoch": 1.01, + "learning_rate": 3.448411354310869e-05, + "loss": 3.5712, + "step": 947712 + }, + { + "epoch": 1.01, + "learning_rate": 3.447572759559817e-05, + "loss": 3.5735, + "step": 948224 + }, + { + "epoch": 1.01, + "learning_rate": 3.446734164808765e-05, + "loss": 3.5821, + "step": 948736 + }, + { + "epoch": 1.01, + "learning_rate": 3.445895570057712e-05, + "loss": 3.5853, + "step": 949248 + }, + { + "epoch": 1.01, + "learning_rate": 3.44505697530666e-05, + "loss": 3.5747, + "step": 949760 + }, + { + "epoch": 1.01, + "learning_rate": 3.444220018435982e-05, + "loss": 3.5846, + "step": 950272 + }, + { + "epoch": 1.01, + "learning_rate": 3.44338142368493e-05, + "loss": 3.5674, + "step": 950784 + }, + { + "epoch": 1.01, + "learning_rate": 3.442542828933877e-05, + "loss": 3.5797, + "step": 951296 + }, + { + "epoch": 1.01, + "learning_rate": 3.441704234182825e-05, + "loss": 3.5841, + "step": 951808 + }, + { + "epoch": 1.01, + "learning_rate": 3.440865639431773e-05, + "loss": 3.5852, + "step": 952320 + }, + { + "epoch": 1.01, + "learning_rate": 3.440028682561095e-05, + "loss": 3.5823, + "step": 952832 + }, + { + "epoch": 1.01, + "learning_rate": 3.439190087810043e-05, + "loss": 3.5855, + "step": 953344 + }, + { + "epoch": 1.01, + "learning_rate": 3.438351493058991e-05, + "loss": 3.6001, + "step": 953856 + }, + { + "epoch": 1.01, + "learning_rate": 3.437512898307939e-05, + "loss": 3.5826, + "step": 954368 + }, + { + "epoch": 1.01, + "learning_rate": 3.436674303556887e-05, + "loss": 3.5942, + "step": 954880 + }, + { + "epoch": 1.01, + "learning_rate": 3.4358373466862076e-05, + "loss": 3.579, + "step": 955392 + }, + { + "epoch": 1.01, + "learning_rate": 3.4349987519351556e-05, + "loss": 3.5811, + "step": 955904 + }, + { + "epoch": 1.01, + "learning_rate": 3.4341601571841036e-05, + "loss": 3.5734, + "step": 956416 + }, + { + "epoch": 1.01, + "learning_rate": 3.4333215624330516e-05, + "loss": 3.5936, + "step": 956928 + }, + { + "epoch": 1.01, + "learning_rate": 3.4324829676819996e-05, + "loss": 3.5872, + "step": 957440 + }, + { + "epoch": 1.01, + "learning_rate": 3.4316460108113205e-05, + "loss": 3.5938, + "step": 957952 + }, + { + "epoch": 1.01, + "learning_rate": 3.4308074160602685e-05, + "loss": 3.5737, + "step": 958464 + }, + { + "epoch": 1.01, + "learning_rate": 3.429968821309217e-05, + "loss": 3.5736, + "step": 958976 + }, + { + "epoch": 1.01, + "learning_rate": 3.429130226558165e-05, + "loss": 3.5769, + "step": 959488 + }, + { + "epoch": 1.01, + "learning_rate": 3.428291631807113e-05, + "loss": 3.585, + "step": 960000 + }, + { + "epoch": 1.01, + "learning_rate": 3.427454674936434e-05, + "loss": 3.5792, + "step": 960512 + }, + { + "epoch": 1.01, + "learning_rate": 3.426616080185382e-05, + "loss": 3.5831, + "step": 961024 + }, + { + "epoch": 1.01, + "learning_rate": 3.42577748543433e-05, + "loss": 3.5766, + "step": 961536 + }, + { + "epoch": 1.02, + "learning_rate": 3.424938890683278e-05, + "loss": 3.5794, + "step": 962048 + }, + { + "epoch": 1.02, + "learning_rate": 3.424101933812599e-05, + "loss": 3.5769, + "step": 962560 + }, + { + "epoch": 1.02, + "learning_rate": 3.423263339061547e-05, + "loss": 3.5775, + "step": 963072 + }, + { + "epoch": 1.02, + "learning_rate": 3.422424744310495e-05, + "loss": 3.5848, + "step": 963584 + }, + { + "epoch": 1.02, + "learning_rate": 3.421586149559443e-05, + "loss": 3.5958, + "step": 964096 + }, + { + "epoch": 1.02, + "learning_rate": 3.420747554808391e-05, + "loss": 3.5834, + "step": 964608 + }, + { + "epoch": 1.02, + "learning_rate": 3.419908960057339e-05, + "loss": 3.574, + "step": 965120 + }, + { + "epoch": 1.02, + "learning_rate": 3.419070365306287e-05, + "loss": 3.5795, + "step": 965632 + }, + { + "epoch": 1.02, + "learning_rate": 3.4182334084356085e-05, + "loss": 3.5756, + "step": 966144 + }, + { + "epoch": 1.02, + "learning_rate": 3.4173948136845565e-05, + "loss": 3.5849, + "step": 966656 + }, + { + "epoch": 1.02, + "learning_rate": 3.4165578568138774e-05, + "loss": 3.5807, + "step": 967168 + }, + { + "epoch": 1.02, + "learning_rate": 3.4157192620628254e-05, + "loss": 3.576, + "step": 967680 + }, + { + "epoch": 1.02, + "learning_rate": 3.4148806673117734e-05, + "loss": 3.5836, + "step": 968192 + }, + { + "epoch": 1.02, + "learning_rate": 3.4140420725607214e-05, + "loss": 3.5798, + "step": 968704 + }, + { + "epoch": 1.02, + "learning_rate": 3.4132034778096694e-05, + "loss": 3.5777, + "step": 969216 + }, + { + "epoch": 1.02, + "learning_rate": 3.4123648830586174e-05, + "loss": 3.5738, + "step": 969728 + }, + { + "epoch": 1.02, + "learning_rate": 3.4115262883075654e-05, + "loss": 3.5721, + "step": 970240 + }, + { + "epoch": 1.02, + "learning_rate": 3.4106876935565134e-05, + "loss": 3.5814, + "step": 970752 + }, + { + "epoch": 1.02, + "learning_rate": 3.409850736685834e-05, + "loss": 3.5807, + "step": 971264 + }, + { + "epoch": 1.02, + "learning_rate": 3.409012141934782e-05, + "loss": 3.574, + "step": 971776 + }, + { + "epoch": 1.02, + "learning_rate": 3.40817354718373e-05, + "loss": 3.5687, + "step": 972288 + }, + { + "epoch": 1.02, + "learning_rate": 3.407334952432679e-05, + "loss": 3.5795, + "step": 972800 + }, + { + "epoch": 1.02, + "learning_rate": 3.406496357681626e-05, + "loss": 3.5879, + "step": 973312 + }, + { + "epoch": 1.02, + "learning_rate": 3.405657762930574e-05, + "loss": 3.5759, + "step": 973824 + }, + { + "epoch": 1.02, + "learning_rate": 3.404819168179522e-05, + "loss": 3.5789, + "step": 974336 + }, + { + "epoch": 1.02, + "learning_rate": 3.40398057342847e-05, + "loss": 3.5898, + "step": 974848 + }, + { + "epoch": 1.02, + "learning_rate": 3.403143616557791e-05, + "loss": 3.5832, + "step": 975360 + }, + { + "epoch": 1.02, + "learning_rate": 3.402305021806739e-05, + "loss": 3.5813, + "step": 975872 + }, + { + "epoch": 1.02, + "learning_rate": 3.401468064936061e-05, + "loss": 3.583, + "step": 976384 + }, + { + "epoch": 1.02, + "learning_rate": 3.400629470185008e-05, + "loss": 3.582, + "step": 976896 + }, + { + "epoch": 1.02, + "learning_rate": 3.399790875433956e-05, + "loss": 3.5841, + "step": 977408 + }, + { + "epoch": 1.02, + "learning_rate": 3.398952280682904e-05, + "loss": 3.5692, + "step": 977920 + }, + { + "epoch": 1.02, + "learning_rate": 3.398113685931853e-05, + "loss": 3.575, + "step": 978432 + }, + { + "epoch": 1.02, + "learning_rate": 3.3972767290611736e-05, + "loss": 3.5767, + "step": 978944 + }, + { + "epoch": 1.02, + "learning_rate": 3.3964381343101216e-05, + "loss": 3.5809, + "step": 979456 + }, + { + "epoch": 1.02, + "learning_rate": 3.3955995395590696e-05, + "loss": 3.5781, + "step": 979968 + }, + { + "epoch": 1.02, + "learning_rate": 3.3947609448080176e-05, + "loss": 3.5768, + "step": 980480 + }, + { + "epoch": 1.02, + "learning_rate": 3.3939223500569656e-05, + "loss": 3.5686, + "step": 980992 + }, + { + "epoch": 1.02, + "learning_rate": 3.3930853931862865e-05, + "loss": 3.5718, + "step": 981504 + }, + { + "epoch": 1.02, + "learning_rate": 3.3922467984352345e-05, + "loss": 3.5874, + "step": 982016 + }, + { + "epoch": 1.02, + "learning_rate": 3.3914082036841825e-05, + "loss": 3.5799, + "step": 982528 + }, + { + "epoch": 1.02, + "learning_rate": 3.3905696089331305e-05, + "loss": 3.5901, + "step": 983040 + }, + { + "epoch": 1.02, + "learning_rate": 3.3897326520624514e-05, + "loss": 3.5687, + "step": 983552 + }, + { + "epoch": 1.02, + "learning_rate": 3.3888940573113994e-05, + "loss": 3.5853, + "step": 984064 + }, + { + "epoch": 1.02, + "learning_rate": 3.388055462560348e-05, + "loss": 3.5835, + "step": 984576 + }, + { + "epoch": 1.02, + "learning_rate": 3.387216867809296e-05, + "loss": 3.5778, + "step": 985088 + }, + { + "epoch": 1.02, + "learning_rate": 3.386378273058244e-05, + "loss": 3.5748, + "step": 985600 + }, + { + "epoch": 1.02, + "learning_rate": 3.385539678307192e-05, + "loss": 3.5808, + "step": 986112 + }, + { + "epoch": 1.02, + "learning_rate": 3.384702721436513e-05, + "loss": 3.5854, + "step": 986624 + }, + { + "epoch": 1.02, + "learning_rate": 3.383864126685461e-05, + "loss": 3.5882, + "step": 987136 + }, + { + "epoch": 1.02, + "learning_rate": 3.383025531934409e-05, + "loss": 3.5749, + "step": 987648 + }, + { + "epoch": 1.02, + "learning_rate": 3.382186937183357e-05, + "loss": 3.5755, + "step": 988160 + }, + { + "epoch": 1.02, + "learning_rate": 3.381349980312678e-05, + "loss": 3.5782, + "step": 988672 + }, + { + "epoch": 1.02, + "learning_rate": 3.380511385561626e-05, + "loss": 3.5753, + "step": 989184 + }, + { + "epoch": 1.02, + "learning_rate": 3.379672790810574e-05, + "loss": 3.5691, + "step": 989696 + }, + { + "epoch": 1.02, + "learning_rate": 3.378834196059522e-05, + "loss": 3.5827, + "step": 990208 + }, + { + "epoch": 1.02, + "learning_rate": 3.37799560130847e-05, + "loss": 3.5771, + "step": 990720 + }, + { + "epoch": 1.02, + "learning_rate": 3.377157006557418e-05, + "loss": 3.585, + "step": 991232 + }, + { + "epoch": 1.02, + "learning_rate": 3.3763200496867395e-05, + "loss": 3.5743, + "step": 991744 + }, + { + "epoch": 1.03, + "eval_loss": 3.8441879749298096, + "eval_runtime": 305.3113, + "eval_samples_per_second": 1249.842, + "eval_steps_per_second": 39.058, + "step": 992160 + } + ], + "logging_steps": 512, + "max_steps": 3052726, + "num_train_epochs": 9223372036854775807, + "save_steps": 10, + "total_flos": 6.90732852762581e+17, + "trial_name": null, + "trial_params": null +}