{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9984559958826558,
  "eval_steps": 500,
  "global_step": 970,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002058672156459084,
      "grad_norm": 0.16932366788387299,
      "learning_rate": 6.666666666666667e-07,
      "loss": 0.6431,
      "step": 1
    },
    {
      "epoch": 0.004117344312918168,
      "grad_norm": 0.19655312597751617,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.6827,
      "step": 2
    },
    {
      "epoch": 0.006176016469377252,
      "grad_norm": 0.17550164461135864,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.6469,
      "step": 3
    },
    {
      "epoch": 0.008234688625836336,
      "grad_norm": 0.15466387569904327,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.6673,
      "step": 4
    },
    {
      "epoch": 0.010293360782295419,
      "grad_norm": 0.1689433455467224,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.659,
      "step": 5
    },
    {
      "epoch": 0.012352032938754504,
      "grad_norm": 0.17134369909763336,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.6405,
      "step": 6
    },
    {
      "epoch": 0.014410705095213588,
      "grad_norm": 0.16362226009368896,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.663,
      "step": 7
    },
    {
      "epoch": 0.016469377251672673,
      "grad_norm": 0.17139802873134613,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.6571,
      "step": 8
    },
    {
      "epoch": 0.018528049408131755,
      "grad_norm": 0.1583557277917862,
      "learning_rate": 6e-06,
      "loss": 0.6724,
      "step": 9
    },
    {
      "epoch": 0.020586721564590838,
      "grad_norm": 0.1418927162885666,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.6629,
      "step": 10
    },
    {
      "epoch": 0.022645393721049924,
      "grad_norm": 0.15474237501621246,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.6738,
      "step": 11
    },
    {
      "epoch": 0.024704065877509007,
      "grad_norm": 0.1299714297056198,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.6195,
      "step": 12
    },
    {
      "epoch": 0.02676273803396809,
      "grad_norm": 0.1421244591474533,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.6726,
      "step": 13
    },
    {
      "epoch": 0.028821410190427176,
      "grad_norm": 0.13614629209041595,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.6725,
      "step": 14
    },
    {
      "epoch": 0.03088008234688626,
      "grad_norm": 0.1408453732728958,
      "learning_rate": 1e-05,
      "loss": 0.6572,
      "step": 15
    },
    {
      "epoch": 0.032938754503345345,
      "grad_norm": 0.14334328472614288,
      "learning_rate": 9.999899472488811e-06,
      "loss": 0.6799,
      "step": 16
    },
    {
      "epoch": 0.034997426659804425,
      "grad_norm": 0.13737936317920685,
      "learning_rate": 9.999597894446702e-06,
      "loss": 0.6685,
      "step": 17
    },
    {
      "epoch": 0.03705609881626351,
      "grad_norm": 0.14840838313102722,
      "learning_rate": 9.999095279347846e-06,
      "loss": 0.649,
      "step": 18
    },
    {
      "epoch": 0.0391147709727226,
      "grad_norm": 0.16579611599445343,
      "learning_rate": 9.998391649648529e-06,
      "loss": 0.6383,
      "step": 19
    },
    {
      "epoch": 0.041173443129181676,
      "grad_norm": 0.15359951555728912,
      "learning_rate": 9.997487036786146e-06,
      "loss": 0.6714,
      "step": 20
    },
    {
      "epoch": 0.04323211528564076,
      "grad_norm": 0.14463751018047333,
      "learning_rate": 9.996381481177804e-06,
      "loss": 0.6824,
      "step": 21
    },
    {
      "epoch": 0.04529078744209985,
      "grad_norm": 0.1506095975637436,
      "learning_rate": 9.995075032218501e-06,
      "loss": 0.6714,
      "step": 22
    },
    {
      "epoch": 0.04734945959855893,
      "grad_norm": 0.15253892540931702,
      "learning_rate": 9.993567748278931e-06,
      "loss": 0.6719,
      "step": 23
    },
    {
      "epoch": 0.049408131755018014,
      "grad_norm": 0.1528758406639099,
      "learning_rate": 9.991859696702876e-06,
      "loss": 0.6337,
      "step": 24
    },
    {
      "epoch": 0.0514668039114771,
      "grad_norm": 0.147063210606575,
      "learning_rate": 9.98995095380419e-06,
      "loss": 0.694,
      "step": 25
    },
    {
      "epoch": 0.05352547606793618,
      "grad_norm": 0.13172896206378937,
      "learning_rate": 9.987841604863393e-06,
      "loss": 0.6552,
      "step": 26
    },
    {
      "epoch": 0.055584148224395266,
      "grad_norm": 0.15532605350017548,
      "learning_rate": 9.985531744123863e-06,
      "loss": 0.6665,
      "step": 27
    },
    {
      "epoch": 0.05764282038085435,
      "grad_norm": 0.153465136885643,
      "learning_rate": 9.983021474787623e-06,
      "loss": 0.6723,
      "step": 28
    },
    {
      "epoch": 0.05970149253731343,
      "grad_norm": 0.15754538774490356,
      "learning_rate": 9.980310909010732e-06,
      "loss": 0.6681,
      "step": 29
    },
    {
      "epoch": 0.06176016469377252,
      "grad_norm": 0.14468301832675934,
      "learning_rate": 9.97740016789827e-06,
      "loss": 0.6462,
      "step": 30
    },
    {
      "epoch": 0.0638188368502316,
      "grad_norm": 0.13467183709144592,
      "learning_rate": 9.974289381498927e-06,
      "loss": 0.6647,
      "step": 31
    },
    {
      "epoch": 0.06587750900669069,
      "grad_norm": 0.1391790211200714,
      "learning_rate": 9.970978688799202e-06,
      "loss": 0.6668,
      "step": 32
    },
    {
      "epoch": 0.06793618116314977,
      "grad_norm": 0.13762535154819489,
      "learning_rate": 9.967468237717179e-06,
      "loss": 0.6503,
      "step": 33
    },
    {
      "epoch": 0.06999485331960885,
      "grad_norm": 0.14493001997470856,
      "learning_rate": 9.963758185095935e-06,
      "loss": 0.6573,
      "step": 34
    },
    {
      "epoch": 0.07205352547606794,
      "grad_norm": 0.13914668560028076,
      "learning_rate": 9.95984869669651e-06,
      "loss": 0.6595,
      "step": 35
    },
    {
      "epoch": 0.07411219763252702,
      "grad_norm": 0.14349870383739471,
      "learning_rate": 9.95573994719053e-06,
      "loss": 0.6477,
      "step": 36
    },
    {
      "epoch": 0.0761708697889861,
      "grad_norm": 0.14882634580135345,
      "learning_rate": 9.95143212015237e-06,
      "loss": 0.676,
      "step": 37
    },
    {
      "epoch": 0.0782295419454452,
      "grad_norm": 0.1246320977807045,
      "learning_rate": 9.94692540805098e-06,
      "loss": 0.6445,
      "step": 38
    },
    {
      "epoch": 0.08028821410190427,
      "grad_norm": 0.14476200938224792,
      "learning_rate": 9.942220012241274e-06,
      "loss": 0.6721,
      "step": 39
    },
    {
      "epoch": 0.08234688625836335,
      "grad_norm": 0.1350948065519333,
      "learning_rate": 9.937316142955129e-06,
      "loss": 0.6467,
      "step": 40
    },
    {
      "epoch": 0.08440555841482245,
      "grad_norm": 0.14155423641204834,
      "learning_rate": 9.932214019292002e-06,
      "loss": 0.629,
      "step": 41
    },
    {
      "epoch": 0.08646423057128153,
      "grad_norm": 0.14425547420978546,
      "learning_rate": 9.926913869209132e-06,
      "loss": 0.6652,
      "step": 42
    },
    {
      "epoch": 0.0885229027277406,
      "grad_norm": 0.14249692857265472,
      "learning_rate": 9.921415929511367e-06,
      "loss": 0.6534,
      "step": 43
    },
    {
      "epoch": 0.0905815748841997,
      "grad_norm": 0.13896256685256958,
      "learning_rate": 9.915720445840566e-06,
      "loss": 0.6697,
      "step": 44
    },
    {
      "epoch": 0.09264024704065878,
      "grad_norm": 0.12779143452644348,
      "learning_rate": 9.909827672664642e-06,
      "loss": 0.6411,
      "step": 45
    },
    {
      "epoch": 0.09469891919711786,
      "grad_norm": 0.13794159889221191,
      "learning_rate": 9.90373787326618e-06,
      "loss": 0.6512,
      "step": 46
    },
    {
      "epoch": 0.09675759135357695,
      "grad_norm": 0.1406450718641281,
      "learning_rate": 9.897451319730686e-06,
      "loss": 0.6267,
      "step": 47
    },
    {
      "epoch": 0.09881626351003603,
      "grad_norm": 0.1331593543291092,
      "learning_rate": 9.890968292934413e-06,
      "loss": 0.6478,
      "step": 48
    },
    {
      "epoch": 0.10087493566649511,
      "grad_norm": 0.1347612887620926,
      "learning_rate": 9.884289082531829e-06,
      "loss": 0.6762,
      "step": 49
    },
    {
      "epoch": 0.1029336078229542,
      "grad_norm": 0.15122374892234802,
      "learning_rate": 9.877413986942668e-06,
      "loss": 0.6322,
      "step": 50
    },
    {
      "epoch": 0.10499227997941328,
      "grad_norm": 0.1411016583442688,
      "learning_rate": 9.870343313338594e-06,
      "loss": 0.6832,
      "step": 51
    },
    {
      "epoch": 0.10705095213587236,
      "grad_norm": 0.14521309733390808,
      "learning_rate": 9.863077377629484e-06,
      "loss": 0.66,
      "step": 52
    },
    {
      "epoch": 0.10910962429233145,
      "grad_norm": 0.1375553160905838,
      "learning_rate": 9.855616504449308e-06,
      "loss": 0.6558,
      "step": 53
    },
    {
      "epoch": 0.11116829644879053,
      "grad_norm": 0.16293329000473022,
      "learning_rate": 9.847961027141625e-06,
      "loss": 0.6613,
      "step": 54
    },
    {
      "epoch": 0.11322696860524961,
      "grad_norm": 0.14329375326633453,
      "learning_rate": 9.840111287744696e-06,
      "loss": 0.6355,
      "step": 55
    },
    {
      "epoch": 0.1152856407617087,
      "grad_norm": 0.14197561144828796,
      "learning_rate": 9.832067636976193e-06,
      "loss": 0.6418,
      "step": 56
    },
    {
      "epoch": 0.11734431291816778,
      "grad_norm": 0.14646419882774353,
      "learning_rate": 9.823830434217533e-06,
      "loss": 0.646,
      "step": 57
    },
    {
      "epoch": 0.11940298507462686,
      "grad_norm": 0.15349656343460083,
      "learning_rate": 9.815400047497829e-06,
      "loss": 0.6742,
      "step": 58
    },
    {
      "epoch": 0.12146165723108594,
      "grad_norm": 0.13744938373565674,
      "learning_rate": 9.80677685347743e-06,
      "loss": 0.6418,
      "step": 59
    },
    {
      "epoch": 0.12352032938754504,
      "grad_norm": 0.152848482131958,
      "learning_rate": 9.797961237431106e-06,
      "loss": 0.6962,
      "step": 60
    },
    {
      "epoch": 0.12557900154400412,
      "grad_norm": 0.1377212554216385,
      "learning_rate": 9.788953593230835e-06,
      "loss": 0.6485,
      "step": 61
    },
    {
      "epoch": 0.1276376737004632,
      "grad_norm": 0.14834953844547272,
      "learning_rate": 9.779754323328192e-06,
      "loss": 0.6509,
      "step": 62
    },
    {
      "epoch": 0.12969634585692227,
      "grad_norm": 0.1412557065486908,
      "learning_rate": 9.770363838736382e-06,
      "loss": 0.6409,
      "step": 63
    },
    {
      "epoch": 0.13175501801338138,
      "grad_norm": 0.13826924562454224,
      "learning_rate": 9.760782559011871e-06,
      "loss": 0.6635,
      "step": 64
    },
    {
      "epoch": 0.13381369016984046,
      "grad_norm": 0.15501998364925385,
      "learning_rate": 9.751010912235635e-06,
      "loss": 0.6759,
      "step": 65
    },
    {
      "epoch": 0.13587236232629954,
      "grad_norm": 0.1448049247264862,
      "learning_rate": 9.741049334994047e-06,
      "loss": 0.6692,
      "step": 66
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 0.16344545781612396,
      "learning_rate": 9.730898272359358e-06,
      "loss": 0.6603,
      "step": 67
    },
    {
      "epoch": 0.1399897066392177,
      "grad_norm": 0.13649916648864746,
      "learning_rate": 9.720558177869817e-06,
      "loss": 0.6554,
      "step": 68
    },
    {
      "epoch": 0.14204837879567678,
      "grad_norm": 0.14864695072174072,
      "learning_rate": 9.71002951350941e-06,
      "loss": 0.6478,
      "step": 69
    },
    {
      "epoch": 0.14410705095213588,
      "grad_norm": 0.13324975967407227,
      "learning_rate": 9.699312749687211e-06,
      "loss": 0.6623,
      "step": 70
    },
    {
      "epoch": 0.14616572310859496,
      "grad_norm": 0.12993821501731873,
      "learning_rate": 9.688408365216379e-06,
      "loss": 0.6548,
      "step": 71
    },
    {
      "epoch": 0.14822439526505404,
      "grad_norm": 0.15033897757530212,
      "learning_rate": 9.677316847292746e-06,
      "loss": 0.6316,
      "step": 72
    },
    {
      "epoch": 0.15028306742151312,
      "grad_norm": 0.14298583567142487,
      "learning_rate": 9.666038691473067e-06,
      "loss": 0.6864,
      "step": 73
    },
    {
      "epoch": 0.1523417395779722,
      "grad_norm": 0.13992749154567719,
      "learning_rate": 9.654574401652864e-06,
      "loss": 0.6843,
      "step": 74
    },
    {
      "epoch": 0.15440041173443128,
      "grad_norm": 0.12663426995277405,
      "learning_rate": 9.64292449004393e-06,
      "loss": 0.6492,
      "step": 75
    },
    {
      "epoch": 0.1564590838908904,
      "grad_norm": 0.14738014340400696,
      "learning_rate": 9.631089477151428e-06,
      "loss": 0.6499,
      "step": 76
    },
    {
      "epoch": 0.15851775604734947,
      "grad_norm": 0.12475479394197464,
      "learning_rate": 9.619069891750642e-06,
      "loss": 0.6465,
      "step": 77
    },
    {
      "epoch": 0.16057642820380855,
      "grad_norm": 0.13470493257045746,
      "learning_rate": 9.606866270863352e-06,
      "loss": 0.6282,
      "step": 78
    },
    {
      "epoch": 0.16263510036026763,
      "grad_norm": 0.13865548372268677,
      "learning_rate": 9.594479159733841e-06,
      "loss": 0.6694,
      "step": 79
    },
    {
      "epoch": 0.1646937725167267,
      "grad_norm": 0.12641094624996185,
      "learning_rate": 9.581909111804534e-06,
      "loss": 0.6569,
      "step": 80
    },
    {
      "epoch": 0.16675244467318578,
      "grad_norm": 0.15122997760772705,
      "learning_rate": 9.569156688691267e-06,
      "loss": 0.6489,
      "step": 81
    },
    {
      "epoch": 0.1688111168296449,
      "grad_norm": 0.13927607238292694,
      "learning_rate": 9.556222460158196e-06,
      "loss": 0.6323,
      "step": 82
    },
    {
      "epoch": 0.17086978898610397,
      "grad_norm": 0.13584642112255096,
      "learning_rate": 9.543107004092351e-06,
      "loss": 0.6404,
      "step": 83
    },
    {
      "epoch": 0.17292846114256305,
      "grad_norm": 0.15319298207759857,
      "learning_rate": 9.529810906477795e-06,
      "loss": 0.6286,
      "step": 84
    },
    {
      "epoch": 0.17498713329902213,
      "grad_norm": 0.14342650771141052,
      "learning_rate": 9.516334761369467e-06,
      "loss": 0.6528,
      "step": 85
    },
    {
      "epoch": 0.1770458054554812,
      "grad_norm": 0.13270069658756256,
      "learning_rate": 9.50267917086662e-06,
      "loss": 0.6755,
      "step": 86
    },
    {
      "epoch": 0.1791044776119403,
      "grad_norm": 0.1380058228969574,
      "learning_rate": 9.488844745085935e-06,
      "loss": 0.6686,
      "step": 87
    },
    {
      "epoch": 0.1811631497683994,
      "grad_norm": 0.1321735680103302,
      "learning_rate": 9.474832102134254e-06,
      "loss": 0.6312,
      "step": 88
    },
    {
      "epoch": 0.18322182192485847,
      "grad_norm": 0.17360003292560577,
      "learning_rate": 9.460641868080961e-06,
      "loss": 0.6722,
      "step": 89
    },
    {
      "epoch": 0.18528049408131755,
      "grad_norm": 0.14511004090309143,
      "learning_rate": 9.446274676930022e-06,
      "loss": 0.6567,
      "step": 90
    },
    {
      "epoch": 0.18733916623777663,
      "grad_norm": 0.1359574794769287,
      "learning_rate": 9.43173117059164e-06,
      "loss": 0.6462,
      "step": 91
    },
    {
      "epoch": 0.1893978383942357,
      "grad_norm": 0.14268562197685242,
      "learning_rate": 9.417011998853596e-06,
      "loss": 0.6531,
      "step": 92
    },
    {
      "epoch": 0.1914565105506948,
      "grad_norm": 0.14946860074996948,
      "learning_rate": 9.4021178193522e-06,
      "loss": 0.6442,
      "step": 93
    },
    {
      "epoch": 0.1935151827071539,
      "grad_norm": 0.13256080448627472,
      "learning_rate": 9.387049297542914e-06,
      "loss": 0.6528,
      "step": 94
    },
    {
      "epoch": 0.19557385486361298,
      "grad_norm": 0.15630853176116943,
      "learning_rate": 9.371807106670628e-06,
      "loss": 0.6434,
      "step": 95
    },
    {
      "epoch": 0.19763252702007206,
      "grad_norm": 0.1315910816192627,
      "learning_rate": 9.356391927739569e-06,
      "loss": 0.6463,
      "step": 96
    },
    {
      "epoch": 0.19969119917653114,
      "grad_norm": 0.14017589390277863,
      "learning_rate": 9.340804449482882e-06,
      "loss": 0.6752,
      "step": 97
    },
    {
      "epoch": 0.20174987133299022,
      "grad_norm": 0.16451334953308105,
      "learning_rate": 9.325045368331851e-06,
      "loss": 0.659,
      "step": 98
    },
    {
      "epoch": 0.2038085434894493,
      "grad_norm": 0.14280985295772552,
      "learning_rate": 9.309115388384793e-06,
      "loss": 0.6447,
      "step": 99
    },
    {
      "epoch": 0.2058672156459084,
      "grad_norm": 0.15770740807056427,
      "learning_rate": 9.293015221375588e-06,
      "loss": 0.6925,
      "step": 100
    },
    {
      "epoch": 0.20792588780236748,
      "grad_norm": 0.16106918454170227,
      "learning_rate": 9.27674558664189e-06,
      "loss": 0.6865,
      "step": 101
    },
    {
      "epoch": 0.20998455995882656,
      "grad_norm": 0.13006511330604553,
      "learning_rate": 9.260307211092987e-06,
      "loss": 0.6452,
      "step": 102
    },
    {
      "epoch": 0.21204323211528564,
      "grad_norm": 0.14992979168891907,
      "learning_rate": 9.243700829177307e-06,
      "loss": 0.6787,
      "step": 103
    },
    {
      "epoch": 0.21410190427174472,
      "grad_norm": 0.1523681879043579,
      "learning_rate": 9.226927182849631e-06,
      "loss": 0.6534,
      "step": 104
    },
    {
      "epoch": 0.2161605764282038,
      "grad_norm": 0.14899896085262299,
      "learning_rate": 9.209987021537921e-06,
      "loss": 0.6547,
      "step": 105
    },
    {
      "epoch": 0.2182192485846629,
      "grad_norm": 0.14096584916114807,
      "learning_rate": 9.192881102109848e-06,
      "loss": 0.647,
      "step": 106
    },
    {
      "epoch": 0.22027792074112199,
      "grad_norm": 0.1324293166399002,
      "learning_rate": 9.17561018883897e-06,
      "loss": 0.6622,
      "step": 107
    },
    {
      "epoch": 0.22233659289758106,
      "grad_norm": 0.14914196729660034,
      "learning_rate": 9.158175053370585e-06,
      "loss": 0.6642,
      "step": 108
    },
    {
      "epoch": 0.22439526505404014,
      "grad_norm": 0.1294255554676056,
      "learning_rate": 9.140576474687263e-06,
      "loss": 0.6697,
      "step": 109
    },
    {
      "epoch": 0.22645393721049922,
      "grad_norm": 0.15517495572566986,
      "learning_rate": 9.122815239074034e-06,
      "loss": 0.6626,
      "step": 110
    },
    {
      "epoch": 0.2285126093669583,
      "grad_norm": 0.15496940910816193,
      "learning_rate": 9.104892140083257e-06,
      "loss": 0.6505,
      "step": 111
    },
    {
      "epoch": 0.2305712815234174,
      "grad_norm": 0.13310006260871887,
      "learning_rate": 9.086807978499167e-06,
      "loss": 0.6519,
      "step": 112
    },
    {
      "epoch": 0.2326299536798765,
      "grad_norm": 0.1449541598558426,
      "learning_rate": 9.068563562302106e-06,
      "loss": 0.6604,
      "step": 113
    },
    {
      "epoch": 0.23468862583633557,
      "grad_norm": 0.1366063952445984,
      "learning_rate": 9.050159706632403e-06,
      "loss": 0.6431,
      "step": 114
    },
    {
      "epoch": 0.23674729799279465,
      "grad_norm": 0.15107031166553497,
      "learning_rate": 9.031597233753975e-06,
      "loss": 0.6639,
      "step": 115
    },
    {
      "epoch": 0.23880597014925373,
      "grad_norm": 0.14455774426460266,
      "learning_rate": 9.012876973017578e-06,
      "loss": 0.6661,
      "step": 116
    },
    {
      "epoch": 0.2408646423057128,
      "grad_norm": 0.13330498337745667,
      "learning_rate": 8.993999760823753e-06,
      "loss": 0.6575,
      "step": 117
    },
    {
      "epoch": 0.24292331446217189,
      "grad_norm": 0.13691799342632294,
      "learning_rate": 8.974966440585461e-06,
      "loss": 0.6786,
      "step": 118
    },
    {
      "epoch": 0.244981986618631,
      "grad_norm": 0.1397402137517929,
      "learning_rate": 8.955777862690397e-06,
      "loss": 0.6583,
      "step": 119
    },
    {
      "epoch": 0.24704065877509007,
      "grad_norm": 0.1374320238828659,
      "learning_rate": 8.936434884462995e-06,
      "loss": 0.6721,
      "step": 120
    },
    {
      "epoch": 0.24909933093154915,
      "grad_norm": 0.15251614153385162,
      "learning_rate": 8.916938370126127e-06,
      "loss": 0.6693,
      "step": 121
    },
    {
      "epoch": 0.25115800308800823,
      "grad_norm": 0.1461092084646225,
      "learning_rate": 8.897289190762488e-06,
      "loss": 0.6599,
      "step": 122
    },
    {
      "epoch": 0.25321667524446734,
      "grad_norm": 0.1487017124891281,
      "learning_rate": 8.877488224275676e-06,
      "loss": 0.6565,
      "step": 123
    },
    {
      "epoch": 0.2552753474009264,
      "grad_norm": 0.14306996762752533,
      "learning_rate": 8.857536355350972e-06,
      "loss": 0.6866,
      "step": 124
    },
    {
      "epoch": 0.2573340195573855,
      "grad_norm": 0.15157969295978546,
      "learning_rate": 8.837434475415811e-06,
      "loss": 0.6626,
      "step": 125
    },
    {
      "epoch": 0.25939269171384455,
      "grad_norm": 0.14696666598320007,
      "learning_rate": 8.81718348259995e-06,
      "loss": 0.6777,
      "step": 126
    },
    {
      "epoch": 0.26145136387030365,
      "grad_norm": 0.15098857879638672,
      "learning_rate": 8.79678428169535e-06,
      "loss": 0.6868,
      "step": 127
    },
    {
      "epoch": 0.26351003602676276,
      "grad_norm": 0.15824772417545319,
      "learning_rate": 8.776237784115743e-06,
      "loss": 0.6542,
      "step": 128
    },
    {
      "epoch": 0.2655687081832218,
      "grad_norm": 0.13801230490207672,
      "learning_rate": 8.755544907855913e-06,
      "loss": 0.6728,
      "step": 129
    },
    {
      "epoch": 0.2676273803396809,
      "grad_norm": 0.13847684860229492,
      "learning_rate": 8.734706577450684e-06,
      "loss": 0.6426,
      "step": 130
    },
    {
      "epoch": 0.26968605249613997,
      "grad_norm": 0.15203307569026947,
      "learning_rate": 8.713723723933604e-06,
      "loss": 0.6381,
      "step": 131
    },
    {
      "epoch": 0.2717447246525991,
      "grad_norm": 0.1289345920085907,
      "learning_rate": 8.692597284795363e-06,
      "loss": 0.6587,
      "step": 132
    },
    {
      "epoch": 0.27380339680905813,
      "grad_norm": 0.13953810930252075,
      "learning_rate": 8.671328203941889e-06,
      "loss": 0.6473,
      "step": 133
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 0.153923898935318,
      "learning_rate": 8.649917431652191e-06,
      "loss": 0.6685,
      "step": 134
    },
    {
      "epoch": 0.27792074112197634,
      "grad_norm": 0.15305279195308685,
      "learning_rate": 8.628365924535892e-06,
      "loss": 0.6582,
      "step": 135
    },
    {
      "epoch": 0.2799794132784354,
      "grad_norm": 0.13822555541992188,
      "learning_rate": 8.606674645490487e-06,
      "loss": 0.6305,
      "step": 136
    },
    {
      "epoch": 0.2820380854348945,
      "grad_norm": 0.13595739006996155,
      "learning_rate": 8.584844563658334e-06,
      "loss": 0.6627,
      "step": 137
    },
    {
      "epoch": 0.28409675759135355,
      "grad_norm": 0.145783469080925,
      "learning_rate": 8.56287665438334e-06,
      "loss": 0.6484,
      "step": 138
    },
    {
      "epoch": 0.28615542974781266,
      "grad_norm": 0.14241014420986176,
      "learning_rate": 8.540771899167395e-06,
      "loss": 0.6313,
      "step": 139
    },
    {
      "epoch": 0.28821410190427177,
      "grad_norm": 0.13967099785804749,
      "learning_rate": 8.518531285626506e-06,
      "loss": 0.671,
      "step": 140
    },
    {
      "epoch": 0.2902727740607308,
      "grad_norm": 0.13454201817512512,
      "learning_rate": 8.496155807446688e-06,
      "loss": 0.6626,
      "step": 141
    },
    {
      "epoch": 0.2923314462171899,
      "grad_norm": 0.14464090764522552,
      "learning_rate": 8.473646464339557e-06,
      "loss": 0.6537,
      "step": 142
    },
    {
      "epoch": 0.294390118373649,
      "grad_norm": 0.1445484757423401,
      "learning_rate": 8.451004261997664e-06,
      "loss": 0.6588,
      "step": 143
    },
    {
      "epoch": 0.2964487905301081,
      "grad_norm": 0.13212668895721436,
      "learning_rate": 8.428230212049562e-06,
      "loss": 0.6209,
      "step": 144
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 0.1413717269897461,
      "learning_rate": 8.40532533201461e-06,
      "loss": 0.6491,
      "step": 145
    },
    {
      "epoch": 0.30056613484302624,
      "grad_norm": 0.14644797146320343,
      "learning_rate": 8.382290645257518e-06,
      "loss": 0.6551,
      "step": 146
    },
    {
      "epoch": 0.30262480699948535,
      "grad_norm": 0.1352260410785675,
      "learning_rate": 8.359127180942607e-06,
      "loss": 0.6731,
      "step": 147
    },
    {
      "epoch": 0.3046834791559444,
      "grad_norm": 0.1495533436536789,
      "learning_rate": 8.335835973987841e-06,
      "loss": 0.6717,
      "step": 148
    },
    {
      "epoch": 0.3067421513124035,
      "grad_norm": 0.15269213914871216,
      "learning_rate": 8.312418065018582e-06,
      "loss": 0.6738,
      "step": 149
    },
    {
      "epoch": 0.30880082346886256,
      "grad_norm": 0.13821226358413696,
      "learning_rate": 8.288874500321103e-06,
      "loss": 0.6348,
      "step": 150
    },
    {
      "epoch": 0.31085949562532167,
      "grad_norm": 0.13883084058761597,
      "learning_rate": 8.26520633179583e-06,
      "loss": 0.6408,
      "step": 151
    },
    {
      "epoch": 0.3129181677817808,
      "grad_norm": 0.1385805606842041,
      "learning_rate": 8.24141461691035e-06,
      "loss": 0.6686,
      "step": 152
    },
    {
      "epoch": 0.3149768399382398,
      "grad_norm": 0.1448226422071457,
      "learning_rate": 8.21750041865217e-06,
      "loss": 0.6555,
      "step": 153
    },
    {
      "epoch": 0.31703551209469893,
      "grad_norm": 0.153200164437294,
      "learning_rate": 8.193464805481218e-06,
      "loss": 0.6632,
      "step": 154
    },
    {
      "epoch": 0.319094184251158,
      "grad_norm": 0.13748742640018463,
      "learning_rate": 8.1693088512821e-06,
      "loss": 0.6241,
      "step": 155
    },
    {
      "epoch": 0.3211528564076171,
      "grad_norm": 0.14238247275352478,
      "learning_rate": 8.14503363531613e-06,
      "loss": 0.6813,
      "step": 156
    },
    {
      "epoch": 0.32321152856407614,
      "grad_norm": 0.13559186458587646,
      "learning_rate": 8.120640242173107e-06,
      "loss": 0.6463,
      "step": 157
    },
    {
      "epoch": 0.32527020072053525,
      "grad_norm": 0.1391640305519104,
      "learning_rate": 8.096129761722855e-06,
      "loss": 0.6454,
      "step": 158
    },
    {
      "epoch": 0.32732887287699436,
      "grad_norm": 0.13450267910957336,
      "learning_rate": 8.071503289066526e-06,
      "loss": 0.6459,
      "step": 159
    },
    {
      "epoch": 0.3293875450334534,
      "grad_norm": 0.13703607022762299,
      "learning_rate": 8.046761924487679e-06,
      "loss": 0.645,
      "step": 160
    },
    {
      "epoch": 0.3314462171899125,
      "grad_norm": 0.12534037232398987,
      "learning_rate": 8.021906773403116e-06,
      "loss": 0.6535,
      "step": 161
    },
    {
      "epoch": 0.33350488934637157,
      "grad_norm": 0.13822266459465027,
      "learning_rate": 7.996938946313496e-06,
      "loss": 0.6436,
      "step": 162
    },
    {
      "epoch": 0.3355635615028307,
      "grad_norm": 0.1410212516784668,
      "learning_rate": 7.971859558753714e-06,
      "loss": 0.635,
      "step": 163
    },
    {
      "epoch": 0.3376222336592898,
      "grad_norm": 0.13720424473285675,
      "learning_rate": 7.946669731243064e-06,
      "loss": 0.6512,
      "step": 164
    },
    {
      "epoch": 0.33968090581574883,
      "grad_norm": 0.14550641179084778,
      "learning_rate": 7.921370589235178e-06,
      "loss": 0.662,
      "step": 165
    },
    {
      "epoch": 0.34173957797220794,
      "grad_norm": 0.15036438405513763,
      "learning_rate": 7.895963263067734e-06,
      "loss": 0.6653,
      "step": 166
    },
    {
      "epoch": 0.343798250128667,
      "grad_norm": 0.14174431562423706,
      "learning_rate": 7.870448887911961e-06,
      "loss": 0.6855,
      "step": 167
    },
    {
      "epoch": 0.3458569222851261,
      "grad_norm": 0.13360486924648285,
      "learning_rate": 7.844828603721923e-06,
      "loss": 0.6621,
      "step": 168
    },
    {
      "epoch": 0.34791559444158515,
      "grad_norm": 0.15754815936088562,
      "learning_rate": 7.819103555183575e-06,
      "loss": 0.6709,
      "step": 169
    },
    {
      "epoch": 0.34997426659804426,
      "grad_norm": 0.13917994499206543,
      "learning_rate": 7.79327489166363e-06,
      "loss": 0.6765,
      "step": 170
    },
    {
      "epoch": 0.35203293875450337,
      "grad_norm": 0.1421346664428711,
      "learning_rate": 7.767343767158201e-06,
      "loss": 0.6689,
      "step": 171
    },
    {
      "epoch": 0.3540916109109624,
      "grad_norm": 0.14150221645832062,
      "learning_rate": 7.741311340241256e-06,
      "loss": 0.6553,
      "step": 172
    },
    {
      "epoch": 0.3561502830674215,
      "grad_norm": 0.13491779565811157,
      "learning_rate": 7.715178774012825e-06,
      "loss": 0.6792,
      "step": 173
    },
    {
      "epoch": 0.3582089552238806,
      "grad_norm": 0.14319780468940735,
      "learning_rate": 7.688947236047066e-06,
      "loss": 0.6313,
      "step": 174
    },
    {
      "epoch": 0.3602676273803397,
      "grad_norm": 0.14893461763858795,
      "learning_rate": 7.662617898340077e-06,
      "loss": 0.6551,
      "step": 175
    },
    {
      "epoch": 0.3623262995367988,
      "grad_norm": 0.13200290501117706,
      "learning_rate": 7.636191937257545e-06,
      "loss": 0.6639,
      "step": 176
    },
    {
      "epoch": 0.36438497169325784,
      "grad_norm": 0.12563662230968475,
      "learning_rate": 7.609670533482181e-06,
      "loss": 0.6628,
      "step": 177
    },
    {
      "epoch": 0.36644364384971695,
      "grad_norm": 0.13975931704044342,
      "learning_rate": 7.583054871960968e-06,
      "loss": 0.6412,
      "step": 178
    },
    {
      "epoch": 0.368502316006176,
      "grad_norm": 0.1629699319601059,
      "learning_rate": 7.556346141852221e-06,
      "loss": 0.6511,
      "step": 179
    },
    {
      "epoch": 0.3705609881626351,
      "grad_norm": 0.1409747153520584,
      "learning_rate": 7.529545536472459e-06,
      "loss": 0.6096,
      "step": 180
    },
    {
      "epoch": 0.37261966031909416,
      "grad_norm": 0.15238557755947113,
      "learning_rate": 7.502654253243084e-06,
      "loss": 0.623,
      "step": 181
    },
    {
      "epoch": 0.37467833247555327,
      "grad_norm": 0.14103642106056213,
      "learning_rate": 7.475673493636889e-06,
      "loss": 0.6397,
      "step": 182
    },
    {
      "epoch": 0.3767370046320124,
      "grad_norm": 0.14412514865398407,
      "learning_rate": 7.448604463124365e-06,
      "loss": 0.6388,
      "step": 183
    },
    {
      "epoch": 0.3787956767884714,
      "grad_norm": 0.14426256716251373,
      "learning_rate": 7.4214483711198535e-06,
      "loss": 0.6335,
      "step": 184
    },
    {
      "epoch": 0.38085434894493053,
      "grad_norm": 0.15213246643543243,
      "learning_rate": 7.394206430927509e-06,
      "loss": 0.6639,
      "step": 185
    },
    {
      "epoch": 0.3829130211013896,
      "grad_norm": 0.1403135508298874,
      "learning_rate": 7.366879859687083e-06,
      "loss": 0.6571,
      "step": 186
    },
    {
      "epoch": 0.3849716932578487,
      "grad_norm": 0.12427900731563568,
      "learning_rate": 7.339469878319554e-06,
      "loss": 0.6428,
      "step": 187
    },
    {
      "epoch": 0.3870303654143078,
      "grad_norm": 0.13845951855182648,
      "learning_rate": 7.311977711472569e-06,
      "loss": 0.654,
      "step": 188
    },
    {
      "epoch": 0.38908903757076685,
      "grad_norm": 0.13073399662971497,
      "learning_rate": 7.284404587465733e-06,
      "loss": 0.6454,
      "step": 189
    },
    {
      "epoch": 0.39114770972722596,
      "grad_norm": 0.14320863783359528,
      "learning_rate": 7.2567517382357265e-06,
      "loss": 0.6602,
      "step": 190
    },
    {
      "epoch": 0.393206381883685,
      "grad_norm": 0.12681038677692413,
      "learning_rate": 7.229020399281268e-06,
      "loss": 0.6597,
      "step": 191
    },
    {
      "epoch": 0.3952650540401441,
      "grad_norm": 0.13371235132217407,
      "learning_rate": 7.201211809607903e-06,
      "loss": 0.6425,
      "step": 192
    },
    {
      "epoch": 0.39732372619660317,
      "grad_norm": 0.13812264800071716,
      "learning_rate": 7.17332721167266e-06,
      "loss": 0.6728,
      "step": 193
    },
    {
      "epoch": 0.3993823983530623,
      "grad_norm": 0.158253014087677,
      "learning_rate": 7.145367851328531e-06,
      "loss": 0.6973,
      "step": 194
    },
    {
      "epoch": 0.4014410705095214,
      "grad_norm": 0.15043658018112183,
      "learning_rate": 7.1173349777688075e-06,
      "loss": 0.6485,
      "step": 195
    },
    {
      "epoch": 0.40349974266598043,
      "grad_norm": 0.14149758219718933,
      "learning_rate": 7.089229843471276e-06,
      "loss": 0.6643,
      "step": 196
    },
    {
      "epoch": 0.40555841482243954,
      "grad_norm": 0.16252540051937103,
      "learning_rate": 7.061053704142244e-06,
      "loss": 0.6628,
      "step": 197
    },
    {
      "epoch": 0.4076170869788986,
      "grad_norm": 0.14584672451019287,
      "learning_rate": 7.032807818660451e-06,
      "loss": 0.644,
      "step": 198
    },
    {
      "epoch": 0.4096757591353577,
      "grad_norm": 0.13865599036216736,
      "learning_rate": 7.004493449020818e-06,
      "loss": 0.6649,
      "step": 199
    },
    {
      "epoch": 0.4117344312918168,
      "grad_norm": 0.14170795679092407,
      "learning_rate": 6.976111860278061e-06,
      "loss": 0.6651,
      "step": 200
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 0.14801864326000214,
      "learning_rate": 6.947664320490169e-06,
      "loss": 0.6367,
      "step": 201
    },
    {
      "epoch": 0.41585177560473496,
      "grad_norm": 0.16109682619571686,
      "learning_rate": 6.919152100661758e-06,
      "loss": 0.6734,
      "step": 202
    },
    {
      "epoch": 0.417910447761194,
      "grad_norm": 0.144635409116745,
      "learning_rate": 6.890576474687264e-06,
      "loss": 0.6564,
      "step": 203
    },
    {
      "epoch": 0.4199691199176531,
      "grad_norm": 0.14229914546012878,
      "learning_rate": 6.8619387192940516e-06,
      "loss": 0.6759,
      "step": 204
    },
    {
      "epoch": 0.4220277920741122,
      "grad_norm": 0.1457197219133377,
      "learning_rate": 6.8332401139853545e-06,
      "loss": 0.6601,
      "step": 205
    },
    {
      "epoch": 0.4240864642305713,
      "grad_norm": 0.14699086546897888,
      "learning_rate": 6.804481940983111e-06,
      "loss": 0.6723,
      "step": 206
    },
    {
      "epoch": 0.4261451363870304,
      "grad_norm": 0.13042482733726501,
      "learning_rate": 6.775665485170681e-06,
      "loss": 0.6187,
      "step": 207
    },
    {
      "epoch": 0.42820380854348944,
      "grad_norm": 0.16707850992679596,
      "learning_rate": 6.746792034035437e-06,
      "loss": 0.656,
      "step": 208
    },
    {
      "epoch": 0.43026248069994855,
      "grad_norm": 0.13840922713279724,
      "learning_rate": 6.7178628776112355e-06,
      "loss": 0.6637,
      "step": 209
    },
    {
      "epoch": 0.4323211528564076,
      "grad_norm": 0.13450464606285095,
      "learning_rate": 6.6888793084207885e-06,
      "loss": 0.6457,
      "step": 210
    },
    {
      "epoch": 0.4343798250128667,
      "grad_norm": 0.15876543521881104,
      "learning_rate": 6.6598426214179045e-06,
      "loss": 0.6644,
      "step": 211
    },
    {
      "epoch": 0.4364384971693258,
      "grad_norm": 0.1447768658399582,
      "learning_rate": 6.6307541139296415e-06,
      "loss": 0.6595,
      "step": 212
    },
    {
      "epoch": 0.43849716932578486,
      "grad_norm": 0.14029446244239807,
      "learning_rate": 6.60161508559834e-06,
      "loss": 0.6564,
      "step": 213
    },
    {
      "epoch": 0.44055584148224397,
      "grad_norm": 0.14148728549480438,
      "learning_rate": 6.57242683832355e-06,
      "loss": 0.6398,
      "step": 214
    },
    {
      "epoch": 0.442614513638703,
      "grad_norm": 0.15543389320373535,
      "learning_rate": 6.543190676203877e-06,
      "loss": 0.6546,
      "step": 215
    },
    {
      "epoch": 0.44467318579516213,
      "grad_norm": 0.13774473965168,
      "learning_rate": 6.5139079054787055e-06,
      "loss": 0.6935,
      "step": 216
    },
    {
      "epoch": 0.4467318579516212,
      "grad_norm": 0.14050574600696564,
      "learning_rate": 6.484579834469838e-06,
      "loss": 0.6388,
      "step": 217
    },
    {
      "epoch": 0.4487905301080803,
      "grad_norm": 0.1455702781677246,
      "learning_rate": 6.4552077735230485e-06,
      "loss": 0.6726,
      "step": 218
    },
    {
      "epoch": 0.4508492022645394,
      "grad_norm": 0.15097936987876892,
      "learning_rate": 6.425793034949528e-06,
      "loss": 0.6568,
      "step": 219
    },
    {
      "epoch": 0.45290787442099845,
      "grad_norm": 0.14591309428215027,
      "learning_rate": 6.396336932967262e-06,
      "loss": 0.6456,
      "step": 220
    },
    {
      "epoch": 0.45496654657745755,
      "grad_norm": 0.14051714539527893,
      "learning_rate": 6.366840783642305e-06,
      "loss": 0.6795,
      "step": 221
    },
    {
      "epoch": 0.4570252187339166,
      "grad_norm": 0.14078466594219208,
      "learning_rate": 6.337305904829979e-06,
      "loss": 0.6237,
      "step": 222
    },
    {
      "epoch": 0.4590838908903757,
      "grad_norm": 0.13030259311199188,
      "learning_rate": 6.307733616116002e-06,
      "loss": 0.6472,
      "step": 223
    },
    {
      "epoch": 0.4611425630468348,
      "grad_norm": 0.12666334211826324,
      "learning_rate": 6.2781252387575206e-06,
      "loss": 0.6554,
      "step": 224
    },
    {
      "epoch": 0.46320123520329387,
      "grad_norm": 0.13531836867332458,
      "learning_rate": 6.248482095624087e-06,
      "loss": 0.6548,
      "step": 225
    },
    {
      "epoch": 0.465259907359753,
      "grad_norm": 0.14652900397777557,
      "learning_rate": 6.218805511138544e-06,
      "loss": 0.6849,
      "step": 226
    },
    {
      "epoch": 0.46731857951621203,
      "grad_norm": 0.14632223546504974,
      "learning_rate": 6.18909681121786e-06,
      "loss": 0.6702,
      "step": 227
    },
    {
      "epoch": 0.46937725167267114,
      "grad_norm": 0.133872389793396,
      "learning_rate": 6.15935732321389e-06,
      "loss": 0.6188,
      "step": 228
    },
    {
      "epoch": 0.4714359238291302,
      "grad_norm": 0.133718803524971,
      "learning_rate": 6.1295883758540595e-06,
      "loss": 0.6622,
      "step": 229
    },
    {
      "epoch": 0.4734945959855893,
      "grad_norm": 0.14238756895065308,
      "learning_rate": 6.0997912991820065e-06,
      "loss": 0.671,
      "step": 230
    },
    {
      "epoch": 0.4755532681420484,
      "grad_norm": 0.13712720572948456,
      "learning_rate": 6.069967424498157e-06,
      "loss": 0.6622,
      "step": 231
    },
    {
      "epoch": 0.47761194029850745,
      "grad_norm": 0.13542483747005463,
      "learning_rate": 6.0401180843002435e-06,
      "loss": 0.683,
      "step": 232
    },
    {
      "epoch": 0.47967061245496656,
      "grad_norm": 0.1468353122472763,
      "learning_rate": 6.01024461222377e-06,
      "loss": 0.666,
      "step": 233
    },
    {
      "epoch": 0.4817292846114256,
      "grad_norm": 0.1414223313331604,
      "learning_rate": 5.980348342982424e-06,
      "loss": 0.652,
      "step": 234
    },
    {
      "epoch": 0.4837879567678847,
      "grad_norm": 0.13714027404785156,
      "learning_rate": 5.950430612308444e-06,
      "loss": 0.654,
      "step": 235
    },
    {
      "epoch": 0.48584662892434377,
      "grad_norm": 0.12817879021167755,
      "learning_rate": 5.9204927568929446e-06,
      "loss": 0.6361,
      "step": 236
    },
    {
      "epoch": 0.4879053010808029,
      "grad_norm": 0.13338102400302887,
      "learning_rate": 5.89053611432619e-06,
      "loss": 0.6557,
      "step": 237
    },
    {
      "epoch": 0.489963973237262,
      "grad_norm": 0.14501504600048065,
      "learning_rate": 5.860562023037834e-06,
      "loss": 0.6799,
      "step": 238
    },
    {
      "epoch": 0.49202264539372104,
      "grad_norm": 0.13377492129802704,
      "learning_rate": 5.83057182223712e-06,
      "loss": 0.6598,
      "step": 239
    },
    {
      "epoch": 0.49408131755018014,
      "grad_norm": 0.15723681449890137,
      "learning_rate": 5.800566851853048e-06,
      "loss": 0.6385,
      "step": 240
    },
    {
      "epoch": 0.4961399897066392,
      "grad_norm": 0.15144281089305878,
      "learning_rate": 5.770548452474503e-06,
      "loss": 0.6564,
      "step": 241
    },
    {
      "epoch": 0.4981986618630983,
      "grad_norm": 0.13463923335075378,
      "learning_rate": 5.740517965290366e-06,
      "loss": 0.6809,
      "step": 242
    },
    {
      "epoch": 0.5002573340195574,
      "grad_norm": 0.13458071649074554,
      "learning_rate": 5.710476732029584e-06,
      "loss": 0.6654,
      "step": 243
    },
    {
      "epoch": 0.5023160061760165,
      "grad_norm": 0.1466810703277588,
      "learning_rate": 5.680426094901233e-06,
      "loss": 0.6519,
      "step": 244
    },
    {
      "epoch": 0.5043746783324755,
      "grad_norm": 0.13636760413646698,
      "learning_rate": 5.650367396534536e-06,
      "loss": 0.637,
      "step": 245
    },
    {
      "epoch": 0.5064333504889347,
      "grad_norm": 0.13849836587905884,
      "learning_rate": 5.620301979918891e-06,
      "loss": 0.6548,
      "step": 246
    },
    {
      "epoch": 0.5084920226453937,
      "grad_norm": 0.13412857055664062,
      "learning_rate": 5.590231188343852e-06,
      "loss": 0.6706,
      "step": 247
    },
    {
      "epoch": 0.5105506948018528,
      "grad_norm": 0.12860961258411407,
      "learning_rate": 5.560156365339128e-06,
      "loss": 0.6136,
      "step": 248
    },
    {
      "epoch": 0.5126093669583119,
      "grad_norm": 0.12716427445411682,
      "learning_rate": 5.530078854614541e-06,
      "loss": 0.6531,
      "step": 249
    },
    {
      "epoch": 0.514668039114771,
      "grad_norm": 0.13519296050071716,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.6526,
      "step": 250
    },
    {
      "epoch": 0.51672671127123,
      "grad_norm": 0.15988165140151978,
      "learning_rate": 5.469921145385462e-06,
      "loss": 0.6177,
      "step": 251
    },
    {
      "epoch": 0.5187853834276891,
      "grad_norm": 0.1526211053133011,
      "learning_rate": 5.4398436346608726e-06,
      "loss": 0.6473,
      "step": 252
    },
    {
      "epoch": 0.5208440555841483,
      "grad_norm": 0.1384144276380539,
      "learning_rate": 5.409768811656149e-06,
      "loss": 0.6601,
      "step": 253
    },
    {
      "epoch": 0.5229027277406073,
      "grad_norm": 0.1304050236940384,
      "learning_rate": 5.379698020081111e-06,
      "loss": 0.6667,
      "step": 254
    },
    {
      "epoch": 0.5249613998970664,
      "grad_norm": 0.1473226696252823,
      "learning_rate": 5.349632603465467e-06,
      "loss": 0.6623,
      "step": 255
    },
    {
      "epoch": 0.5270200720535255,
      "grad_norm": 0.14035162329673767,
      "learning_rate": 5.3195739050987695e-06,
      "loss": 0.6598,
      "step": 256
    },
    {
      "epoch": 0.5290787442099846,
      "grad_norm": 0.12912693619728088,
      "learning_rate": 5.289523267970417e-06,
      "loss": 0.6669,
      "step": 257
    },
    {
      "epoch": 0.5311374163664436,
      "grad_norm": 0.15704086422920227,
      "learning_rate": 5.259482034709637e-06,
      "loss": 0.6643,
      "step": 258
    },
    {
      "epoch": 0.5331960885229027,
      "grad_norm": 0.15738819539546967,
      "learning_rate": 5.229451547525499e-06,
      "loss": 0.6695,
      "step": 259
    },
    {
      "epoch": 0.5352547606793618,
      "grad_norm": 0.14451304078102112,
      "learning_rate": 5.199433148146954e-06,
      "loss": 0.6946,
      "step": 260
    },
    {
      "epoch": 0.5373134328358209,
      "grad_norm": 0.13368944823741913,
      "learning_rate": 5.169428177762882e-06,
      "loss": 0.6802,
      "step": 261
    },
    {
      "epoch": 0.5393721049922799,
      "grad_norm": 0.13744553923606873,
      "learning_rate": 5.139437976962168e-06,
      "loss": 0.6501,
      "step": 262
    },
    {
      "epoch": 0.5414307771487391,
      "grad_norm": 0.1409062147140503,
      "learning_rate": 5.109463885673812e-06,
      "loss": 0.6462,
      "step": 263
    },
    {
      "epoch": 0.5434894493051982,
      "grad_norm": 0.13469253480434418,
      "learning_rate": 5.079507243107057e-06,
      "loss": 0.6515,
      "step": 264
    },
    {
      "epoch": 0.5455481214616572,
      "grad_norm": 0.13426664471626282,
      "learning_rate": 5.0495693876915574e-06,
      "loss": 0.6475,
      "step": 265
    },
    {
      "epoch": 0.5476067936181163,
      "grad_norm": 0.14371594786643982,
      "learning_rate": 5.019651657017578e-06,
      "loss": 0.648,
      "step": 266
    },
    {
      "epoch": 0.5496654657745754,
      "grad_norm": 0.1432650238275528,
      "learning_rate": 4.989755387776231e-06,
      "loss": 0.6851,
      "step": 267
    },
    {
      "epoch": 0.5517241379310345,
      "grad_norm": 0.15230941772460938,
      "learning_rate": 4.959881915699757e-06,
      "loss": 0.6654,
      "step": 268
    },
    {
      "epoch": 0.5537828100874935,
      "grad_norm": 0.13175268471240997,
      "learning_rate": 4.930032575501845e-06,
      "loss": 0.6681,
      "step": 269
    },
    {
      "epoch": 0.5558414822439527,
      "grad_norm": 0.131820410490036,
      "learning_rate": 4.9002087008179966e-06,
      "loss": 0.6463,
      "step": 270
    },
    {
      "epoch": 0.5579001544004117,
      "grad_norm": 0.14459112286567688,
      "learning_rate": 4.870411624145942e-06,
      "loss": 0.6762,
      "step": 271
    },
    {
      "epoch": 0.5599588265568708,
      "grad_norm": 0.14455579221248627,
      "learning_rate": 4.840642676786111e-06,
      "loss": 0.6613,
      "step": 272
    },
    {
      "epoch": 0.56201749871333,
      "grad_norm": 0.14118269085884094,
      "learning_rate": 4.81090318878214e-06,
      "loss": 0.6571,
      "step": 273
    },
    {
      "epoch": 0.564076170869789,
      "grad_norm": 0.13893471658229828,
      "learning_rate": 4.781194488861459e-06,
      "loss": 0.6875,
      "step": 274
    },
    {
      "epoch": 0.5661348430262481,
      "grad_norm": 0.14105477929115295,
      "learning_rate": 4.751517904375915e-06,
      "loss": 0.6367,
      "step": 275
    },
    {
      "epoch": 0.5681935151827071,
      "grad_norm": 0.14537222683429718,
      "learning_rate": 4.721874761242482e-06,
      "loss": 0.6349,
      "step": 276
    },
    {
      "epoch": 0.5702521873391663,
      "grad_norm": 0.1382058560848236,
      "learning_rate": 4.692266383884e-06,
      "loss": 0.6656,
      "step": 277
    },
    {
      "epoch": 0.5723108594956253,
      "grad_norm": 0.15627695620059967,
      "learning_rate": 4.662694095170023e-06,
      "loss": 0.645,
      "step": 278
    },
    {
      "epoch": 0.5743695316520844,
      "grad_norm": 0.13674962520599365,
      "learning_rate": 4.633159216357696e-06,
      "loss": 0.6777,
      "step": 279
    },
    {
      "epoch": 0.5764282038085435,
      "grad_norm": 0.14432717859745026,
      "learning_rate": 4.603663067032738e-06,
      "loss": 0.6732,
      "step": 280
    },
    {
      "epoch": 0.5784868759650026,
      "grad_norm": 0.14750322699546814,
      "learning_rate": 4.574206965050472e-06,
      "loss": 0.644,
      "step": 281
    },
    {
      "epoch": 0.5805455481214616,
      "grad_norm": 0.13499832153320312,
      "learning_rate": 4.544792226476954e-06,
      "loss": 0.6534,
      "step": 282
    },
    {
      "epoch": 0.5826042202779207,
      "grad_norm": 0.14756721258163452,
      "learning_rate": 4.515420165530162e-06,
      "loss": 0.6328,
      "step": 283
    },
    {
      "epoch": 0.5846628924343799,
      "grad_norm": 0.13895276188850403,
      "learning_rate": 4.486092094521296e-06,
      "loss": 0.6573,
      "step": 284
    },
    {
      "epoch": 0.5867215645908389,
      "grad_norm": 0.14413511753082275,
      "learning_rate": 4.456809323796123e-06,
      "loss": 0.6686,
      "step": 285
    },
    {
      "epoch": 0.588780236747298,
      "grad_norm": 0.13578923046588898,
      "learning_rate": 4.427573161676452e-06,
      "loss": 0.6282,
      "step": 286
    },
    {
      "epoch": 0.5908389089037571,
      "grad_norm": 0.14140614867210388,
      "learning_rate": 4.398384914401663e-06,
      "loss": 0.6761,
      "step": 287
    },
    {
      "epoch": 0.5928975810602162,
      "grad_norm": 0.12944689393043518,
      "learning_rate": 4.36924588607036e-06,
      "loss": 0.6515,
      "step": 288
    },
    {
      "epoch": 0.5949562532166752,
      "grad_norm": 0.13431216776371002,
      "learning_rate": 4.3401573785820986e-06,
      "loss": 0.6561,
      "step": 289
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 0.14476732909679413,
      "learning_rate": 4.311120691579214e-06,
      "loss": 0.6593,
      "step": 290
    },
    {
      "epoch": 0.5990735975295934,
      "grad_norm": 0.14597386121749878,
      "learning_rate": 4.282137122388765e-06,
      "loss": 0.6523,
      "step": 291
    },
    {
      "epoch": 0.6011322696860525,
      "grad_norm": 0.13887791335582733,
      "learning_rate": 4.253207965964565e-06,
      "loss": 0.6537,
      "step": 292
    },
    {
      "epoch": 0.6031909418425115,
      "grad_norm": 0.13218793272972107,
      "learning_rate": 4.2243345148293215e-06,
      "loss": 0.645,
      "step": 293
    },
    {
      "epoch": 0.6052496139989707,
      "grad_norm": 0.13599814474582672,
      "learning_rate": 4.195518059016891e-06,
      "loss": 0.6041,
      "step": 294
    },
    {
      "epoch": 0.6073082861554298,
      "grad_norm": 0.1398853212594986,
      "learning_rate": 4.166759886014649e-06,
      "loss": 0.6351,
      "step": 295
    },
    {
      "epoch": 0.6093669583118888,
      "grad_norm": 0.13316257297992706,
      "learning_rate": 4.13806128070595e-06,
      "loss": 0.6566,
      "step": 296
    },
    {
      "epoch": 0.611425630468348,
      "grad_norm": 0.14696818590164185,
      "learning_rate": 4.109423525312738e-06,
      "loss": 0.6408,
      "step": 297
    },
    {
      "epoch": 0.613484302624807,
      "grad_norm": 0.1275351196527481,
      "learning_rate": 4.080847899338244e-06,
      "loss": 0.6514,
      "step": 298
    },
    {
      "epoch": 0.6155429747812661,
      "grad_norm": 0.1334499716758728,
      "learning_rate": 4.052335679509831e-06,
      "loss": 0.6318,
      "step": 299
    },
    {
      "epoch": 0.6176016469377251,
      "grad_norm": 0.1320067048072815,
      "learning_rate": 4.02388813972194e-06,
      "loss": 0.6527,
      "step": 300
    },
    {
      "epoch": 0.6196603190941843,
      "grad_norm": 0.13550327718257904,
      "learning_rate": 3.995506550979182e-06,
      "loss": 0.6618,
      "step": 301
    },
    {
      "epoch": 0.6217189912506433,
      "grad_norm": 0.14642766118049622,
      "learning_rate": 3.967192181339549e-06,
      "loss": 0.6611,
      "step": 302
    },
    {
      "epoch": 0.6237776634071024,
      "grad_norm": 0.1360124945640564,
      "learning_rate": 3.938946295857758e-06,
      "loss": 0.6722,
      "step": 303
    },
    {
      "epoch": 0.6258363355635616,
      "grad_norm": 0.14635741710662842,
      "learning_rate": 3.910770156528726e-06,
      "loss": 0.6529,
      "step": 304
    },
    {
      "epoch": 0.6278950077200206,
      "grad_norm": 0.13334567844867706,
      "learning_rate": 3.882665022231193e-06,
      "loss": 0.6351,
      "step": 305
    },
    {
      "epoch": 0.6299536798764797,
      "grad_norm": 0.13501949608325958,
      "learning_rate": 3.8546321486714714e-06,
      "loss": 0.6657,
      "step": 306
    },
    {
      "epoch": 0.6320123520329387,
      "grad_norm": 0.14261625707149506,
      "learning_rate": 3.826672788327341e-06,
      "loss": 0.6573,
      "step": 307
    },
    {
      "epoch": 0.6340710241893979,
      "grad_norm": 0.15793120861053467,
      "learning_rate": 3.798788190392099e-06,
      "loss": 0.6607,
      "step": 308
    },
    {
      "epoch": 0.6361296963458569,
      "grad_norm": 0.1266845464706421,
      "learning_rate": 3.770979600718734e-06,
      "loss": 0.6593,
      "step": 309
    },
    {
      "epoch": 0.638188368502316,
      "grad_norm": 0.14376017451286316,
      "learning_rate": 3.743248261764274e-06,
      "loss": 0.6492,
      "step": 310
    },
    {
      "epoch": 0.6402470406587751,
      "grad_norm": 0.13482894003391266,
      "learning_rate": 3.7155954125342685e-06,
      "loss": 0.6721,
      "step": 311
    },
    {
      "epoch": 0.6423057128152342,
      "grad_norm": 0.13621023297309875,
      "learning_rate": 3.688022288527433e-06,
      "loss": 0.6793,
      "step": 312
    },
    {
      "epoch": 0.6443643849716932,
      "grad_norm": 0.13253401219844818,
      "learning_rate": 3.6605301216804477e-06,
      "loss": 0.653,
      "step": 313
    },
    {
      "epoch": 0.6464230571281523,
      "grad_norm": 0.14206025004386902,
      "learning_rate": 3.633120140312919e-06,
      "loss": 0.6668,
      "step": 314
    },
    {
      "epoch": 0.6484817292846115,
      "grad_norm": 0.14922694861888885,
      "learning_rate": 3.605793569072493e-06,
      "loss": 0.64,
      "step": 315
    },
    {
      "epoch": 0.6505404014410705,
      "grad_norm": 0.13803939521312714,
      "learning_rate": 3.578551628880148e-06,
      "loss": 0.634,
      "step": 316
    },
    {
      "epoch": 0.6525990735975296,
      "grad_norm": 0.14033062756061554,
      "learning_rate": 3.551395536875637e-06,
      "loss": 0.6335,
      "step": 317
    },
    {
      "epoch": 0.6546577457539887,
      "grad_norm": 0.14162582159042358,
      "learning_rate": 3.5243265063631125e-06,
      "loss": 0.6668,
      "step": 318
    },
    {
      "epoch": 0.6567164179104478,
      "grad_norm": 0.14522220194339752,
      "learning_rate": 3.4973457467569155e-06,
      "loss": 0.6433,
      "step": 319
    },
    {
      "epoch": 0.6587750900669068,
      "grad_norm": 0.13454043865203857,
      "learning_rate": 3.4704544635275416e-06,
      "loss": 0.6498,
      "step": 320
    },
    {
      "epoch": 0.660833762223366,
      "grad_norm": 0.14392301440238953,
      "learning_rate": 3.4436538581477796e-06,
      "loss": 0.6653,
      "step": 321
    },
    {
      "epoch": 0.662892434379825,
      "grad_norm": 0.15344378352165222,
      "learning_rate": 3.4169451280390343e-06,
      "loss": 0.6941,
      "step": 322
    },
    {
      "epoch": 0.6649511065362841,
      "grad_norm": 0.14536505937576294,
      "learning_rate": 3.3903294665178204e-06,
      "loss": 0.6546,
      "step": 323
    },
    {
      "epoch": 0.6670097786927431,
      "grad_norm": 0.13551141321659088,
      "learning_rate": 3.363808062742455e-06,
      "loss": 0.6312,
      "step": 324
    },
    {
      "epoch": 0.6690684508492023,
      "grad_norm": 0.1345638483762741,
      "learning_rate": 3.3373821016599236e-06,
      "loss": 0.6617,
      "step": 325
    },
    {
      "epoch": 0.6711271230056614,
      "grad_norm": 0.1479879766702652,
      "learning_rate": 3.3110527639529356e-06,
      "loss": 0.6536,
      "step": 326
    },
    {
      "epoch": 0.6731857951621204,
      "grad_norm": 0.14307381212711334,
      "learning_rate": 3.2848212259871763e-06,
      "loss": 0.6963,
      "step": 327
    },
    {
      "epoch": 0.6752444673185796,
      "grad_norm": 0.13033559918403625,
      "learning_rate": 3.2586886597587466e-06,
      "loss": 0.6627,
      "step": 328
    },
    {
      "epoch": 0.6773031394750386,
      "grad_norm": 0.150541290640831,
      "learning_rate": 3.2326562328418e-06,
      "loss": 0.6247,
      "step": 329
    },
    {
      "epoch": 0.6793618116314977,
      "grad_norm": 0.13265936076641083,
      "learning_rate": 3.2067251083363714e-06,
      "loss": 0.6238,
      "step": 330
    },
    {
      "epoch": 0.6814204837879567,
      "grad_norm": 0.13777919113636017,
      "learning_rate": 3.180896444816427e-06,
      "loss": 0.6473,
      "step": 331
    },
    {
      "epoch": 0.6834791559444159,
      "grad_norm": 0.14997805655002594,
      "learning_rate": 3.155171396278078e-06,
      "loss": 0.6497,
      "step": 332
    },
    {
      "epoch": 0.6855378281008749,
      "grad_norm": 0.14183354377746582,
      "learning_rate": 3.1295511120880382e-06,
      "loss": 0.6601,
      "step": 333
    },
    {
      "epoch": 0.687596500257334,
      "grad_norm": 0.14279931783676147,
      "learning_rate": 3.104036736932268e-06,
      "loss": 0.6766,
      "step": 334
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.13766002655029297,
      "learning_rate": 3.078629410764824e-06,
      "loss": 0.6352,
      "step": 335
    },
    {
      "epoch": 0.6917138445702522,
      "grad_norm": 0.12679381668567657,
      "learning_rate": 3.0533302687569368e-06,
      "loss": 0.6538,
      "step": 336
    },
    {
      "epoch": 0.6937725167267113,
      "grad_norm": 0.15894080698490143,
      "learning_rate": 3.0281404412462866e-06,
      "loss": 0.6452,
      "step": 337
    },
    {
      "epoch": 0.6958311888831703,
      "grad_norm": 0.16960495710372925,
      "learning_rate": 3.0030610536865047e-06,
      "loss": 0.645,
      "step": 338
    },
    {
      "epoch": 0.6978898610396295,
      "grad_norm": 0.13922789692878723,
      "learning_rate": 2.978093226596884e-06,
      "loss": 0.6361,
      "step": 339
    },
    {
      "epoch": 0.6999485331960885,
      "grad_norm": 0.14750796556472778,
      "learning_rate": 2.953238075512321e-06,
      "loss": 0.6561,
      "step": 340
    },
    {
      "epoch": 0.7020072053525476,
      "grad_norm": 0.14396293461322784,
      "learning_rate": 2.928496710933477e-06,
      "loss": 0.6676,
      "step": 341
    },
    {
      "epoch": 0.7040658775090067,
      "grad_norm": 0.12205420434474945,
      "learning_rate": 2.9038702382771476e-06,
      "loss": 0.6441,
      "step": 342
    },
    {
      "epoch": 0.7061245496654658,
      "grad_norm": 0.13652381300926208,
      "learning_rate": 2.879359757826895e-06,
      "loss": 0.653,
      "step": 343
    },
    {
      "epoch": 0.7081832218219248,
      "grad_norm": 0.13769610226154327,
      "learning_rate": 2.854966364683872e-06,
      "loss": 0.6436,
      "step": 344
    },
    {
      "epoch": 0.7102418939783839,
      "grad_norm": 0.12619943916797638,
      "learning_rate": 2.8306911487179023e-06,
      "loss": 0.6339,
      "step": 345
    },
    {
      "epoch": 0.712300566134843,
      "grad_norm": 0.14851100742816925,
      "learning_rate": 2.8065351945187837e-06,
      "loss": 0.6616,
      "step": 346
    },
    {
      "epoch": 0.7143592382913021,
      "grad_norm": 0.13649091124534607,
      "learning_rate": 2.7824995813478295e-06,
      "loss": 0.6575,
      "step": 347
    },
    {
      "epoch": 0.7164179104477612,
      "grad_norm": 0.1376604586839676,
      "learning_rate": 2.7585853830896527e-06,
      "loss": 0.6612,
      "step": 348
    },
    {
      "epoch": 0.7184765826042203,
      "grad_norm": 0.1331382691860199,
      "learning_rate": 2.734793668204172e-06,
      "loss": 0.671,
      "step": 349
    },
    {
      "epoch": 0.7205352547606794,
      "grad_norm": 0.16621175408363342,
      "learning_rate": 2.7111254996788995e-06,
      "loss": 0.6829,
      "step": 350
    },
    {
      "epoch": 0.7225939269171384,
      "grad_norm": 0.15275327861309052,
      "learning_rate": 2.687581934981419e-06,
      "loss": 0.6399,
      "step": 351
    },
    {
      "epoch": 0.7246525990735976,
      "grad_norm": 0.1319313943386078,
      "learning_rate": 2.664164026012161e-06,
      "loss": 0.6693,
      "step": 352
    },
    {
      "epoch": 0.7267112712300566,
      "grad_norm": 0.13566693663597107,
      "learning_rate": 2.640872819057394e-06,
      "loss": 0.6319,
      "step": 353
    },
    {
      "epoch": 0.7287699433865157,
      "grad_norm": 0.14120978116989136,
      "learning_rate": 2.6177093547424826e-06,
      "loss": 0.6548,
      "step": 354
    },
    {
      "epoch": 0.7308286155429747,
      "grad_norm": 0.13419358432292938,
      "learning_rate": 2.5946746679853894e-06,
      "loss": 0.6424,
      "step": 355
    },
    {
      "epoch": 0.7328872876994339,
      "grad_norm": 0.14351977407932281,
      "learning_rate": 2.57176978795044e-06,
      "loss": 0.6382,
      "step": 356
    },
    {
      "epoch": 0.734945959855893,
      "grad_norm": 0.1318623423576355,
      "learning_rate": 2.548995738002338e-06,
      "loss": 0.6725,
      "step": 357
    },
    {
      "epoch": 0.737004632012352,
      "grad_norm": 0.12857410311698914,
      "learning_rate": 2.5263535356604428e-06,
      "loss": 0.6435,
      "step": 358
    },
    {
      "epoch": 0.7390633041688112,
      "grad_norm": 0.1396513730287552,
      "learning_rate": 2.503844192553313e-06,
      "loss": 0.6557,
      "step": 359
    },
    {
      "epoch": 0.7411219763252702,
      "grad_norm": 0.14913763105869293,
      "learning_rate": 2.481468714373496e-06,
      "loss": 0.6629,
      "step": 360
    },
    {
      "epoch": 0.7431806484817293,
      "grad_norm": 0.1364419162273407,
      "learning_rate": 2.4592281008326075e-06,
      "loss": 0.6343,
      "step": 361
    },
    {
      "epoch": 0.7452393206381883,
      "grad_norm": 0.15360961854457855,
      "learning_rate": 2.437123345616661e-06,
      "loss": 0.6321,
      "step": 362
    },
    {
      "epoch": 0.7472979927946475,
      "grad_norm": 0.13863076269626617,
      "learning_rate": 2.4151554363416676e-06,
      "loss": 0.6363,
      "step": 363
    },
    {
      "epoch": 0.7493566649511065,
      "grad_norm": 0.13245247304439545,
      "learning_rate": 2.3933253545095143e-06,
      "loss": 0.6604,
      "step": 364
    },
    {
      "epoch": 0.7514153371075656,
      "grad_norm": 0.13168726861476898,
      "learning_rate": 2.37163407546411e-06,
      "loss": 0.6654,
      "step": 365
    },
    {
      "epoch": 0.7534740092640247,
      "grad_norm": 0.13630832731723785,
      "learning_rate": 2.3500825683478096e-06,
      "loss": 0.6388,
      "step": 366
    },
    {
      "epoch": 0.7555326814204838,
      "grad_norm": 0.14733514189720154,
      "learning_rate": 2.328671796058113e-06,
      "loss": 0.6632,
      "step": 367
    },
    {
      "epoch": 0.7575913535769428,
      "grad_norm": 0.1482279747724533,
      "learning_rate": 2.3074027152046384e-06,
|
"loss": 0.6596, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.7596500257334019, |
|
"grad_norm": 0.14237797260284424, |
|
"learning_rate": 2.286276276066398e-06, |
|
"loss": 0.6673, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7617086978898611, |
|
"grad_norm": 0.1356516182422638, |
|
"learning_rate": 2.265293422549319e-06, |
|
"loss": 0.6612, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7637673700463201, |
|
"grad_norm": 0.14135190844535828, |
|
"learning_rate": 2.2444550921440884e-06, |
|
"loss": 0.659, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7658260422027792, |
|
"grad_norm": 0.1370486319065094, |
|
"learning_rate": 2.2237622158842584e-06, |
|
"loss": 0.6655, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7678847143592383, |
|
"grad_norm": 0.14796264469623566, |
|
"learning_rate": 2.2032157183046515e-06, |
|
"loss": 0.6632, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7699433865156974, |
|
"grad_norm": 0.1222207248210907, |
|
"learning_rate": 2.1828165174000513e-06, |
|
"loss": 0.6472, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7720020586721564, |
|
"grad_norm": 0.12472368031740189, |
|
"learning_rate": 2.162565524584191e-06, |
|
"loss": 0.6539, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7740607308286156, |
|
"grad_norm": 0.1569022238254547, |
|
"learning_rate": 2.142463644649029e-06, |
|
"loss": 0.6456, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.7761194029850746, |
|
"grad_norm": 0.13983507454395294, |
|
"learning_rate": 2.1225117757243263e-06, |
|
"loss": 0.6587, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.7781780751415337, |
|
"grad_norm": 0.14793013036251068, |
|
"learning_rate": 2.1027108092375147e-06, |
|
"loss": 0.7043, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.7802367472979927, |
|
"grad_norm": 0.13598047196865082, |
|
"learning_rate": 2.0830616298738746e-06, |
|
"loss": 0.6327, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.7822954194544519, |
|
"grad_norm": 0.150112122297287, |
|
"learning_rate": 2.0635651155370064e-06, |
|
"loss": 0.6451, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.784354091610911, |
|
"grad_norm": 0.1408482939004898, |
|
"learning_rate": 2.0442221373096045e-06, |
|
"loss": 0.6566, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.78641276376737, |
|
"grad_norm": 0.14770856499671936, |
|
"learning_rate": 2.02503355941454e-06, |
|
"loss": 0.6607, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7884714359238292, |
|
"grad_norm": 0.15583674609661102, |
|
"learning_rate": 2.0060002391762477e-06, |
|
"loss": 0.6475, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.7905301080802882, |
|
"grad_norm": 0.14111104607582092, |
|
"learning_rate": 1.987123026982423e-06, |
|
"loss": 0.6368, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7925887802367473, |
|
"grad_norm": 0.12933233380317688, |
|
"learning_rate": 1.968402766246026e-06, |
|
"loss": 0.6415, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7946474523932063, |
|
"grad_norm": 0.14513786137104034, |
|
"learning_rate": 1.9498402933676e-06, |
|
"loss": 0.6619, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7967061245496655, |
|
"grad_norm": 0.14459173381328583, |
|
"learning_rate": 1.931436437697896e-06, |
|
"loss": 0.6403, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7987647967061245, |
|
"grad_norm": 0.14979158341884613, |
|
"learning_rate": 1.9131920215008344e-06, |
|
"loss": 0.6416, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8008234688625836, |
|
"grad_norm": 0.1412138193845749, |
|
"learning_rate": 1.895107859916746e-06, |
|
"loss": 0.6314, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8028821410190428, |
|
"grad_norm": 0.1349021941423416, |
|
"learning_rate": 1.8771847609259675e-06, |
|
"loss": 0.6633, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8049408131755018, |
|
"grad_norm": 0.14212565124034882, |
|
"learning_rate": 1.8594235253127373e-06, |
|
"loss": 0.6393, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8069994853319609, |
|
"grad_norm": 0.12094785273075104, |
|
"learning_rate": 1.8418249466294153e-06, |
|
"loss": 0.6438, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8090581574884199, |
|
"grad_norm": 0.1392224282026291, |
|
"learning_rate": 1.8243898111610314e-06, |
|
"loss": 0.6443, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8111168296448791, |
|
"grad_norm": 0.13410909473896027, |
|
"learning_rate": 1.807118897890152e-06, |
|
"loss": 0.6721, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8131755018013381, |
|
"grad_norm": 0.13233357667922974, |
|
"learning_rate": 1.7900129784620798e-06, |
|
"loss": 0.6509, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8152341739577972, |
|
"grad_norm": 0.1279245764017105, |
|
"learning_rate": 1.7730728171503704e-06, |
|
"loss": 0.6591, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8172928461142563, |
|
"grad_norm": 0.1380516141653061, |
|
"learning_rate": 1.7562991708226945e-06, |
|
"loss": 0.6454, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.8193515182707154, |
|
"grad_norm": 0.14562861621379852, |
|
"learning_rate": 1.7396927889070164e-06, |
|
"loss": 0.6629, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8214101904271744, |
|
"grad_norm": 0.14138072729110718, |
|
"learning_rate": 1.723254413358111e-06, |
|
"loss": 0.6479, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8234688625836336, |
|
"grad_norm": 0.14233651757240295, |
|
"learning_rate": 1.7069847786244136e-06, |
|
"loss": 0.657, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8255275347400927, |
|
"grad_norm": 0.14190144836902618, |
|
"learning_rate": 1.690884611615209e-06, |
|
"loss": 0.6598, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.8275862068965517, |
|
"grad_norm": 0.14529059827327728, |
|
"learning_rate": 1.67495463166815e-06, |
|
"loss": 0.6573, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8296448790530108, |
|
"grad_norm": 0.13673891127109528, |
|
"learning_rate": 1.6591955505171198e-06, |
|
"loss": 0.6227, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.8317035512094699, |
|
"grad_norm": 0.1400144100189209, |
|
"learning_rate": 1.6436080722604314e-06, |
|
"loss": 0.6707, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.833762223365929, |
|
"grad_norm": 0.151056170463562, |
|
"learning_rate": 1.628192893329374e-06, |
|
"loss": 0.6468, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.835820895522388, |
|
"grad_norm": 0.1367853730916977, |
|
"learning_rate": 1.612950702457087e-06, |
|
"loss": 0.6765, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8378795676788472, |
|
"grad_norm": 0.13104775547981262, |
|
"learning_rate": 1.5978821806478027e-06, |
|
"loss": 0.6541, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8399382398353062, |
|
"grad_norm": 0.1309516578912735, |
|
"learning_rate": 1.582988001146405e-06, |
|
"loss": 0.6632, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8419969119917653, |
|
"grad_norm": 0.15327829122543335, |
|
"learning_rate": 1.5682688294083594e-06, |
|
"loss": 0.6623, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8440555841482243, |
|
"grad_norm": 0.12894751131534576, |
|
"learning_rate": 1.5537253230699784e-06, |
|
"loss": 0.6586, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8461142563046835, |
|
"grad_norm": 0.13982105255126953, |
|
"learning_rate": 1.5393581319190382e-06, |
|
"loss": 0.6654, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8481729284611426, |
|
"grad_norm": 0.1252405196428299, |
|
"learning_rate": 1.5251678978657464e-06, |
|
"loss": 0.6316, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8502316006176016, |
|
"grad_norm": 0.12443134933710098, |
|
"learning_rate": 1.511155254914065e-06, |
|
"loss": 0.6814, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8522902727740608, |
|
"grad_norm": 0.13635645806789398, |
|
"learning_rate": 1.4973208291333813e-06, |
|
"loss": 0.65, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.8543489449305198, |
|
"grad_norm": 0.18179555237293243, |
|
"learning_rate": 1.4836652386305351e-06, |
|
"loss": 0.6496, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8564076170869789, |
|
"grad_norm": 0.1520225703716278, |
|
"learning_rate": 1.4701890935222062e-06, |
|
"loss": 0.6516, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.8584662892434379, |
|
"grad_norm": 0.1353698968887329, |
|
"learning_rate": 1.4568929959076512e-06, |
|
"loss": 0.6652, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8605249613998971, |
|
"grad_norm": 0.13589033484458923, |
|
"learning_rate": 1.4437775398418042e-06, |
|
"loss": 0.6541, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8625836335563561, |
|
"grad_norm": 0.1323903501033783, |
|
"learning_rate": 1.4308433113087346e-06, |
|
"loss": 0.6498, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.8646423057128152, |
|
"grad_norm": 0.1342097967863083, |
|
"learning_rate": 1.4180908881954668e-06, |
|
"loss": 0.6643, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8667009778692744, |
|
"grad_norm": 0.14252899587154388, |
|
"learning_rate": 1.405520840266159e-06, |
|
"loss": 0.639, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.8687596500257334, |
|
"grad_norm": 0.13460814952850342, |
|
"learning_rate": 1.3931337291366488e-06, |
|
"loss": 0.6562, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.8708183221821925, |
|
"grad_norm": 0.12760917842388153, |
|
"learning_rate": 1.3809301082493592e-06, |
|
"loss": 0.6512, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.8728769943386516, |
|
"grad_norm": 0.14362888038158417, |
|
"learning_rate": 1.3689105228485739e-06, |
|
"loss": 0.6511, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.8749356664951107, |
|
"grad_norm": 0.1235094666481018, |
|
"learning_rate": 1.3570755099560701e-06, |
|
"loss": 0.6529, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8769943386515697, |
|
"grad_norm": 0.14431904256343842, |
|
"learning_rate": 1.3454255983471367e-06, |
|
"loss": 0.6494, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.8790530108080288, |
|
"grad_norm": 0.1322588473558426, |
|
"learning_rate": 1.3339613085269357e-06, |
|
"loss": 0.6408, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.8811116829644879, |
|
"grad_norm": 0.12662683427333832, |
|
"learning_rate": 1.322683152707255e-06, |
|
"loss": 0.6432, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.883170355120947, |
|
"grad_norm": 0.14211656153202057, |
|
"learning_rate": 1.3115916347836222e-06, |
|
"loss": 0.6364, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.885229027277406, |
|
"grad_norm": 0.13972091674804688, |
|
"learning_rate": 1.3006872503127887e-06, |
|
"loss": 0.6654, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8872876994338652, |
|
"grad_norm": 0.13244371116161346, |
|
"learning_rate": 1.2899704864905922e-06, |
|
"loss": 0.656, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.8893463715903243, |
|
"grad_norm": 0.13926206529140472, |
|
"learning_rate": 1.2794418221301842e-06, |
|
"loss": 0.6674, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.8914050437467833, |
|
"grad_norm": 0.14457055926322937, |
|
"learning_rate": 1.269101727640644e-06, |
|
"loss": 0.6424, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.8934637159032424, |
|
"grad_norm": 0.12490473687648773, |
|
"learning_rate": 1.2589506650059544e-06, |
|
"loss": 0.6623, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 0.1367175132036209, |
|
"learning_rate": 1.248989087764366e-06, |
|
"loss": 0.6541, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8975810602161606, |
|
"grad_norm": 0.1517862230539322, |
|
"learning_rate": 1.2392174409881311e-06, |
|
"loss": 0.649, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8996397323726196, |
|
"grad_norm": 0.11842308938503265, |
|
"learning_rate": 1.229636161263619e-06, |
|
"loss": 0.6481, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9016984045290788, |
|
"grad_norm": 0.1355244219303131, |
|
"learning_rate": 1.2202456766718092e-06, |
|
"loss": 0.6285, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9037570766855378, |
|
"grad_norm": 0.13762575387954712, |
|
"learning_rate": 1.2110464067691666e-06, |
|
"loss": 0.6547, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9058157488419969, |
|
"grad_norm": 0.12868613004684448, |
|
"learning_rate": 1.2020387625688943e-06, |
|
"loss": 0.6812, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9078744209984559, |
|
"grad_norm": 0.14232338964939117, |
|
"learning_rate": 1.1932231465225714e-06, |
|
"loss": 0.6541, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9099330931549151, |
|
"grad_norm": 0.13460494577884674, |
|
"learning_rate": 1.1845999525021723e-06, |
|
"loss": 0.6328, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9119917653113742, |
|
"grad_norm": 0.14562971889972687, |
|
"learning_rate": 1.1761695657824677e-06, |
|
"loss": 0.6581, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9140504374678332, |
|
"grad_norm": 0.13268987834453583, |
|
"learning_rate": 1.1679323630238087e-06, |
|
"loss": 0.6619, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.9161091096242924, |
|
"grad_norm": 0.14215055108070374, |
|
"learning_rate": 1.1598887122553061e-06, |
|
"loss": 0.6404, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9181677817807514, |
|
"grad_norm": 0.14430803060531616, |
|
"learning_rate": 1.1520389728583763e-06, |
|
"loss": 0.6501, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.9202264539372105, |
|
"grad_norm": 0.15590347349643707, |
|
"learning_rate": 1.1443834955506942e-06, |
|
"loss": 0.657, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.9222851260936696, |
|
"grad_norm": 0.13576243817806244, |
|
"learning_rate": 1.1369226223705176e-06, |
|
"loss": 0.6495, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.9243437982501287, |
|
"grad_norm": 0.1325463354587555, |
|
"learning_rate": 1.1296566866614067e-06, |
|
"loss": 0.6725, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.9264024704065877, |
|
"grad_norm": 0.1399555504322052, |
|
"learning_rate": 1.1225860130573334e-06, |
|
"loss": 0.5978, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9284611425630468, |
|
"grad_norm": 0.11871365457773209, |
|
"learning_rate": 1.1157109174681713e-06, |
|
"loss": 0.6021, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.930519814719506, |
|
"grad_norm": 0.12837566435337067, |
|
"learning_rate": 1.109031707065588e-06, |
|
"loss": 0.6716, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.932578486875965, |
|
"grad_norm": 0.1489572525024414, |
|
"learning_rate": 1.1025486802693158e-06, |
|
"loss": 0.6626, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.9346371590324241, |
|
"grad_norm": 0.12535393238067627, |
|
"learning_rate": 1.0962621267338198e-06, |
|
"loss": 0.6421, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.9366958311888832, |
|
"grad_norm": 0.13144055008888245, |
|
"learning_rate": 1.0901723273353599e-06, |
|
"loss": 0.6488, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9387545033453423, |
|
"grad_norm": 0.15396398305892944, |
|
"learning_rate": 1.0842795541594354e-06, |
|
"loss": 0.6482, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.9408131755018013, |
|
"grad_norm": 0.14828471839427948, |
|
"learning_rate": 1.078584070488635e-06, |
|
"loss": 0.6516, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9428718476582604, |
|
"grad_norm": 0.1343657374382019, |
|
"learning_rate": 1.0730861307908677e-06, |
|
"loss": 0.6596, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.9449305198147195, |
|
"grad_norm": 0.1373317688703537, |
|
"learning_rate": 1.0677859807079994e-06, |
|
"loss": 0.6562, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9469891919711786, |
|
"grad_norm": 0.14464855194091797, |
|
"learning_rate": 1.0626838570448716e-06, |
|
"loss": 0.6652, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9490478641276376, |
|
"grad_norm": 0.14019303023815155, |
|
"learning_rate": 1.057779987758727e-06, |
|
"loss": 0.6602, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9511065362840968, |
|
"grad_norm": 0.1424388289451599, |
|
"learning_rate": 1.0530745919490201e-06, |
|
"loss": 0.6713, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9531652084405559, |
|
"grad_norm": 0.1371728628873825, |
|
"learning_rate": 1.048567879847631e-06, |
|
"loss": 0.6766, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9552238805970149, |
|
"grad_norm": 0.14678040146827698, |
|
"learning_rate": 1.0442600528094722e-06, |
|
"loss": 0.6555, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.957282552753474, |
|
"grad_norm": 0.14888082444667816, |
|
"learning_rate": 1.04015130330349e-06, |
|
"loss": 0.6501, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9593412249099331, |
|
"grad_norm": 0.13902142643928528, |
|
"learning_rate": 1.0362418149040673e-06, |
|
"loss": 0.6462, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.9613998970663922, |
|
"grad_norm": 0.14032316207885742, |
|
"learning_rate": 1.0325317622828216e-06, |
|
"loss": 0.66, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9634585692228512, |
|
"grad_norm": 0.1258266568183899, |
|
"learning_rate": 1.0290213112007999e-06, |
|
"loss": 0.6103, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.9655172413793104, |
|
"grad_norm": 0.14587683975696564, |
|
"learning_rate": 1.0257106185010746e-06, |
|
"loss": 0.6521, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.9675759135357694, |
|
"grad_norm": 0.15115003287792206, |
|
"learning_rate": 1.0225998321017314e-06, |
|
"loss": 0.6454, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9696345856922285, |
|
"grad_norm": 0.157597154378891, |
|
"learning_rate": 1.019689090989268e-06, |
|
"loss": 0.6759, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.9716932578486875, |
|
"grad_norm": 0.14675654470920563, |
|
"learning_rate": 1.0169785252123765e-06, |
|
"loss": 0.6572, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.9737519300051467, |
|
"grad_norm": 0.14471760392189026, |
|
"learning_rate": 1.0144682558761371e-06, |
|
"loss": 0.6398, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.9758106021616058, |
|
"grad_norm": 0.12943477928638458, |
|
"learning_rate": 1.0121583951366075e-06, |
|
"loss": 0.6914, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.9778692743180648, |
|
"grad_norm": 0.1375255137681961, |
|
"learning_rate": 1.010049046195811e-06, |
|
"loss": 0.6363, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.979927946474524, |
|
"grad_norm": 0.13072469830513, |
|
"learning_rate": 1.0081403032971245e-06, |
|
"loss": 0.633, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.981986618630983, |
|
"grad_norm": 0.13308100402355194, |
|
"learning_rate": 1.006432251721069e-06, |
|
"loss": 0.643, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.9840452907874421, |
|
"grad_norm": 0.15122868120670319, |
|
"learning_rate": 1.0049249677815005e-06, |
|
"loss": 0.656, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.9861039629439012, |
|
"grad_norm": 0.12934233248233795, |
|
"learning_rate": 1.0036185188221976e-06, |
|
"loss": 0.6727, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.9881626351003603, |
|
"grad_norm": 0.15089304745197296, |
|
"learning_rate": 1.0025129632138545e-06, |
|
"loss": 0.6458, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9902213072568193, |
|
"grad_norm": 0.1326705515384674, |
|
"learning_rate": 1.0016083503514734e-06, |
|
"loss": 0.6371, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.9922799794132784, |
|
"grad_norm": 0.13898327946662903, |
|
"learning_rate": 1.0009047206521559e-06, |
|
"loss": 0.6513, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.9943386515697376, |
|
"grad_norm": 0.13787615299224854, |
|
"learning_rate": 1.000402105553299e-06, |
|
"loss": 0.6604, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.9963973237261966, |
|
"grad_norm": 0.12259743362665176, |
|
"learning_rate": 1.0001005275111895e-06, |
|
"loss": 0.6671, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.9984559958826557, |
|
"grad_norm": 0.1266549974679947, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.6372, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.002058672156459, |
|
"grad_norm": 0.13590970635414124, |
|
"learning_rate": 5.710476732029584e-06, |
|
"loss": 0.635, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.004117344312918, |
|
"grad_norm": 0.14185404777526855, |
|
"learning_rate": 5.695452505043673e-06, |
|
"loss": 0.6325, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.0061760164693772, |
|
"grad_norm": 0.1392943561077118, |
|
"learning_rate": 5.680426094901233e-06, |
|
"loss": 0.6399, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.0082346886258364, |
|
"grad_norm": 0.13933075964450836, |
|
"learning_rate": 5.665397669443578e-06, |
|
"loss": 0.6378, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.0102933607822955, |
|
"grad_norm": 0.1552516222000122, |
|
"learning_rate": 5.650367396534536e-06, |
|
"loss": 0.6406, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0123520329387545, |
|
"grad_norm": 0.13938720524311066, |
|
"learning_rate": 5.6353354440585676e-06, |
|
"loss": 0.6613, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.0144107050952136, |
|
"grad_norm": 0.13705240190029144, |
|
"learning_rate": 5.620301979918891e-06, |
|
"loss": 0.6763, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.0164693772516726, |
|
"grad_norm": 0.1428154855966568, |
|
"learning_rate": 5.605267172035615e-06, |
|
"loss": 0.6577, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.0185280494081317, |
|
"grad_norm": 0.140982523560524, |
|
"learning_rate": 5.590231188343852e-06, |
|
"loss": 0.64, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.0205867215645907, |
|
"grad_norm": 0.13976819813251495, |
|
"learning_rate": 5.575194196791854e-06, |
|
"loss": 0.6583, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.02264539372105, |
|
"grad_norm": 0.14706793427467346, |
|
"learning_rate": 5.560156365339128e-06, |
|
"loss": 0.6421, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.024704065877509, |
|
"grad_norm": 0.15089593827724457, |
|
"learning_rate": 5.545117861954558e-06, |
|
"loss": 0.6454, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.0267627380339681, |
|
"grad_norm": 0.14291644096374512, |
|
"learning_rate": 5.530078854614541e-06, |
|
"loss": 0.6612, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.0288214101904272, |
|
"grad_norm": 0.1260952353477478, |
|
"learning_rate": 5.515039511301097e-06, |
|
"loss": 0.6513, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.0308800823468862, |
|
"grad_norm": 0.14070680737495422, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.6398, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0329387545033453, |
|
"grad_norm": 0.14129748940467834, |
|
"learning_rate": 5.484960488698905e-06, |
|
"loss": 0.6323, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.0349974266598043, |
|
"grad_norm": 0.15318261086940765, |
|
"learning_rate": 5.469921145385462e-06, |
|
"loss": 0.6362, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.0370560988162636, |
|
"grad_norm": 0.14378410577774048, |
|
"learning_rate": 5.4548821380454444e-06, |
|
"loss": 0.6475, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.0391147709727226, |
|
"grad_norm": 0.14161469042301178, |
|
"learning_rate": 5.4398436346608726e-06, |
|
"loss": 0.651, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.0411734431291817, |
|
"grad_norm": 0.14112524688243866, |
|
"learning_rate": 5.424805803208147e-06, |
|
"loss": 0.6742, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.0432321152856407, |
|
"grad_norm": 0.13087144494056702, |
|
"learning_rate": 5.409768811656149e-06, |
|
"loss": 0.6566, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.0452907874420998, |
|
"grad_norm": 0.12194062024354935, |
|
"learning_rate": 5.394732827964388e-06, |
|
"loss": 0.659, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.0473494595985589, |
|
"grad_norm": 0.14136454463005066, |
|
"learning_rate": 5.379698020081111e-06, |
|
"loss": 0.6352, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.0494081317550181, |
|
"grad_norm": 0.12959690392017365, |
|
"learning_rate": 5.364664555941434e-06, |
|
"loss": 0.679, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.0514668039114772, |
|
"grad_norm": 0.1391456127166748, |
|
"learning_rate": 5.349632603465467e-06, |
|
"loss": 0.6519, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0535254760679362, |
|
"grad_norm": 0.12429799884557724, |
|
"learning_rate": 5.334602330556423e-06, |
|
"loss": 0.651, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.0555841482243953, |
|
"grad_norm": 0.1405404508113861, |
|
"learning_rate": 5.3195739050987695e-06, |
|
"loss": 0.6595, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.0576428203808543, |
|
"grad_norm": 0.13673624396324158, |
|
"learning_rate": 5.304547494956329e-06, |
|
"loss": 0.6732, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.0597014925373134, |
|
"grad_norm": 0.13902150094509125, |
|
"learning_rate": 5.289523267970417e-06, |
|
"loss": 0.6598, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.0617601646937724, |
|
"grad_norm": 0.15974578261375427, |
|
"learning_rate": 5.274501391957964e-06, |
|
"loss": 0.6682, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.0638188368502317, |
|
"grad_norm": 0.14193038642406464, |
|
"learning_rate": 5.259482034709637e-06, |
|
"loss": 0.6586, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.0658775090066908, |
|
"grad_norm": 0.1465134620666504, |
|
"learning_rate": 5.244465363987972e-06, |
|
"loss": 0.646, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.0679361811631498, |
|
"grad_norm": 0.13889746367931366, |
|
"learning_rate": 5.229451547525499e-06, |
|
"loss": 0.6395, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.0699948533196089, |
|
"grad_norm": 0.14239132404327393, |
|
"learning_rate": 5.214440753022863e-06, |
|
"loss": 0.6753, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.072053525476068, |
|
"grad_norm": 0.16652631759643555, |
|
"learning_rate": 5.199433148146954e-06, |
|
"loss": 0.6524, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.074112197632527, |
|
"grad_norm": 0.1389884054660797, |
|
"learning_rate": 5.184428900529039e-06, |
|
"loss": 0.635, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.076170869788986, |
|
"grad_norm": 0.13372258841991425, |
|
"learning_rate": 5.169428177762882e-06, |
|
"loss": 0.6306, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.0782295419454453, |
|
"grad_norm": 0.14761124551296234, |
|
"learning_rate": 5.154431147402874e-06, |
|
"loss": 0.6519, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.0802882141019043, |
|
"grad_norm": 0.14540445804595947, |
|
"learning_rate": 5.139437976962168e-06, |
|
"loss": 0.6409, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.0823468862583634, |
|
"grad_norm": 0.15441715717315674, |
|
"learning_rate": 5.124448833910797e-06, |
|
"loss": 0.6712, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.0844055584148224, |
|
"grad_norm": 0.1473764181137085, |
|
"learning_rate": 5.109463885673812e-06, |
|
"loss": 0.6775, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.0864642305712815, |
|
"grad_norm": 0.1535579264163971, |
|
"learning_rate": 5.094483299629409e-06, |
|
"loss": 0.6507, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.0885229027277405, |
|
"grad_norm": 0.1290774792432785, |
|
"learning_rate": 5.079507243107057e-06, |
|
"loss": 0.6693, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0905815748841996, |
|
"grad_norm": 0.1406625360250473, |
|
"learning_rate": 5.0645358833856365e-06, |
|
"loss": 0.6608, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.0926402470406589, |
|
"grad_norm": 0.14666709303855896, |
|
"learning_rate": 5.0495693876915574e-06, |
|
"loss": 0.6594, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.094698919197118, |
|
"grad_norm": 0.13475078344345093, |
|
"learning_rate": 5.034607923196911e-06, |
|
"loss": 0.6665, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.096757591353577, |
|
"grad_norm": 0.14045949280261993, |
|
"learning_rate": 5.019651657017578e-06, |
|
"loss": 0.6517, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.098816263510036, |
|
"grad_norm": 0.1452544629573822, |
|
"learning_rate": 5.004700756211388e-06, |
|
"loss": 0.6355, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.100874935666495, |
|
"grad_norm": 0.1488482654094696, |
|
"learning_rate": 4.989755387776231e-06, |
|
"loss": 0.6488, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.1029336078229541, |
|
"grad_norm": 0.11999811977148056, |
|
"learning_rate": 4.974815718648207e-06, |
|
"loss": 0.673, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.1049922799794132, |
|
"grad_norm": 0.1541755348443985, |
|
"learning_rate": 4.959881915699757e-06, |
|
"loss": 0.6715, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.1070509521358725, |
|
"grad_norm": 0.13446086645126343, |
|
"learning_rate": 4.944954145737794e-06, |
|
"loss": 0.653, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.1091096242923315, |
|
"grad_norm": 0.1450193077325821, |
|
"learning_rate": 4.930032575501845e-06, |
|
"loss": 0.6562, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.1111682964487906, |
|
"grad_norm": 0.14609019458293915, |
|
"learning_rate": 4.915117371662189e-06, |
|
"loss": 0.6663, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.1132269686052496, |
|
"grad_norm": 0.139775812625885, |
|
"learning_rate": 4.9002087008179966e-06, |
|
"loss": 0.6191, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.1152856407617087, |
|
"grad_norm": 0.15244266390800476, |
|
"learning_rate": 4.885306729495459e-06, |
|
"loss": 0.6603, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.1173443129181677, |
|
"grad_norm": 0.12155251204967499, |
|
"learning_rate": 4.870411624145942e-06, |
|
"loss": 0.6482, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.1194029850746268, |
|
"grad_norm": 0.13439474999904633, |
|
"learning_rate": 4.855523551144119e-06, |
|
"loss": 0.6215, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.121461657231086, |
|
"grad_norm": 0.1316884607076645, |
|
"learning_rate": 4.840642676786111e-06, |
|
"loss": 0.6374, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.123520329387545, |
|
"grad_norm": 0.15030314028263092, |
|
"learning_rate": 4.825769167287634e-06, |
|
"loss": 0.6429, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.1255790015440041, |
|
"grad_norm": 0.13496868312358856, |
|
"learning_rate": 4.81090318878214e-06, |
|
"loss": 0.6548, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.1276376737004632, |
|
"grad_norm": 0.13917021453380585, |
|
"learning_rate": 4.796044907318961e-06, |
|
"loss": 0.6398, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.1296963458569222, |
|
"grad_norm": 0.1306729018688202, |
|
"learning_rate": 4.781194488861459e-06, |
|
"loss": 0.6619, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.1317550180133813, |
|
"grad_norm": 0.12746194005012512, |
|
"learning_rate": 4.7663520992851585e-06, |
|
"loss": 0.6365, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.1338136901698403, |
|
"grad_norm": 0.13715361058712006, |
|
"learning_rate": 4.751517904375915e-06, |
|
"loss": 0.6825, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.1358723623262996, |
|
"grad_norm": 0.13766644895076752, |
|
"learning_rate": 4.736692069828044e-06, |
|
"loss": 0.6392, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.1379310344827587, |
|
"grad_norm": 0.13901367783546448, |
|
"learning_rate": 4.721874761242482e-06, |
|
"loss": 0.6571, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.1399897066392177, |
|
"grad_norm": 0.14460472762584686, |
|
"learning_rate": 4.7070661441249266e-06, |
|
"loss": 0.651, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.1420483787956768, |
|
"grad_norm": 0.14462216198444366, |
|
"learning_rate": 4.692266383884e-06, |
|
"loss": 0.6874, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.1441070509521358, |
|
"grad_norm": 0.13282893598079681, |
|
"learning_rate": 4.6774756458293945e-06, |
|
"loss": 0.6513, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.1461657231085949, |
|
"grad_norm": 0.13160084187984467, |
|
"learning_rate": 4.662694095170023e-06, |
|
"loss": 0.6778, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.148224395265054, |
|
"grad_norm": 0.14130820333957672, |
|
"learning_rate": 4.64792189701218e-06, |
|
"loss": 0.6687, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.1502830674215132, |
|
"grad_norm": 0.13801245391368866, |
|
"learning_rate": 4.633159216357696e-06, |
|
"loss": 0.6325, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.1523417395779723, |
|
"grad_norm": 0.14570364356040955, |
|
"learning_rate": 4.618406218102093e-06, |
|
"loss": 0.6655, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.1544004117344313, |
|
"grad_norm": 0.14590254426002502, |
|
"learning_rate": 4.603663067032738e-06, |
|
"loss": 0.6625, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.1564590838908904, |
|
"grad_norm": 0.12933455407619476, |
|
"learning_rate": 4.588929927827015e-06, |
|
"loss": 0.6512, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.1585177560473494, |
|
"grad_norm": 0.1645902544260025, |
|
"learning_rate": 4.574206965050472e-06, |
|
"loss": 0.6423, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.1605764282038085, |
|
"grad_norm": 0.14364786446094513, |
|
"learning_rate": 4.559494343154993e-06, |
|
"loss": 0.6481, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.1626351003602675, |
|
"grad_norm": 0.13910852372646332, |
|
"learning_rate": 4.544792226476954e-06, |
|
"loss": 0.6637, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.1646937725167268, |
|
"grad_norm": 0.14113080501556396, |
|
"learning_rate": 4.53010077923539e-06, |
|
"loss": 0.643, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.1667524446731858, |
|
"grad_norm": 0.13962271809577942, |
|
"learning_rate": 4.515420165530162e-06, |
|
"loss": 0.6271, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.168811116829645, |
|
"grad_norm": 0.15055330097675323, |
|
"learning_rate": 4.500750549340128e-06, |
|
"loss": 0.6424, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.170869788986104, |
|
"grad_norm": 0.13697674870491028, |
|
"learning_rate": 4.486092094521296e-06, |
|
"loss": 0.6274, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.172928461142563, |
|
"grad_norm": 0.13252243399620056, |
|
"learning_rate": 4.471444964805013e-06, |
|
"loss": 0.6423, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.174987133299022, |
|
"grad_norm": 0.13780078291893005, |
|
"learning_rate": 4.456809323796123e-06, |
|
"loss": 0.6397, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.177045805455481, |
|
"grad_norm": 0.141806960105896, |
|
"learning_rate": 4.442185334971148e-06, |
|
"loss": 0.6563, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.1791044776119404, |
|
"grad_norm": 0.14122751355171204, |
|
"learning_rate": 4.427573161676452e-06, |
|
"loss": 0.645, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.1811631497683994, |
|
"grad_norm": 0.14296391606330872, |
|
"learning_rate": 4.412972967126425e-06, |
|
"loss": 0.6743, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.1832218219248585, |
|
"grad_norm": 0.14250048995018005, |
|
"learning_rate": 4.398384914401663e-06, |
|
"loss": 0.6527, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.1852804940813175, |
|
"grad_norm": 0.15447309613227844, |
|
"learning_rate": 4.383809166447131e-06, |
|
"loss": 0.6253, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.1873391662377766, |
|
"grad_norm": 0.13448724150657654, |
|
"learning_rate": 4.36924588607036e-06, |
|
"loss": 0.6809, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.1893978383942356, |
|
"grad_norm": 0.1386733055114746, |
|
"learning_rate": 4.354695235939618e-06, |
|
"loss": 0.6613, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.1914565105506947, |
|
"grad_norm": 0.13458538055419922, |
|
"learning_rate": 4.3401573785820986e-06, |
|
"loss": 0.6525, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.193515182707154, |
|
"grad_norm": 0.14328433573246002, |
|
"learning_rate": 4.325632476382098e-06, |
|
"loss": 0.659, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.195573854863613, |
|
"grad_norm": 0.15153314173221588, |
|
"learning_rate": 4.311120691579214e-06, |
|
"loss": 0.6261, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.197632527020072, |
|
"grad_norm": 0.136255145072937, |
|
"learning_rate": 4.29662218626652e-06, |
|
"loss": 0.6272, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.199691199176531, |
|
"grad_norm": 0.15096060931682587, |
|
"learning_rate": 4.282137122388765e-06, |
|
"loss": 0.6685, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.2017498713329902, |
|
"grad_norm": 0.14815060794353485, |
|
"learning_rate": 4.267665661740559e-06, |
|
"loss": 0.5883, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.2038085434894492, |
|
"grad_norm": 0.13184097409248352, |
|
"learning_rate": 4.253207965964565e-06, |
|
"loss": 0.6463, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.2058672156459085, |
|
"grad_norm": 0.1358867883682251, |
|
"learning_rate": 4.238764196549697e-06, |
|
"loss": 0.6438, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.2079258878023675, |
|
"grad_norm": 0.15228141844272614, |
|
"learning_rate": 4.2243345148293215e-06, |
|
"loss": 0.6477, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.2099845599588266, |
|
"grad_norm": 0.14477193355560303, |
|
"learning_rate": 4.209919081979435e-06, |
|
"loss": 0.6794, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.2120432321152856, |
|
"grad_norm": 0.15588507056236267, |
|
"learning_rate": 4.195518059016891e-06, |
|
"loss": 0.6412, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.2141019042717447, |
|
"grad_norm": 0.13339221477508545, |
|
"learning_rate": 4.181131606797582e-06, |
|
"loss": 0.6358, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.2161605764282037, |
|
"grad_norm": 0.1495896428823471, |
|
"learning_rate": 4.166759886014649e-06, |
|
"loss": 0.6815, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.218219248584663, |
|
"grad_norm": 0.13854841887950897, |
|
"learning_rate": 4.152403057196686e-06, |
|
"loss": 0.656, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.220277920741122, |
|
"grad_norm": 0.14208003878593445, |
|
"learning_rate": 4.13806128070595e-06, |
|
"loss": 0.6522, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.2223365928975811, |
|
"grad_norm": 0.16169045865535736, |
|
"learning_rate": 4.123734716736566e-06, |
|
"loss": 0.6577, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.2243952650540402, |
|
"grad_norm": 0.13855846226215363, |
|
"learning_rate": 4.109423525312738e-06, |
|
"loss": 0.6519, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.2264539372104992, |
|
"grad_norm": 0.13011138141155243, |
|
"learning_rate": 4.095127866286962e-06, |
|
"loss": 0.6372, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.2285126093669583, |
|
"grad_norm": 0.1390632688999176, |
|
"learning_rate": 4.080847899338244e-06, |
|
"loss": 0.6259, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.2305712815234173, |
|
"grad_norm": 0.15639440715312958, |
|
"learning_rate": 4.066583783970311e-06, |
|
"loss": 0.641, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.2326299536798766, |
|
"grad_norm": 0.12465671449899673, |
|
"learning_rate": 4.052335679509831e-06, |
|
"loss": 0.6595, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.2346886258363357, |
|
"grad_norm": 0.12938636541366577, |
|
"learning_rate": 4.038103745104635e-06, |
|
"loss": 0.6462, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.2367472979927947, |
|
"grad_norm": 0.13924743235111237, |
|
"learning_rate": 4.02388813972194e-06, |
|
"loss": 0.6435, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2388059701492538, |
|
"grad_norm": 0.13049103319644928, |
|
"learning_rate": 4.009689022146569e-06, |
|
"loss": 0.6194, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.2408646423057128, |
|
"grad_norm": 0.13774777948856354, |
|
"learning_rate": 3.995506550979182e-06, |
|
"loss": 0.6673, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.2429233144621719, |
|
"grad_norm": 0.14476458728313446, |
|
"learning_rate": 3.981340884634504e-06, |
|
"loss": 0.6524, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.244981986618631, |
|
"grad_norm": 0.15251493453979492, |
|
"learning_rate": 3.967192181339549e-06, |
|
"loss": 0.6518, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.2470406587750902, |
|
"grad_norm": 0.13875854015350342, |
|
"learning_rate": 3.953060599131867e-06, |
|
"loss": 0.6422, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.2490993309315492, |
|
"grad_norm": 0.13512182235717773, |
|
"learning_rate": 3.938946295857758e-06, |
|
"loss": 0.6467, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.2511580030880083, |
|
"grad_norm": 0.12567898631095886, |
|
"learning_rate": 3.924849429170531e-06, |
|
"loss": 0.6521, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.2532166752444673, |
|
"grad_norm": 0.13632598519325256, |
|
"learning_rate": 3.910770156528726e-06, |
|
"loss": 0.6442, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.2552753474009264, |
|
"grad_norm": 0.13966801762580872, |
|
"learning_rate": 3.8967086351943665e-06, |
|
"loss": 0.6642, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.2573340195573854, |
|
"grad_norm": 0.13744676113128662, |
|
"learning_rate": 3.882665022231193e-06, |
|
"loss": 0.6637, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2593926917138445, |
|
"grad_norm": 0.1319979578256607, |
|
"learning_rate": 3.868639474502918e-06, |
|
"loss": 0.6473, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.2614513638703038, |
|
"grad_norm": 0.13453079760074615, |
|
"learning_rate": 3.8546321486714714e-06, |
|
"loss": 0.6675, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.2635100360267628, |
|
"grad_norm": 0.1409771889448166, |
|
"learning_rate": 3.840643201195241e-06, |
|
"loss": 0.6544, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.2655687081832219, |
|
"grad_norm": 0.14161136746406555, |
|
"learning_rate": 3.826672788327341e-06, |
|
"loss": 0.6815, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.267627380339681, |
|
"grad_norm": 0.1445418894290924, |
|
"learning_rate": 3.812721066113856e-06, |
|
"loss": 0.6496, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.26968605249614, |
|
"grad_norm": 0.13115796446800232, |
|
"learning_rate": 3.798788190392099e-06, |
|
"loss": 0.6662, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.271744724652599, |
|
"grad_norm": 0.13190345466136932, |
|
"learning_rate": 3.7848743167888735e-06, |
|
"loss": 0.6495, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.273803396809058, |
|
"grad_norm": 0.13790887594223022, |
|
"learning_rate": 3.770979600718734e-06, |
|
"loss": 0.6783, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.2758620689655173, |
|
"grad_norm": 0.14959625899791718, |
|
"learning_rate": 3.7571041973822497e-06, |
|
"loss": 0.6354, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.2779207411219764, |
|
"grad_norm": 0.14615973830223083, |
|
"learning_rate": 3.743248261764274e-06, |
|
"loss": 0.6663, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.2799794132784355, |
|
"grad_norm": 0.1481384038925171, |
|
"learning_rate": 3.7294119486322054e-06, |
|
"loss": 0.709, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.2820380854348945, |
|
"grad_norm": 0.13109134137630463, |
|
"learning_rate": 3.7155954125342685e-06, |
|
"loss": 0.6524, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.2840967575913536, |
|
"grad_norm": 0.15184319019317627, |
|
"learning_rate": 3.7017988077977796e-06, |
|
"loss": 0.6863, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.2861554297478126, |
|
"grad_norm": 0.13635946810245514, |
|
"learning_rate": 3.688022288527433e-06, |
|
"loss": 0.6599, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.2882141019042717, |
|
"grad_norm": 0.13947440683841705, |
|
"learning_rate": 3.6742660086035644e-06, |
|
"loss": 0.6553, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.290272774060731, |
|
"grad_norm": 0.12968918681144714, |
|
"learning_rate": 3.6605301216804477e-06, |
|
"loss": 0.6342, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.29233144621719, |
|
"grad_norm": 0.13788466155529022, |
|
"learning_rate": 3.64681478118457e-06, |
|
"loss": 0.6441, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.294390118373649, |
|
"grad_norm": 0.1530154049396515, |
|
"learning_rate": 3.633120140312919e-06, |
|
"loss": 0.6798, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.296448790530108, |
|
"grad_norm": 0.14195966720581055, |
|
"learning_rate": 3.619446352031273e-06, |
|
"loss": 0.6628, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.2985074626865671, |
|
"grad_norm": 0.16132400929927826, |
|
"learning_rate": 3.605793569072493e-06, |
|
"loss": 0.6335, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.3005661348430262, |
|
"grad_norm": 0.15894006192684174, |
|
"learning_rate": 3.5921619439348167e-06, |
|
"loss": 0.6573, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.3026248069994852, |
|
"grad_norm": 0.1499107927083969, |
|
"learning_rate": 3.578551628880148e-06, |
|
"loss": 0.6414, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.3046834791559445, |
|
"grad_norm": 0.14822602272033691, |
|
"learning_rate": 3.5649627759323714e-06, |
|
"loss": 0.6327, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.3067421513124036, |
|
"grad_norm": 0.12993744015693665, |
|
"learning_rate": 3.551395536875637e-06, |
|
"loss": 0.6677, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.3088008234688626, |
|
"grad_norm": 0.13447578251361847, |
|
"learning_rate": 3.537850063252679e-06, |
|
"loss": 0.663, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.3108594956253217, |
|
"grad_norm": 0.1351039707660675, |
|
"learning_rate": 3.5243265063631125e-06, |
|
"loss": 0.658, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.3129181677817807, |
|
"grad_norm": 0.14985619485378265, |
|
"learning_rate": 3.510825017261752e-06, |
|
"loss": 0.6628, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.3149768399382398, |
|
"grad_norm": 0.13566601276397705, |
|
"learning_rate": 3.4973457467569155e-06, |
|
"loss": 0.6628, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.3170355120946988, |
|
"grad_norm": 0.1341661512851715, |
|
"learning_rate": 3.4838888454087517e-06, |
|
"loss": 0.6601, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.319094184251158, |
|
"grad_norm": 0.14291465282440186, |
|
"learning_rate": 3.4704544635275416e-06, |
|
"loss": 0.6678, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.3211528564076171, |
|
"grad_norm": 0.14012368023395538, |
|
"learning_rate": 3.45704275117204e-06, |
|
"loss": 0.6644, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.3232115285640762, |
|
"grad_norm": 0.15221066772937775, |
|
"learning_rate": 3.4436538581477796e-06, |
|
"loss": 0.6909, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.3252702007205353, |
|
"grad_norm": 0.13185715675354004, |
|
"learning_rate": 3.430287934005414e-06, |
|
"loss": 0.6618, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.3273288728769943, |
|
"grad_norm": 0.12956033647060394, |
|
"learning_rate": 3.4169451280390343e-06, |
|
"loss": 0.6422, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.3293875450334534, |
|
"grad_norm": 0.13954877853393555, |
|
"learning_rate": 3.403625589284509e-06, |
|
"loss": 0.6683, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.3314462171899124, |
|
"grad_norm": 0.13885624706745148, |
|
"learning_rate": 3.3903294665178204e-06, |
|
"loss": 0.673, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.3335048893463717, |
|
"grad_norm": 0.13849471509456635, |
|
"learning_rate": 3.3770569082533943e-06, |
|
"loss": 0.6436, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.3355635615028307, |
|
"grad_norm": 0.1327974945306778, |
|
"learning_rate": 3.363808062742455e-06, |
|
"loss": 0.6658, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.3376222336592898, |
|
"grad_norm": 0.13802169263362885, |
|
"learning_rate": 3.350583077971352e-06, |
|
"loss": 0.6523, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.3396809058157488, |
|
"grad_norm": 0.12908506393432617, |
|
"learning_rate": 3.3373821016599236e-06, |
|
"loss": 0.627, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.3417395779722079, |
|
"grad_norm": 0.13945898413658142, |
|
"learning_rate": 3.3242052812598353e-06, |
|
"loss": 0.6388, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.343798250128667, |
|
"grad_norm": 0.13595330715179443, |
|
"learning_rate": 3.3110527639529356e-06, |
|
"loss": 0.6333, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.345856922285126, |
|
"grad_norm": 0.14493203163146973, |
|
"learning_rate": 3.297924696649619e-06, |
|
"loss": 0.6817, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.3479155944415853, |
|
"grad_norm": 0.14481669664382935, |
|
"learning_rate": 3.2848212259871763e-06, |
|
"loss": 0.6535, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.3499742665980443, |
|
"grad_norm": 0.16757646203041077, |
|
"learning_rate": 3.2717424983281598e-06, |
|
"loss": 0.6417, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.3520329387545034, |
|
"grad_norm": 0.16268573701381683, |
|
"learning_rate": 3.2586886597587466e-06, |
|
"loss": 0.6621, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.3540916109109624, |
|
"grad_norm": 0.1454005092382431, |
|
"learning_rate": 3.2456598560871133e-06, |
|
"loss": 0.66, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.3561502830674215, |
|
"grad_norm": 0.12595096230506897, |
|
"learning_rate": 3.2326562328418e-06, |
|
"loss": 0.6469, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.3582089552238805, |
|
"grad_norm": 0.14004173874855042, |
|
"learning_rate": 3.2196779352700857e-06, |
|
"loss": 0.5994, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.3602676273803396, |
|
"grad_norm": 0.13839490711688995, |
|
"learning_rate": 3.2067251083363714e-06, |
|
"loss": 0.6671, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.3623262995367988, |
|
"grad_norm": 0.13650719821453094, |
|
"learning_rate": 3.1937978967205585e-06, |
|
"loss": 0.6521, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.364384971693258, |
|
"grad_norm": 0.14337030053138733, |
|
"learning_rate": 3.180896444816427e-06, |
|
"loss": 0.647, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.366443643849717, |
|
"grad_norm": 0.1445995718240738, |
|
"learning_rate": 3.168020896730028e-06, |
|
"loss": 0.6534, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.368502316006176, |
|
"grad_norm": 0.14641247689723969, |
|
"learning_rate": 3.155171396278078e-06, |
|
"loss": 0.6362, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.370560988162635, |
|
"grad_norm": 0.13869577646255493, |
|
"learning_rate": 3.142348086986342e-06, |
|
"loss": 0.6706, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.372619660319094, |
|
"grad_norm": 0.1305873841047287, |
|
"learning_rate": 3.1295511120880382e-06, |
|
"loss": 0.651, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.3746783324755532, |
|
"grad_norm": 0.12877009809017181, |
|
"learning_rate": 3.116780614522238e-06, |
|
"loss": 0.6582, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.3767370046320124, |
|
"grad_norm": 0.13101081550121307, |
|
"learning_rate": 3.104036736932268e-06, |
|
"loss": 0.6499, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.3787956767884715, |
|
"grad_norm": 0.1467534303665161, |
|
"learning_rate": 3.0913196216641105e-06, |
|
"loss": 0.6344, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.3808543489449305, |
|
"grad_norm": 0.1271016001701355, |
|
"learning_rate": 3.078629410764824e-06, |
|
"loss": 0.6807, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.3829130211013896, |
|
"grad_norm": 0.14458723366260529, |
|
"learning_rate": 3.0659662459809525e-06, |
|
"loss": 0.6496, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.3849716932578486, |
|
"grad_norm": 0.13007880747318268, |
|
"learning_rate": 3.0533302687569368e-06, |
|
"loss": 0.6656, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.387030365414308, |
|
"grad_norm": 0.13446252048015594, |
|
"learning_rate": 3.040721620233546e-06, |
|
"loss": 0.6453, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.3890890375707667, |
|
"grad_norm": 0.13023251295089722, |
|
"learning_rate": 3.0281404412462866e-06, |
|
"loss": 0.6548, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.391147709727226, |
|
"grad_norm": 0.16312147676944733, |
|
"learning_rate": 3.015586872323846e-06, |
|
"loss": 0.6588, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.393206381883685, |
|
"grad_norm": 0.1303606480360031, |
|
"learning_rate": 3.0030610536865047e-06, |
|
"loss": 0.6561, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.3952650540401441, |
|
"grad_norm": 0.1296800971031189, |
|
"learning_rate": 2.9905631252445864e-06, |
|
"loss": 0.6457, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.3973237261966032, |
|
"grad_norm": 0.13874535262584686, |
|
"learning_rate": 2.978093226596884e-06, |
|
"loss": 0.6561, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.3993823983530622, |
|
"grad_norm": 0.14332440495491028, |
|
"learning_rate": 2.965651497029108e-06, |
|
"loss": 0.6723, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.4014410705095215, |
|
"grad_norm": 0.15567727386951447, |
|
"learning_rate": 2.953238075512321e-06, |
|
"loss": 0.6654, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.4034997426659803, |
|
"grad_norm": 0.1367722749710083, |
|
"learning_rate": 2.940853100701402e-06, |
|
"loss": 0.6593, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.4055584148224396, |
|
"grad_norm": 0.13934288918972015, |
|
"learning_rate": 2.928496710933477e-06, |
|
"loss": 0.6909, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.4076170869788986, |
|
"grad_norm": 0.1457984745502472, |
|
"learning_rate": 2.916169044226387e-06, |
|
"loss": 0.6477, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.4096757591353577, |
|
"grad_norm": 0.13270024955272675, |
|
"learning_rate": 2.9038702382771476e-06, |
|
"loss": 0.6588, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.4117344312918167, |
|
"grad_norm": 0.13029582798480988, |
|
"learning_rate": 2.891600430460402e-06, |
|
"loss": 0.6564, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.4137931034482758, |
|
"grad_norm": 0.14011262357234955, |
|
"learning_rate": 2.879359757826895e-06, |
|
"loss": 0.6368, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.415851775604735, |
|
"grad_norm": 0.14998824894428253, |
|
"learning_rate": 2.8671483571019337e-06, |
|
"loss": 0.629, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.417910447761194, |
|
"grad_norm": 0.14513614773750305, |
|
"learning_rate": 2.854966364683872e-06, |
|
"loss": 0.656, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.4199691199176532, |
|
"grad_norm": 0.1355086714029312, |
|
"learning_rate": 2.842813916642574e-06, |
|
"loss": 0.6649, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.4220277920741122, |
|
"grad_norm": 0.143692284822464, |
|
"learning_rate": 2.8306911487179023e-06, |
|
"loss": 0.64, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4240864642305713, |
|
"grad_norm": 0.13343499600887299, |
|
"learning_rate": 2.818598196318201e-06, |
|
"loss": 0.6644, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.4261451363870303, |
|
"grad_norm": 0.14596882462501526, |
|
"learning_rate": 2.8065351945187837e-06, |
|
"loss": 0.6387, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.4282038085434894, |
|
"grad_norm": 0.1412520557641983, |
|
"learning_rate": 2.7945022780604185e-06, |
|
"loss": 0.6451, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.4302624806999487, |
|
"grad_norm": 0.12341731041669846, |
|
"learning_rate": 2.7824995813478295e-06, |
|
"loss": 0.6593, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.4323211528564075, |
|
"grad_norm": 0.1608169823884964, |
|
"learning_rate": 2.7705272384481972e-06, |
|
"loss": 0.6376, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.4343798250128668, |
|
"grad_norm": 0.13672709465026855, |
|
"learning_rate": 2.7585853830896527e-06, |
|
"loss": 0.6308, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.4364384971693258, |
|
"grad_norm": 0.13107231259346008, |
|
"learning_rate": 2.746674148659788e-06, |
|
"loss": 0.6537, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.4384971693257849, |
|
"grad_norm": 0.1523398607969284, |
|
"learning_rate": 2.734793668204172e-06, |
|
"loss": 0.6627, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.440555841482244, |
|
"grad_norm": 0.13535884022712708, |
|
"learning_rate": 2.722944074424858e-06, |
|
"loss": 0.6389, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.442614513638703, |
|
"grad_norm": 0.15502335131168365, |
|
"learning_rate": 2.7111254996788995e-06, |
|
"loss": 0.6563, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4446731857951622, |
|
"grad_norm": 0.14101667702198029, |
|
"learning_rate": 2.699338075976875e-06, |
|
"loss": 0.6613, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.446731857951621, |
|
"grad_norm": 0.12796726822853088, |
|
"learning_rate": 2.687581934981419e-06, |
|
"loss": 0.6556, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.4487905301080803, |
|
"grad_norm": 0.1300426423549652, |
|
"learning_rate": 2.6758572080057398e-06, |
|
"loss": 0.6375, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.4508492022645394, |
|
"grad_norm": 0.14779579639434814, |
|
"learning_rate": 2.664164026012161e-06, |
|
"loss": 0.6371, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.4529078744209984, |
|
"grad_norm": 0.14179164171218872, |
|
"learning_rate": 2.652502519610657e-06, |
|
"loss": 0.6609, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.4549665465774575, |
|
"grad_norm": 0.1387166529893875, |
|
"learning_rate": 2.640872819057394e-06, |
|
"loss": 0.6509, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.4570252187339165, |
|
"grad_norm": 0.13394972681999207, |
|
"learning_rate": 2.629275054253274e-06, |
|
"loss": 0.6236, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.4590838908903758, |
|
"grad_norm": 0.16035096347332, |
|
"learning_rate": 2.6177093547424826e-06, |
|
"loss": 0.6318, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.4611425630468349, |
|
"grad_norm": 0.14260070025920868, |
|
"learning_rate": 2.606175849711048e-06, |
|
"loss": 0.6625, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.463201235203294, |
|
"grad_norm": 0.1396186351776123, |
|
"learning_rate": 2.5946746679853894e-06, |
|
"loss": 0.6741, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.465259907359753, |
|
"grad_norm": 0.14683037996292114, |
|
"learning_rate": 2.583205938030888e-06, |
|
"loss": 0.6482, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.467318579516212, |
|
"grad_norm": 0.15719832479953766, |
|
"learning_rate": 2.57176978795044e-06, |
|
"loss": 0.6416, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.469377251672671, |
|
"grad_norm": 0.13454781472682953, |
|
"learning_rate": 2.5603663454830395e-06, |
|
"loss": 0.6622, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.4714359238291301, |
|
"grad_norm": 0.13637712597846985, |
|
"learning_rate": 2.548995738002338e-06, |
|
"loss": 0.662, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.4734945959855894, |
|
"grad_norm": 0.13223333656787872, |
|
"learning_rate": 2.5376580925152335e-06, |
|
"loss": 0.6284, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.4755532681420485, |
|
"grad_norm": 0.14275510609149933, |
|
"learning_rate": 2.5263535356604428e-06, |
|
"loss": 0.6585, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.4776119402985075, |
|
"grad_norm": 0.1544455587863922, |
|
"learning_rate": 2.5150821937070946e-06, |
|
"loss": 0.6852, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.4796706124549666, |
|
"grad_norm": 0.1354699581861496, |
|
"learning_rate": 2.503844192553313e-06, |
|
"loss": 0.6394, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.4817292846114256, |
|
"grad_norm": 0.16445569694042206, |
|
"learning_rate": 2.4926396577248097e-06, |
|
"loss": 0.6811, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.4837879567678847, |
|
"grad_norm": 0.13989467918872833, |
|
"learning_rate": 2.481468714373496e-06, |
|
"loss": 0.638, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.4858466289243437, |
|
"grad_norm": 0.13825134932994843, |
|
"learning_rate": 2.4703314872760623e-06, |
|
"loss": 0.6662, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.487905301080803, |
|
"grad_norm": 0.12880556285381317, |
|
"learning_rate": 2.4592281008326075e-06, |
|
"loss": 0.6502, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.489963973237262, |
|
"grad_norm": 0.12772305309772491, |
|
"learning_rate": 2.448158679065231e-06, |
|
"loss": 0.6763, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.492022645393721, |
|
"grad_norm": 0.13914746046066284, |
|
"learning_rate": 2.437123345616661e-06, |
|
"loss": 0.645, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.4940813175501801, |
|
"grad_norm": 0.1409793198108673, |
|
"learning_rate": 2.4261222237488616e-06, |
|
"loss": 0.6588, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.4961399897066392, |
|
"grad_norm": 0.1305626630783081, |
|
"learning_rate": 2.4151554363416676e-06, |
|
"loss": 0.6603, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.4981986618630982, |
|
"grad_norm": 0.13002273440361023, |
|
"learning_rate": 2.4042231058914027e-06, |
|
"loss": 0.6248, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.5002573340195573, |
|
"grad_norm": 0.14312808215618134, |
|
"learning_rate": 2.3933253545095143e-06, |
|
"loss": 0.6459, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.5023160061760166, |
|
"grad_norm": 0.133903369307518, |
|
"learning_rate": 2.382462303921213e-06, |
|
"loss": 0.6477, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.5043746783324754, |
|
"grad_norm": 0.1325589120388031, |
|
"learning_rate": 2.37163407546411e-06, |
|
"loss": 0.6816, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.5064333504889347, |
|
"grad_norm": 0.13143044710159302, |
|
"learning_rate": 2.3608407900868597e-06, |
|
"loss": 0.6492, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.5084920226453937, |
|
"grad_norm": 0.13435474038124084, |
|
"learning_rate": 2.3500825683478096e-06, |
|
"loss": 0.6528, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.5105506948018528, |
|
"grad_norm": 0.1382218599319458, |
|
"learning_rate": 2.33935953041366e-06, |
|
"loss": 0.6292, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.512609366958312, |
|
"grad_norm": 0.14546315371990204, |
|
"learning_rate": 2.328671796058113e-06, |
|
"loss": 0.6548, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.5146680391147709, |
|
"grad_norm": 0.14825578033924103, |
|
"learning_rate": 2.3180194846605367e-06, |
|
"loss": 0.65, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.5167267112712302, |
|
"grad_norm": 0.1339276134967804, |
|
"learning_rate": 2.3074027152046384e-06, |
|
"loss": 0.6475, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.518785383427689, |
|
"grad_norm": 0.1629195660352707, |
|
"learning_rate": 2.296821606277129e-06, |
|
"loss": 0.6302, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.5208440555841483, |
|
"grad_norm": 0.13913467526435852, |
|
"learning_rate": 2.286276276066398e-06, |
|
"loss": 0.6858, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.5229027277406073, |
|
"grad_norm": 0.13781745731830597, |
|
"learning_rate": 2.275766842361195e-06, |
|
"loss": 0.6477, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.5249613998970664, |
|
"grad_norm": 0.13842599093914032, |
|
"learning_rate": 2.265293422549319e-06, |
|
"loss": 0.6507, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5270200720535256, |
|
"grad_norm": 0.1484973430633545, |
|
"learning_rate": 2.2548561336162984e-06, |
|
"loss": 0.6636, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.5290787442099845, |
|
"grad_norm": 0.13376715779304504, |
|
"learning_rate": 2.2444550921440884e-06, |
|
"loss": 0.6561, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.5311374163664437, |
|
"grad_norm": 0.1416800618171692, |
|
"learning_rate": 2.2340904143097715e-06, |
|
"loss": 0.6542, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.5331960885229026, |
|
"grad_norm": 0.1370648294687271, |
|
"learning_rate": 2.2237622158842584e-06, |
|
"loss": 0.6755, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.5352547606793618, |
|
"grad_norm": 0.12818501889705658, |
|
"learning_rate": 2.213470612230989e-06, |
|
"loss": 0.6177, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.537313432835821, |
|
"grad_norm": 0.1407281905412674, |
|
"learning_rate": 2.2032157183046515e-06, |
|
"loss": 0.6507, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.53937210499228, |
|
"grad_norm": 0.13501091301441193, |
|
"learning_rate": 2.192997648649896e-06, |
|
"loss": 0.6468, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.5414307771487392, |
|
"grad_norm": 0.12218450754880905, |
|
"learning_rate": 2.1828165174000513e-06, |
|
"loss": 0.6429, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.543489449305198, |
|
"grad_norm": 0.13836924731731415, |
|
"learning_rate": 2.172672438275859e-06, |
|
"loss": 0.6598, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.5455481214616573, |
|
"grad_norm": 0.14301835000514984, |
|
"learning_rate": 2.162565524584191e-06, |
|
"loss": 0.6469, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5476067936181161, |
|
"grad_norm": 0.13408608734607697, |
|
"learning_rate": 2.1524958892167968e-06, |
|
"loss": 0.6468, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.5496654657745754, |
|
"grad_norm": 0.14460250735282898, |
|
"learning_rate": 2.142463644649029e-06, |
|
"loss": 0.6726, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.5517241379310345, |
|
"grad_norm": 0.12662199139595032, |
|
"learning_rate": 2.132468902938602e-06, |
|
"loss": 0.6322, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.5537828100874935, |
|
"grad_norm": 0.13386270403862, |
|
"learning_rate": 2.1225117757243263e-06, |
|
"loss": 0.6694, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.5558414822439528, |
|
"grad_norm": 0.13018876314163208, |
|
"learning_rate": 2.112592374224869e-06, |
|
"loss": 0.6514, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.5579001544004116, |
|
"grad_norm": 0.13406234979629517, |
|
"learning_rate": 2.1027108092375147e-06, |
|
"loss": 0.6556, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.559958826556871, |
|
"grad_norm": 0.1314801722764969, |
|
"learning_rate": 2.0928671911369157e-06, |
|
"loss": 0.6597, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.56201749871333, |
|
"grad_norm": 0.1464812457561493, |
|
"learning_rate": 2.0830616298738746e-06, |
|
"loss": 0.659, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.564076170869789, |
|
"grad_norm": 0.13219048082828522, |
|
"learning_rate": 2.0732942349741025e-06, |
|
"loss": 0.6683, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.566134843026248, |
|
"grad_norm": 0.12457743287086487, |
|
"learning_rate": 2.0635651155370064e-06, |
|
"loss": 0.6264, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.568193515182707, |
|
"grad_norm": 0.14103274047374725, |
|
"learning_rate": 2.053874380234461e-06, |
|
"loss": 0.65, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.5702521873391664, |
|
"grad_norm": 0.12605510652065277, |
|
"learning_rate": 2.0442221373096045e-06, |
|
"loss": 0.6407, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.5723108594956252, |
|
"grad_norm": 0.1612055003643036, |
|
"learning_rate": 2.03460849457562e-06, |
|
"loss": 0.6741, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.5743695316520845, |
|
"grad_norm": 0.1546323448419571, |
|
"learning_rate": 2.02503355941454e-06, |
|
"loss": 0.6852, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.5764282038085435, |
|
"grad_norm": 0.13762885332107544, |
|
"learning_rate": 2.0154974387760404e-06, |
|
"loss": 0.6535, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.5784868759650026, |
|
"grad_norm": 0.1335284411907196, |
|
"learning_rate": 2.0060002391762477e-06, |
|
"loss": 0.6552, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.5805455481214616, |
|
"grad_norm": 0.1296115517616272, |
|
"learning_rate": 1.996542066696553e-06, |
|
"loss": 0.6613, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.5826042202779207, |
|
"grad_norm": 0.15091270208358765, |
|
"learning_rate": 1.987123026982423e-06, |
|
"loss": 0.6548, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.58466289243438, |
|
"grad_norm": 0.1307397335767746, |
|
"learning_rate": 1.97774322524222e-06, |
|
"loss": 0.6492, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.5867215645908388, |
|
"grad_norm": 0.14567354321479797, |
|
"learning_rate": 1.968402766246026e-06, |
|
"loss": 0.6574, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.588780236747298, |
|
"grad_norm": 0.13739728927612305, |
|
"learning_rate": 1.959101754324479e-06, |
|
"loss": 0.6759, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.5908389089037571, |
|
"grad_norm": 0.13658419251441956, |
|
"learning_rate": 1.9498402933676e-06, |
|
"loss": 0.6705, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.5928975810602162, |
|
"grad_norm": 0.12363572418689728, |
|
"learning_rate": 1.940618486823632e-06, |
|
"loss": 0.666, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.5949562532166752, |
|
"grad_norm": 0.15004467964172363, |
|
"learning_rate": 1.931436437697896e-06, |
|
"loss": 0.6403, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.5970149253731343, |
|
"grad_norm": 0.12857025861740112, |
|
"learning_rate": 1.9222942485516265e-06, |
|
"loss": 0.6526, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.5990735975295935, |
|
"grad_norm": 0.1368139386177063, |
|
"learning_rate": 1.9131920215008344e-06, |
|
"loss": 0.6429, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.6011322696860524, |
|
"grad_norm": 0.138963520526886, |
|
"learning_rate": 1.904129858215159e-06, |
|
"loss": 0.6561, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.6031909418425117, |
|
"grad_norm": 0.1443481296300888, |
|
"learning_rate": 1.895107859916746e-06, |
|
"loss": 0.6668, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.6052496139989707, |
|
"grad_norm": 0.13580967485904694, |
|
"learning_rate": 1.8861261273790994e-06, |
|
"loss": 0.6736, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.6073082861554298, |
|
"grad_norm": 0.1417776346206665, |
|
"learning_rate": 1.8771847609259675e-06, |
|
"loss": 0.6597, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.6093669583118888, |
|
"grad_norm": 0.14724156260490417, |
|
"learning_rate": 1.8682838604302217e-06, |
|
"loss": 0.6525, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.6114256304683479, |
|
"grad_norm": 0.13658173382282257, |
|
"learning_rate": 1.8594235253127373e-06, |
|
"loss": 0.6571, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.6134843026248071, |
|
"grad_norm": 0.12604515254497528, |
|
"learning_rate": 1.8506038545412823e-06, |
|
"loss": 0.6514, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.615542974781266, |
|
"grad_norm": 0.1426461786031723, |
|
"learning_rate": 1.8418249466294153e-06, |
|
"loss": 0.665, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.6176016469377252, |
|
"grad_norm": 0.13183601200580597, |
|
"learning_rate": 1.8330868996353851e-06, |
|
"loss": 0.6575, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.6196603190941843, |
|
"grad_norm": 0.1418440341949463, |
|
"learning_rate": 1.8243898111610314e-06, |
|
"loss": 0.6344, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.6217189912506433, |
|
"grad_norm": 0.1399330198764801, |
|
"learning_rate": 1.8157337783506998e-06, |
|
"loss": 0.6418, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.6237776634071024, |
|
"grad_norm": 0.13530658185482025, |
|
"learning_rate": 1.807118897890152e-06, |
|
"loss": 0.6339, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.6258363355635614, |
|
"grad_norm": 0.12609177827835083, |
|
"learning_rate": 1.7985452660054908e-06, |
|
"loss": 0.6319, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.6278950077200207, |
|
"grad_norm": 0.16546158492565155, |
|
"learning_rate": 1.7900129784620798e-06, |
|
"loss": 0.6191, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.6299536798764795, |
|
"grad_norm": 0.13399174809455872, |
|
"learning_rate": 1.7815221305634764e-06, |
|
"loss": 0.6648, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.6320123520329388, |
|
"grad_norm": 0.1417001485824585, |
|
"learning_rate": 1.7730728171503704e-06, |
|
"loss": 0.6445, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.6340710241893979, |
|
"grad_norm": 0.14180314540863037, |
|
"learning_rate": 1.7646651325995178e-06, |
|
"loss": 0.6386, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.636129696345857, |
|
"grad_norm": 0.14928773045539856, |
|
"learning_rate": 1.7562991708226945e-06, |
|
"loss": 0.6492, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.638188368502316, |
|
"grad_norm": 0.1402241438627243, |
|
"learning_rate": 1.7479750252656388e-06, |
|
"loss": 0.6238, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.640247040658775, |
|
"grad_norm": 0.13829921185970306, |
|
"learning_rate": 1.7396927889070164e-06, |
|
"loss": 0.6583, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.6423057128152343, |
|
"grad_norm": 0.1358102262020111, |
|
"learning_rate": 1.731452554257373e-06, |
|
"loss": 0.6501, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.6443643849716931, |
|
"grad_norm": 0.1420017033815384, |
|
"learning_rate": 1.723254413358111e-06, |
|
"loss": 0.6591, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.6464230571281524, |
|
"grad_norm": 0.1424987018108368, |
|
"learning_rate": 1.715098457780449e-06, |
|
"loss": 0.6729, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.6484817292846115, |
|
"grad_norm": 0.13872523605823517, |
|
"learning_rate": 1.7069847786244136e-06, |
|
"loss": 0.6307, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6505404014410705, |
|
"grad_norm": 0.13062597811222076, |
|
"learning_rate": 1.698913466517808e-06, |
|
"loss": 0.6428, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.6525990735975296, |
|
"grad_norm": 0.14626429975032806, |
|
"learning_rate": 1.690884611615209e-06, |
|
"loss": 0.6198, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.6546577457539886, |
|
"grad_norm": 0.15756435692310333, |
|
"learning_rate": 1.6828983035969565e-06, |
|
"loss": 0.6731, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.6567164179104479, |
|
"grad_norm": 0.14816921949386597, |
|
"learning_rate": 1.67495463166815e-06, |
|
"loss": 0.6242, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.6587750900669067, |
|
"grad_norm": 0.13179270923137665, |
|
"learning_rate": 1.6670536845576573e-06, |
|
"loss": 0.6626, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.660833762223366, |
|
"grad_norm": 0.13587595522403717, |
|
"learning_rate": 1.6591955505171198e-06, |
|
"loss": 0.6534, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.662892434379825, |
|
"grad_norm": 0.13836577534675598, |
|
"learning_rate": 1.6513803173199653e-06, |
|
"loss": 0.655, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.664951106536284, |
|
"grad_norm": 0.1396552473306656, |
|
"learning_rate": 1.6436080722604314e-06, |
|
"loss": 0.6557, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.6670097786927431, |
|
"grad_norm": 0.14703871309757233, |
|
"learning_rate": 1.63587890215259e-06, |
|
"loss": 0.5978, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.6690684508492022, |
|
"grad_norm": 0.1638912856578827, |
|
"learning_rate": 1.628192893329374e-06, |
|
"loss": 0.6394, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.6711271230056615, |
|
"grad_norm": 0.14806923270225525, |
|
"learning_rate": 1.620550131641615e-06, |
|
"loss": 0.6538, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.6731857951621203, |
|
"grad_norm": 0.1463487446308136, |
|
"learning_rate": 1.612950702457087e-06, |
|
"loss": 0.6587, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.6752444673185796, |
|
"grad_norm": 0.1333266645669937, |
|
"learning_rate": 1.6053946906595502e-06, |
|
"loss": 0.6237, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.6773031394750386, |
|
"grad_norm": 0.1463502198457718, |
|
"learning_rate": 1.5978821806478027e-06, |
|
"loss": 0.675, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.6793618116314977, |
|
"grad_norm": 0.13438794016838074, |
|
"learning_rate": 1.590413256334736e-06, |
|
"loss": 0.6444, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.6814204837879567, |
|
"grad_norm": 0.1534508913755417, |
|
"learning_rate": 1.582988001146405e-06, |
|
"loss": 0.6584, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.6834791559444158, |
|
"grad_norm": 0.12801076471805573, |
|
"learning_rate": 1.5756064980210867e-06, |
|
"loss": 0.667, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.685537828100875, |
|
"grad_norm": 0.14711901545524597, |
|
"learning_rate": 1.5682688294083594e-06, |
|
"loss": 0.6405, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.6875965002573339, |
|
"grad_norm": 0.12615957856178284, |
|
"learning_rate": 1.5609750772681826e-06, |
|
"loss": 0.6632, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.6896551724137931, |
|
"grad_norm": 0.14474698901176453, |
|
"learning_rate": 1.5537253230699784e-06, |
|
"loss": 0.6381, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.6917138445702522, |
|
"grad_norm": 0.14288835227489471, |
|
"learning_rate": 1.5465196477917225e-06, |
|
"loss": 0.6771, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.6937725167267113, |
|
"grad_norm": 0.14714893698692322, |
|
"learning_rate": 1.5393581319190382e-06, |
|
"loss": 0.6537, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.6958311888831703, |
|
"grad_norm": 0.13413602113723755, |
|
"learning_rate": 1.5322408554443027e-06, |
|
"loss": 0.6582, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.6978898610396294, |
|
"grad_norm": 0.14149825274944305, |
|
"learning_rate": 1.5251678978657464e-06, |
|
"loss": 0.6479, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.6999485331960886, |
|
"grad_norm": 0.14568348228931427, |
|
"learning_rate": 1.5181393381865716e-06, |
|
"loss": 0.668, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.7020072053525475, |
|
"grad_norm": 0.13481447100639343, |
|
"learning_rate": 1.511155254914065e-06, |
|
"loss": 0.6473, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.7040658775090067, |
|
"grad_norm": 0.15289506316184998, |
|
"learning_rate": 1.5042157260587231e-06, |
|
"loss": 0.6351, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.7061245496654658, |
|
"grad_norm": 0.12859320640563965, |
|
"learning_rate": 1.4973208291333813e-06, |
|
"loss": 0.6305, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.7081832218219248, |
|
"grad_norm": 0.1490619033575058, |
|
"learning_rate": 1.490470641152345e-06, |
|
"loss": 0.6296, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.7102418939783839, |
|
"grad_norm": 0.12138031423091888, |
|
"learning_rate": 1.4836652386305351e-06, |
|
"loss": 0.639, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.712300566134843, |
|
"grad_norm": 0.13470202684402466, |
|
"learning_rate": 1.4769046975826267e-06, |
|
"loss": 0.6458, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.7143592382913022, |
|
"grad_norm": 0.14960254728794098, |
|
"learning_rate": 1.4701890935222062e-06, |
|
"loss": 0.6717, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.716417910447761, |
|
"grad_norm": 0.13615302741527557, |
|
"learning_rate": 1.4635185014609216e-06, |
|
"loss": 0.6509, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.7184765826042203, |
|
"grad_norm": 0.12218176573514938, |
|
"learning_rate": 1.4568929959076512e-06, |
|
"loss": 0.647, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.7205352547606794, |
|
"grad_norm": 0.13748720288276672, |
|
"learning_rate": 1.4503126508676652e-06, |
|
"loss": 0.672, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.7225939269171384, |
|
"grad_norm": 0.1389789581298828, |
|
"learning_rate": 1.4437775398418042e-06, |
|
"loss": 0.6587, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.7246525990735977, |
|
"grad_norm": 0.13413724303245544, |
|
"learning_rate": 1.4372877358256543e-06, |
|
"loss": 0.6538, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.7267112712300565, |
|
"grad_norm": 0.1562497615814209, |
|
"learning_rate": 1.4308433113087346e-06, |
|
"loss": 0.6617, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.7287699433865158, |
|
"grad_norm": 0.1408907175064087, |
|
"learning_rate": 1.4244443382736858e-06, |
|
"loss": 0.6504, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.7308286155429746, |
|
"grad_norm": 0.13438640534877777, |
|
"learning_rate": 1.4180908881954668e-06, |
|
"loss": 0.654, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.732887287699434, |
|
"grad_norm": 0.13639037311077118, |
|
"learning_rate": 1.4117830320405568e-06, |
|
"loss": 0.6685, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.734945959855893, |
|
"grad_norm": 0.14213210344314575, |
|
"learning_rate": 1.405520840266159e-06, |
|
"loss": 0.6556, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.737004632012352, |
|
"grad_norm": 0.1482432335615158, |
|
"learning_rate": 1.3993043828194217e-06, |
|
"loss": 0.6419, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.7390633041688113, |
|
"grad_norm": 0.1303771287202835, |
|
"learning_rate": 1.3931337291366488e-06, |
|
"loss": 0.6361, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.74112197632527, |
|
"grad_norm": 0.1479034125804901, |
|
"learning_rate": 1.387008948142528e-06, |
|
"loss": 0.6569, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.7431806484817294, |
|
"grad_norm": 0.168193057179451, |
|
"learning_rate": 1.3809301082493592e-06, |
|
"loss": 0.6567, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.7452393206381882, |
|
"grad_norm": 0.13786673545837402, |
|
"learning_rate": 1.3748972773562946e-06, |
|
"loss": 0.6405, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.7472979927946475, |
|
"grad_norm": 0.13587407767772675, |
|
"learning_rate": 1.3689105228485739e-06, |
|
"loss": 0.6353, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.7493566649511065, |
|
"grad_norm": 0.13992324471473694, |
|
"learning_rate": 1.3629699115967757e-06, |
|
"loss": 0.655, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.7514153371075656, |
|
"grad_norm": 0.13453035056591034, |
|
"learning_rate": 1.3570755099560701e-06, |
|
"loss": 0.658, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.7534740092640249, |
|
"grad_norm": 0.15303651988506317, |
|
"learning_rate": 1.3512273837654793e-06, |
|
"loss": 0.6135, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.7555326814204837, |
|
"grad_norm": 0.13186489045619965, |
|
"learning_rate": 1.3454255983471367e-06, |
|
"loss": 0.6647, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.757591353576943, |
|
"grad_norm": 0.13427408039569855, |
|
"learning_rate": 1.3396702185055614e-06, |
|
"loss": 0.6597, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.7596500257334018, |
|
"grad_norm": 0.15728993713855743, |
|
"learning_rate": 1.3339613085269357e-06, |
|
"loss": 0.659, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.761708697889861, |
|
"grad_norm": 0.13846909999847412, |
|
"learning_rate": 1.3282989321783822e-06, |
|
"loss": 0.6543, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.7637673700463201, |
|
"grad_norm": 0.13936255872249603, |
|
"learning_rate": 1.322683152707255e-06, |
|
"loss": 0.6436, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.7658260422027792, |
|
"grad_norm": 0.1383313685655594, |
|
"learning_rate": 1.3171140328404339e-06, |
|
"loss": 0.6493, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.7678847143592384, |
|
"grad_norm": 0.14984488487243652, |
|
"learning_rate": 1.3115916347836222e-06, |
|
"loss": 0.6628, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.7699433865156973, |
|
"grad_norm": 0.1229867935180664, |
|
"learning_rate": 1.3061160202206501e-06, |
|
"loss": 0.6301, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.7720020586721565, |
|
"grad_norm": 0.12617282569408417, |
|
"learning_rate": 1.3006872503127887e-06, |
|
"loss": 0.6368, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.7740607308286156, |
|
"grad_norm": 0.1278039813041687, |
|
"learning_rate": 1.2953053856980674e-06, |
|
"loss": 0.6588, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.7761194029850746, |
|
"grad_norm": 0.1354069858789444, |
|
"learning_rate": 1.2899704864905922e-06, |
|
"loss": 0.6669, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.7781780751415337, |
|
"grad_norm": 0.1457299292087555, |
|
"learning_rate": 1.284682612279878e-06, |
|
"loss": 0.6522, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.7802367472979927, |
|
"grad_norm": 0.13067875802516937, |
|
"learning_rate": 1.2794418221301842e-06, |
|
"loss": 0.6367, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.782295419454452, |
|
"grad_norm": 0.14541006088256836, |
|
"learning_rate": 1.2742481745798496e-06, |
|
"loss": 0.6594, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.7843540916109109, |
|
"grad_norm": 0.14047367870807648, |
|
"learning_rate": 1.269101727640644e-06, |
|
"loss": 0.635, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.7864127637673701, |
|
"grad_norm": 0.1398647427558899, |
|
"learning_rate": 1.2640025387971156e-06, |
|
"loss": 0.6349, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.7884714359238292, |
|
"grad_norm": 0.13229133188724518, |
|
"learning_rate": 1.2589506650059544e-06, |
|
"loss": 0.6436, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.7905301080802882, |
|
"grad_norm": 0.18162629008293152, |
|
"learning_rate": 1.2539461626953498e-06, |
|
"loss": 0.6548, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.7925887802367473, |
|
"grad_norm": 0.14638854563236237, |
|
"learning_rate": 1.248989087764366e-06, |
|
"loss": 0.675, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.7946474523932063, |
|
"grad_norm": 0.12400197237730026, |
|
"learning_rate": 1.2440794955823128e-06, |
|
"loss": 0.6442, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.7967061245496656, |
|
"grad_norm": 0.13715320825576782, |
|
"learning_rate": 1.2392174409881311e-06, |
|
"loss": 0.6573, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.7987647967061244, |
|
"grad_norm": 0.1651625782251358, |
|
"learning_rate": 1.2344029782897774e-06, |
|
"loss": 0.6412, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.8008234688625837, |
|
"grad_norm": 0.13429532945156097, |
|
"learning_rate": 1.229636161263619e-06, |
|
"loss": 0.6756, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.8028821410190428, |
|
"grad_norm": 0.13396626710891724, |
|
"learning_rate": 1.224917043153832e-06, |
|
"loss": 0.6354, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.8049408131755018, |
|
"grad_norm": 0.1430749148130417, |
|
"learning_rate": 1.2202456766718092e-06, |
|
"loss": 0.6482, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.8069994853319609, |
|
"grad_norm": 0.15483106672763824, |
|
"learning_rate": 1.2156221139955669e-06, |
|
"loss": 0.6767, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.80905815748842, |
|
"grad_norm": 0.1451236456632614, |
|
"learning_rate": 1.2110464067691666e-06, |
|
"loss": 0.6449, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.8111168296448792, |
|
"grad_norm": 0.1282985806465149, |
|
"learning_rate": 1.206518606102135e-06, |
|
"loss": 0.6602, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.813175501801338, |
|
"grad_norm": 0.13331440091133118, |
|
"learning_rate": 1.2020387625688943e-06, |
|
"loss": 0.66, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.8152341739577973, |
|
"grad_norm": 0.13615739345550537, |
|
"learning_rate": 1.1976069262081987e-06, |
|
"loss": 0.6377, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.8172928461142563, |
|
"grad_norm": 0.14270727336406708, |
|
"learning_rate": 1.1932231465225714e-06, |
|
"loss": 0.6456, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.8193515182707154, |
|
"grad_norm": 0.13713142275810242, |
|
"learning_rate": 1.1888874724777582e-06, |
|
"loss": 0.6569, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.8214101904271744, |
|
"grad_norm": 0.13675430417060852, |
|
"learning_rate": 1.1845999525021723e-06, |
|
"loss": 0.6474, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.8234688625836335, |
|
"grad_norm": 0.14368852972984314, |
|
"learning_rate": 1.1803606344863615e-06, |
|
"loss": 0.5877, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.8255275347400928, |
|
"grad_norm": 0.14031407237052917, |
|
"learning_rate": 1.1761695657824677e-06, |
|
"loss": 0.6448, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.8275862068965516, |
|
"grad_norm": 0.150904580950737, |
|
"learning_rate": 1.1720267932036986e-06, |
|
"loss": 0.6559, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.8296448790530109, |
|
"grad_norm": 0.14362327754497528, |
|
"learning_rate": 1.1679323630238087e-06, |
|
"loss": 0.6355, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.83170355120947, |
|
"grad_norm": 0.1305018961429596, |
|
"learning_rate": 1.163886320976579e-06, |
|
"loss": 0.6691, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.833762223365929, |
|
"grad_norm": 0.131569966673851, |
|
"learning_rate": 1.1598887122553061e-06, |
|
"loss": 0.6731, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.835820895522388, |
|
"grad_norm": 0.14277830719947815, |
|
"learning_rate": 1.1559395815122975e-06, |
|
"loss": 0.6614, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.837879567678847, |
|
"grad_norm": 0.15379874408245087, |
|
"learning_rate": 1.1520389728583763e-06, |
|
"loss": 0.6481, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.8399382398353064, |
|
"grad_norm": 0.13738073408603668, |
|
"learning_rate": 1.1481869298623837e-06, |
|
"loss": 0.6665, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.8419969119917652, |
|
"grad_norm": 0.1339586228132248, |
|
"learning_rate": 1.1443834955506942e-06, |
|
"loss": 0.6573, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.8440555841482245, |
|
"grad_norm": 0.14855796098709106, |
|
"learning_rate": 1.140628712406736e-06, |
|
"loss": 0.6795, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.8461142563046835, |
|
"grad_norm": 0.132362961769104, |
|
"learning_rate": 1.1369226223705176e-06, |
|
"loss": 0.6795, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.8481729284611426, |
|
"grad_norm": 0.1348879635334015, |
|
"learning_rate": 1.133265266838153e-06, |
|
"loss": 0.6529, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.8502316006176016, |
|
"grad_norm": 0.14411653578281403, |
|
"learning_rate": 1.1296566866614067e-06, |
|
"loss": 0.6538, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.8522902727740607, |
|
"grad_norm": 0.14565573632717133, |
|
"learning_rate": 1.1260969221472352e-06, |
|
"loss": 0.6572, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.85434894493052, |
|
"grad_norm": 0.1327780932188034, |
|
"learning_rate": 1.1225860130573334e-06, |
|
"loss": 0.6655, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8564076170869788, |
|
"grad_norm": 0.14732906222343445, |
|
"learning_rate": 1.1191239986076947e-06, |
|
"loss": 0.6755, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.858466289243438, |
|
"grad_norm": 0.15718206763267517, |
|
"learning_rate": 1.1157109174681713e-06, |
|
"loss": 0.6408, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.860524961399897, |
|
"grad_norm": 0.12371277809143066, |
|
"learning_rate": 1.1123468077620423e-06, |
|
"loss": 0.6387, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.8625836335563561, |
|
"grad_norm": 0.12767182290554047, |
|
"learning_rate": 1.109031707065588e-06, |
|
"loss": 0.647, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.8646423057128152, |
|
"grad_norm": 0.13031242787837982, |
|
"learning_rate": 1.1057656524076691e-06, |
|
"loss": 0.6287, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.8667009778692742, |
|
"grad_norm": 0.14342977106571198, |
|
"learning_rate": 1.1025486802693158e-06, |
|
"loss": 0.6728, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.8687596500257335, |
|
"grad_norm": 0.13070468604564667, |
|
"learning_rate": 1.099380826583316e-06, |
|
"loss": 0.6233, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.8708183221821924, |
|
"grad_norm": 0.145916149020195, |
|
"learning_rate": 1.0962621267338198e-06, |
|
"loss": 0.6327, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.8728769943386516, |
|
"grad_norm": 0.14288493990898132, |
|
"learning_rate": 1.0931926155559384e-06, |
|
"loss": 0.6465, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.8749356664951107, |
|
"grad_norm": 0.1527261734008789, |
|
"learning_rate": 1.0901723273353599e-06, |
|
"loss": 0.6797, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.8769943386515697, |
|
"grad_norm": 0.13376379013061523, |
|
"learning_rate": 1.0872012958079609e-06, |
|
"loss": 0.6446, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.8790530108080288, |
|
"grad_norm": 0.13035869598388672, |
|
"learning_rate": 1.0842795541594354e-06, |
|
"loss": 0.6892, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.8811116829644878, |
|
"grad_norm": 0.13996127247810364, |
|
"learning_rate": 1.0814071350249213e-06, |
|
"loss": 0.6529, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.883170355120947, |
|
"grad_norm": 0.14141958951950073, |
|
"learning_rate": 1.078584070488635e-06, |
|
"loss": 0.6645, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.885229027277406, |
|
"grad_norm": 0.13510560989379883, |
|
"learning_rate": 1.0758103920835144e-06, |
|
"loss": 0.6366, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.8872876994338652, |
|
"grad_norm": 0.15664884448051453, |
|
"learning_rate": 1.0730861307908677e-06, |
|
"loss": 0.6738, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.8893463715903243, |
|
"grad_norm": 0.1363374888896942, |
|
"learning_rate": 1.0704113170400253e-06, |
|
"loss": 0.6188, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.8914050437467833, |
|
"grad_norm": 0.1372835487127304, |
|
"learning_rate": 1.0677859807079994e-06, |
|
"loss": 0.6599, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.8934637159032424, |
|
"grad_norm": 0.1461261808872223, |
|
"learning_rate": 1.0652101511191535e-06, |
|
"loss": 0.6595, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.8955223880597014, |
|
"grad_norm": 0.11850570142269135, |
|
"learning_rate": 1.0626838570448716e-06, |
|
"loss": 0.6557, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.8975810602161607, |
|
"grad_norm": 0.13033467531204224, |
|
"learning_rate": 1.0602071267032394e-06, |
|
"loss": 0.6523, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.8996397323726195, |
|
"grad_norm": 0.12491913139820099, |
|
"learning_rate": 1.057779987758727e-06, |
|
"loss": 0.6439, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.9016984045290788, |
|
"grad_norm": 0.1398879587650299, |
|
"learning_rate": 1.0554024673218808e-06, |
|
"loss": 0.6636, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.9037570766855378, |
|
"grad_norm": 0.1425112634897232, |
|
"learning_rate": 1.0530745919490201e-06, |
|
"loss": 0.6634, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.905815748841997, |
|
"grad_norm": 0.1285870522260666, |
|
"learning_rate": 1.0507963876419424e-06, |
|
"loss": 0.6532, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.907874420998456, |
|
"grad_norm": 0.13235710561275482, |
|
"learning_rate": 1.048567879847631e-06, |
|
"loss": 0.6711, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.909933093154915, |
|
"grad_norm": 0.13371641933918, |
|
"learning_rate": 1.0463890934579714e-06, |
|
"loss": 0.6642, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.9119917653113743, |
|
"grad_norm": 0.14254343509674072, |
|
"learning_rate": 1.0442600528094722e-06, |
|
"loss": 0.6514, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.914050437467833, |
|
"grad_norm": 0.13420189917087555, |
|
"learning_rate": 1.0421807816829955e-06, |
|
"loss": 0.6522, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.9161091096242924, |
|
"grad_norm": 0.1435345858335495, |
|
"learning_rate": 1.04015130330349e-06, |
|
"loss": 0.6384, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.9181677817807514, |
|
"grad_norm": 0.14257173240184784, |
|
"learning_rate": 1.0381716403397304e-06, |
|
"loss": 0.6587, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.9202264539372105, |
|
"grad_norm": 0.12754391133785248, |
|
"learning_rate": 1.0362418149040673e-06, |
|
"loss": 0.6407, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.9222851260936697, |
|
"grad_norm": 0.13912741839885712, |
|
"learning_rate": 1.0343618485521762e-06, |
|
"loss": 0.642, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.9243437982501286, |
|
"grad_norm": 0.1380196213722229, |
|
"learning_rate": 1.0325317622828216e-06, |
|
"loss": 0.6418, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.9264024704065879, |
|
"grad_norm": 0.15211521089076996, |
|
"learning_rate": 1.0307515765376167e-06, |
|
"loss": 0.6884, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.9284611425630467, |
|
"grad_norm": 0.13326020538806915, |
|
"learning_rate": 1.0290213112007999e-06, |
|
"loss": 0.6478, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.930519814719506, |
|
"grad_norm": 0.1361563354730606, |
|
"learning_rate": 1.0273409855990113e-06, |
|
"loss": 0.6719, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.932578486875965, |
|
"grad_norm": 0.1280031055212021, |
|
"learning_rate": 1.0257106185010746e-06, |
|
"loss": 0.6288, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.934637159032424, |
|
"grad_norm": 0.12487500160932541, |
|
"learning_rate": 1.0241302281177906e-06, |
|
"loss": 0.6469, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.9366958311888833, |
|
"grad_norm": 0.13708752393722534, |
|
"learning_rate": 1.0225998321017314e-06, |
|
"loss": 0.6216, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.9387545033453422, |
|
"grad_norm": 0.1262640804052353, |
|
"learning_rate": 1.021119447547047e-06, |
|
"loss": 0.6456, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.9408131755018014, |
|
"grad_norm": 0.1493413746356964, |
|
"learning_rate": 1.019689090989268e-06, |
|
"loss": 0.7005, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.9428718476582603, |
|
"grad_norm": 0.1342114359140396, |
|
"learning_rate": 1.0183087784051269e-06, |
|
"loss": 0.66, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.9449305198147195, |
|
"grad_norm": 0.1446632593870163, |
|
"learning_rate": 1.0169785252123765e-06, |
|
"loss": 0.6647, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.9469891919711786, |
|
"grad_norm": 0.14843548834323883, |
|
"learning_rate": 1.015698346269618e-06, |
|
"loss": 0.648, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.9490478641276376, |
|
"grad_norm": 0.12779954075813293, |
|
"learning_rate": 1.0144682558761371e-06, |
|
"loss": 0.6595, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.951106536284097, |
|
"grad_norm": 0.14572598040103912, |
|
"learning_rate": 1.013288267771741e-06, |
|
"loss": 0.6551, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.9531652084405557, |
|
"grad_norm": 0.15109211206436157, |
|
"learning_rate": 1.0121583951366075e-06, |
|
"loss": 0.6552, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.955223880597015, |
|
"grad_norm": 0.12881575524806976, |
|
"learning_rate": 1.011078650591137e-06, |
|
"loss": 0.6818, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.9572825527534738, |
|
"grad_norm": 0.1423639953136444, |
|
"learning_rate": 1.010049046195811e-06, |
|
"loss": 0.662, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9593412249099331, |
|
"grad_norm": 0.13927972316741943, |
|
"learning_rate": 1.0090695934510577e-06, |
|
"loss": 0.6184, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.9613998970663922, |
|
"grad_norm": 0.13898450136184692, |
|
"learning_rate": 1.0081403032971245e-06, |
|
"loss": 0.6391, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.9634585692228512, |
|
"grad_norm": 0.13634267449378967, |
|
"learning_rate": 1.0072611861139538e-06, |
|
"loss": 0.6273, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.9655172413793105, |
|
"grad_norm": 0.15343570709228516, |
|
"learning_rate": 1.006432251721069e-06, |
|
"loss": 0.6302, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.9675759135357693, |
|
"grad_norm": 0.13033926486968994, |
|
"learning_rate": 1.0056535093774643e-06, |
|
"loss": 0.6322, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.9696345856922286, |
|
"grad_norm": 0.14002519845962524, |
|
"learning_rate": 1.0049249677815005e-06, |
|
"loss": 0.6646, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.9716932578486874, |
|
"grad_norm": 0.14621154963970184, |
|
"learning_rate": 1.0042466350708083e-06, |
|
"loss": 0.6923, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.9737519300051467, |
|
"grad_norm": 0.1436609923839569, |
|
"learning_rate": 1.0036185188221976e-06, |
|
"loss": 0.6494, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.9758106021616058, |
|
"grad_norm": 0.14785051345825195, |
|
"learning_rate": 1.0030406260515726e-06, |
|
"loss": 0.6472, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.9778692743180648, |
|
"grad_norm": 0.13290195167064667, |
|
"learning_rate": 1.0025129632138545e-06, |
|
"loss": 0.6662, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.979927946474524, |
|
"grad_norm": 0.14163339138031006, |
|
"learning_rate": 1.002035536202907e-06, |
|
"loss": 0.6727, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.981986618630983, |
|
"grad_norm": 0.2116987407207489, |
|
"learning_rate": 1.0016083503514734e-06, |
|
"loss": 0.6291, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.9840452907874422, |
|
"grad_norm": 0.1317073106765747, |
|
"learning_rate": 1.0012314104311142e-06, |
|
"loss": 0.6564, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.9861039629439012, |
|
"grad_norm": 0.13808391988277435, |
|
"learning_rate": 1.0009047206521559e-06, |
|
"loss": 0.6372, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.9881626351003603, |
|
"grad_norm": 0.1466529816389084, |
|
"learning_rate": 1.0006282846636434e-06, |
|
"loss": 0.6741, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.9902213072568193, |
|
"grad_norm": 0.1410931497812271, |
|
"learning_rate": 1.000402105553299e-06, |
|
"loss": 0.633, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.9922799794132784, |
|
"grad_norm": 0.1295842081308365, |
|
"learning_rate": 1.0002261858474878e-06, |
|
"loss": 0.6724, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.9943386515697377, |
|
"grad_norm": 0.13751675188541412, |
|
"learning_rate": 1.0001005275111895e-06, |
|
"loss": 0.6354, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.9963973237261965, |
|
"grad_norm": 0.13173769414424896, |
|
"learning_rate": 1.0000251319479768e-06, |
|
"loss": 0.6481, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.9984559958826558, |
|
"grad_norm": 0.13309703767299652, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.6489, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.9984559958826558, |
|
"step": 970, |
|
"total_flos": 1.3119474486813327e+19, |
|
"train_loss": 0.32624263990785657, |
|
"train_runtime": 85452.3313, |
|
"train_samples_per_second": 0.182, |
|
"train_steps_per_second": 0.011 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 970, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3119474486813327e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|