ncbateman committed
Commit 84f7f42 (verified) · 1 Parent(s): be6bf95

Training in progress, step 1000, checkpoint

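This commit is a routine checkpoint push: only the four files under last-checkpoint/ listed below change (each through its Git LFS pointer), plus trainer_state.json. To pull exactly this revision later, a minimal sketch with huggingface_hub is shown here; the repo_id is a placeholder, since the repository name is not part of this diff.

# Hedged sketch: pin this exact checkpoint commit when downloading.
# The repo_id below is a placeholder -- substitute the actual repository.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="ncbateman/placeholder-repo",   # hypothetical repo id
    revision="84f7f42",                      # commit shown above; the full hash also works
    allow_patterns=["last-checkpoint/*"],    # only fetch the checkpoint files
)
print(local_dir)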
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ed8e46de5ca3b16b7d4946fe6cf5403d0cd32c3c3f502e1f414e8507dc3bf50
+oid sha256:995b27b879c7b0c136c4b26a6ff6d99d1336153e978506224b7adb7687eeedb6
 size 2264640
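Each entry like the one above is a Git LFS pointer rather than the binary itself: the oid is the SHA-256 of the stored blob, and only the oid changes between be6bf95 and 84f7f42 while the size stays 2264640 bytes. A minimal sketch for checking a locally downloaded copy against the new pointer (path and expected values copied from the pointer above; the script itself is illustrative):

# Minimal sketch: verify a downloaded file against its Git LFS pointer.
# Path and expected values mirror the pointer above; adjust as needed.
import hashlib
import os

path = "last-checkpoint/adapter_model.safetensors"
expected_oid = "995b27b879c7b0c136c4b26a6ff6d99d1336153e978506224b7adb7687eeedb6"
expected_size = 2264640

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches file")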
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:057901fd6d7593948715109075c12bc6c8bcda4fe4cb163a0db2068774e58cad
+oid sha256:fef7346f36b906aad9f1617baee5297e2cf0e70d4d1ea5de0dc4c973364b2240
 size 1183674
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e577caed341d63f36ac89f4cdc452526772e9dd6718e0facb5410b7e4008eeda
+oid sha256:e29c007d609a592b9d9b4d096992334f09ca211d70d03d461df92b792f8cae36
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0428512ada8c2471b2f37ecbdd4efa5f13e3ba0e777fddbfec0396eebc36c01a
+oid sha256:4077036d99500a708f700f75da24d51b5300e184ad35fda49dc5a4df5596cca2
 size 1064
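Together, adapter_model.safetensors, optimizer.pt, rng_state.pth, and scheduler.pt are the pieces that transformers' Trainer writes so a run can resume deterministically from this step. A hedged sketch of resuming from the downloaded directory, assuming `trainer` is an already-configured transformers.Trainer for the same model, adapter, and training arguments:

# Hedged sketch: resume training from the step-1000 checkpoint directory.
# `trainer` is assumed to be a transformers.Trainer configured exactly as in
# the original run (same model/adapter, TrainingArguments, datasets).
trainer.train(resume_from_checkpoint="last-checkpoint")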
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5120910384068279,
5
  "eval_steps": 250,
6
- "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6339,6 +6339,714 @@
6339
  "learning_rate": 2.7091379149682685e-06,
6340
  "loss": 1.598,
6341
  "step": 900
6342
  }
6343
  ],
6344
  "logging_steps": 1,
@@ -6353,12 +7061,12 @@
6353
  "should_evaluate": false,
6354
  "should_log": false,
6355
  "should_save": true,
6356
- "should_training_stop": false
6357
  },
6358
  "attributes": {}
6359
  }
6360
  },
6361
- "total_flos": 8440376419418112.0,
6362
  "train_batch_size": 2,
6363
  "trial_name": null,
6364
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5689900426742532,
5
  "eval_steps": 250,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6339
  "learning_rate": 2.7091379149682685e-06,
6340
  "loss": 1.598,
6341
  "step": 900
6342
+ },
6343
+ {
6344
+ "epoch": 0.5126600284495021,
6345
+ "grad_norm": 6.939188480377197,
6346
+ "learning_rate": 2.6557085182532582e-06,
6347
+ "loss": 1.4069,
6348
+ "step": 901
6349
+ },
6350
+ {
6351
+ "epoch": 0.5132290184921764,
6352
+ "grad_norm": 6.613868236541748,
6353
+ "learning_rate": 2.602796871124663e-06,
6354
+ "loss": 2.6769,
6355
+ "step": 902
6356
+ },
6357
+ {
6358
+ "epoch": 0.5137980085348507,
6359
+ "grad_norm": 4.967800617218018,
6360
+ "learning_rate": 2.5504035522157854e-06,
6361
+ "loss": 1.7265,
6362
+ "step": 903
6363
+ },
6364
+ {
6365
+ "epoch": 0.5143669985775249,
6366
+ "grad_norm": 7.544163227081299,
6367
+ "learning_rate": 2.4985291344915674e-06,
6368
+ "loss": 3.8019,
6369
+ "step": 904
6370
+ },
6371
+ {
6372
+ "epoch": 0.5149359886201992,
6373
+ "grad_norm": 6.729382514953613,
6374
+ "learning_rate": 2.4471741852423237e-06,
6375
+ "loss": 1.1223,
6376
+ "step": 905
6377
+ },
6378
+ {
6379
+ "epoch": 0.5155049786628734,
6380
+ "grad_norm": 5.194341659545898,
6381
+ "learning_rate": 2.3963392660775575e-06,
6382
+ "loss": 2.6233,
6383
+ "step": 906
6384
+ },
6385
+ {
6386
+ "epoch": 0.5160739687055477,
6387
+ "grad_norm": 5.60222864151001,
6388
+ "learning_rate": 2.3460249329197824e-06,
6389
+ "loss": 1.6183,
6390
+ "step": 907
6391
+ },
6392
+ {
6393
+ "epoch": 0.5166429587482219,
6394
+ "grad_norm": 5.169523239135742,
6395
+ "learning_rate": 2.296231735998511e-06,
6396
+ "loss": 1.4671,
6397
+ "step": 908
6398
+ },
6399
+ {
6400
+ "epoch": 0.5172119487908962,
6401
+ "grad_norm": 8.426031112670898,
6402
+ "learning_rate": 2.2469602198441573e-06,
6403
+ "loss": 2.8763,
6404
+ "step": 909
6405
+ },
6406
+ {
6407
+ "epoch": 0.5177809388335705,
6408
+ "grad_norm": 6.08635139465332,
6409
+ "learning_rate": 2.1982109232821178e-06,
6410
+ "loss": 2.1274,
6411
+ "step": 910
6412
+ },
6413
+ {
6414
+ "epoch": 0.5183499288762446,
6415
+ "grad_norm": 8.61262321472168,
6416
+ "learning_rate": 2.149984379426906e-06,
6417
+ "loss": 1.795,
6418
+ "step": 911
6419
+ },
6420
+ {
6421
+ "epoch": 0.518918918918919,
6422
+ "grad_norm": 12.403318405151367,
6423
+ "learning_rate": 2.102281115676258e-06,
6424
+ "loss": 1.608,
6425
+ "step": 912
6426
+ },
6427
+ {
6428
+ "epoch": 0.5194879089615931,
6429
+ "grad_norm": 8.934358596801758,
6430
+ "learning_rate": 2.0551016537054493e-06,
6431
+ "loss": 3.1318,
6432
+ "step": 913
6433
+ },
6434
+ {
6435
+ "epoch": 0.5200568990042674,
6436
+ "grad_norm": 7.507859230041504,
6437
+ "learning_rate": 2.008446509461498e-06,
6438
+ "loss": 1.3386,
6439
+ "step": 914
6440
+ },
6441
+ {
6442
+ "epoch": 0.5206258890469416,
6443
+ "grad_norm": 10.2330904006958,
6444
+ "learning_rate": 1.962316193157593e-06,
6445
+ "loss": 1.6304,
6446
+ "step": 915
6447
+ },
6448
+ {
6449
+ "epoch": 0.5211948790896159,
6450
+ "grad_norm": 10.44956111907959,
6451
+ "learning_rate": 1.91671120926748e-06,
6452
+ "loss": 2.102,
6453
+ "step": 916
6454
+ },
6455
+ {
6456
+ "epoch": 0.5217638691322902,
6457
+ "grad_norm": 5.6211652755737305,
6458
+ "learning_rate": 1.8716320565199618e-06,
6459
+ "loss": 2.347,
6460
+ "step": 917
6461
+ },
6462
+ {
6463
+ "epoch": 0.5223328591749644,
6464
+ "grad_norm": 6.046701431274414,
6465
+ "learning_rate": 1.8270792278934302e-06,
6466
+ "loss": 1.8603,
6467
+ "step": 918
6468
+ },
6469
+ {
6470
+ "epoch": 0.5229018492176387,
6471
+ "grad_norm": 6.489639759063721,
6472
+ "learning_rate": 1.7830532106104747e-06,
6473
+ "loss": 1.5991,
6474
+ "step": 919
6475
+ },
6476
+ {
6477
+ "epoch": 0.5234708392603129,
6478
+ "grad_norm": 8.459025382995605,
6479
+ "learning_rate": 1.7395544861325718e-06,
6480
+ "loss": 2.8278,
6481
+ "step": 920
6482
+ },
6483
+ {
6484
+ "epoch": 0.5240398293029872,
6485
+ "grad_norm": 4.396022796630859,
6486
+ "learning_rate": 1.696583530154794e-06,
6487
+ "loss": 1.2431,
6488
+ "step": 921
6489
+ },
6490
+ {
6491
+ "epoch": 0.5246088193456615,
6492
+ "grad_norm": 6.775049686431885,
6493
+ "learning_rate": 1.6541408126006463e-06,
6494
+ "loss": 2.4385,
6495
+ "step": 922
6496
+ },
6497
+ {
6498
+ "epoch": 0.5251778093883357,
6499
+ "grad_norm": 4.970674514770508,
6500
+ "learning_rate": 1.6122267976168781e-06,
6501
+ "loss": 1.9198,
6502
+ "step": 923
6503
+ },
6504
+ {
6505
+ "epoch": 0.52574679943101,
6506
+ "grad_norm": 4.357985019683838,
6507
+ "learning_rate": 1.5708419435684462e-06,
6508
+ "loss": 1.8314,
6509
+ "step": 924
6510
+ },
6511
+ {
6512
+ "epoch": 0.5263157894736842,
6513
+ "grad_norm": 4.673070430755615,
6514
+ "learning_rate": 1.5299867030334814e-06,
6515
+ "loss": 1.6432,
6516
+ "step": 925
6517
+ },
6518
+ {
6519
+ "epoch": 0.5268847795163585,
6520
+ "grad_norm": 5.390334129333496,
6521
+ "learning_rate": 1.4896615227983468e-06,
6522
+ "loss": 3.1497,
6523
+ "step": 926
6524
+ },
6525
+ {
6526
+ "epoch": 0.5274537695590327,
6527
+ "grad_norm": 7.182600021362305,
6528
+ "learning_rate": 1.4498668438527597e-06,
6529
+ "loss": 1.9414,
6530
+ "step": 927
6531
+ },
6532
+ {
6533
+ "epoch": 0.528022759601707,
6534
+ "grad_norm": 4.548110485076904,
6535
+ "learning_rate": 1.4106031013849496e-06,
6536
+ "loss": 1.3904,
6537
+ "step": 928
6538
+ },
6539
+ {
6540
+ "epoch": 0.5285917496443813,
6541
+ "grad_norm": 7.250638961791992,
6542
+ "learning_rate": 1.3718707247769135e-06,
6543
+ "loss": 1.5528,
6544
+ "step": 929
6545
+ },
6546
+ {
6547
+ "epoch": 0.5291607396870555,
6548
+ "grad_norm": 5.74232816696167,
6549
+ "learning_rate": 1.333670137599713e-06,
6550
+ "loss": 1.868,
6551
+ "step": 930
6552
+ },
6553
+ {
6554
+ "epoch": 0.5297297297297298,
6555
+ "grad_norm": 9.61638069152832,
6556
+ "learning_rate": 1.2960017576088446e-06,
6557
+ "loss": 1.9816,
6558
+ "step": 931
6559
+ },
6560
+ {
6561
+ "epoch": 0.530298719772404,
6562
+ "grad_norm": 4.908766746520996,
6563
+ "learning_rate": 1.2588659967397e-06,
6564
+ "loss": 2.0533,
6565
+ "step": 932
6566
+ },
6567
+ {
6568
+ "epoch": 0.5308677098150782,
6569
+ "grad_norm": 4.330400466918945,
6570
+ "learning_rate": 1.222263261102985e-06,
6571
+ "loss": 2.0314,
6572
+ "step": 933
6573
+ },
6574
+ {
6575
+ "epoch": 0.5314366998577524,
6576
+ "grad_norm": 3.70294189453125,
6577
+ "learning_rate": 1.1861939509803687e-06,
6578
+ "loss": 2.0148,
6579
+ "step": 934
6580
+ },
6581
+ {
6582
+ "epoch": 0.5320056899004267,
6583
+ "grad_norm": 5.016382217407227,
6584
+ "learning_rate": 1.1506584608200367e-06,
6585
+ "loss": 2.3735,
6586
+ "step": 935
6587
+ },
6588
+ {
6589
+ "epoch": 0.532574679943101,
6590
+ "grad_norm": 10.169556617736816,
6591
+ "learning_rate": 1.1156571792324211e-06,
6592
+ "loss": 1.8206,
6593
+ "step": 936
6594
+ },
6595
+ {
6596
+ "epoch": 0.5331436699857752,
6597
+ "grad_norm": 6.3581085205078125,
6598
+ "learning_rate": 1.0811904889859336e-06,
6599
+ "loss": 2.533,
6600
+ "step": 937
6601
+ },
6602
+ {
6603
+ "epoch": 0.5337126600284495,
6604
+ "grad_norm": 6.080915451049805,
6605
+ "learning_rate": 1.0472587670027678e-06,
6606
+ "loss": 1.598,
6607
+ "step": 938
6608
+ },
6609
+ {
6610
+ "epoch": 0.5342816500711237,
6611
+ "grad_norm": 6.210170745849609,
6612
+ "learning_rate": 1.0138623843548078e-06,
6613
+ "loss": 1.828,
6614
+ "step": 939
6615
+ },
6616
+ {
6617
+ "epoch": 0.534850640113798,
6618
+ "grad_norm": 6.622677803039551,
6619
+ "learning_rate": 9.810017062595322e-07,
6620
+ "loss": 1.7117,
6621
+ "step": 940
6622
+ },
6623
+ {
6624
+ "epoch": 0.5354196301564723,
6625
+ "grad_norm": 5.1109819412231445,
6626
+ "learning_rate": 9.486770920760668e-07,
6627
+ "loss": 1.8321,
6628
+ "step": 941
6629
+ },
6630
+ {
6631
+ "epoch": 0.5359886201991465,
6632
+ "grad_norm": 6.059126853942871,
6633
+ "learning_rate": 9.168888953011989e-07,
6634
+ "loss": 2.9244,
6635
+ "step": 942
6636
+ },
6637
+ {
6638
+ "epoch": 0.5365576102418208,
6639
+ "grad_norm": 6.728851318359375,
6640
+ "learning_rate": 8.856374635655695e-07,
6641
+ "loss": 1.5337,
6642
+ "step": 943
6643
+ },
6644
+ {
6645
+ "epoch": 0.537126600284495,
6646
+ "grad_norm": 6.851590156555176,
6647
+ "learning_rate": 8.549231386298151e-07,
6648
+ "loss": 2.2449,
6649
+ "step": 944
6650
+ },
6651
+ {
6652
+ "epoch": 0.5376955903271693,
6653
+ "grad_norm": 5.476071357727051,
6654
+ "learning_rate": 8.247462563808817e-07,
6655
+ "loss": 1.999,
6656
+ "step": 945
6657
+ },
6658
+ {
6659
+ "epoch": 0.5382645803698435,
6660
+ "grad_norm": 6.682757377624512,
6661
+ "learning_rate": 7.951071468283167e-07,
6662
+ "loss": 1.5688,
6663
+ "step": 946
6664
+ },
6665
+ {
6666
+ "epoch": 0.5388335704125178,
6667
+ "grad_norm": 6.973927021026611,
6668
+ "learning_rate": 7.66006134100672e-07,
6669
+ "loss": 1.7305,
6670
+ "step": 947
6671
+ },
6672
+ {
6673
+ "epoch": 0.5394025604551921,
6674
+ "grad_norm": 6.132145404815674,
6675
+ "learning_rate": 7.374435364419674e-07,
6676
+ "loss": 2.6093,
6677
+ "step": 948
6678
+ },
6679
+ {
6680
+ "epoch": 0.5399715504978663,
6681
+ "grad_norm": 3.4302048683166504,
6682
+ "learning_rate": 7.094196662081831e-07,
6683
+ "loss": 1.5475,
6684
+ "step": 949
6685
+ },
6686
+ {
6687
+ "epoch": 0.5405405405405406,
6688
+ "grad_norm": 6.01040506362915,
6689
+ "learning_rate": 6.819348298638839e-07,
6690
+ "loss": 1.9851,
6691
+ "step": 950
6692
+ },
6693
+ {
6694
+ "epoch": 0.5411095305832148,
6695
+ "grad_norm": 5.79357385635376,
6696
+ "learning_rate": 6.549893279788277e-07,
6697
+ "loss": 1.2785,
6698
+ "step": 951
6699
+ },
6700
+ {
6701
+ "epoch": 0.5416785206258891,
6702
+ "grad_norm": 6.42232084274292,
6703
+ "learning_rate": 6.285834552247128e-07,
6704
+ "loss": 1.6813,
6705
+ "step": 952
6706
+ },
6707
+ {
6708
+ "epoch": 0.5422475106685632,
6709
+ "grad_norm": 6.847588539123535,
6710
+ "learning_rate": 6.027175003719354e-07,
6711
+ "loss": 1.7042,
6712
+ "step": 953
6713
+ },
6714
+ {
6715
+ "epoch": 0.5428165007112375,
6716
+ "grad_norm": 8.760133743286133,
6717
+ "learning_rate": 5.773917462864264e-07,
6718
+ "loss": 2.6909,
6719
+ "step": 954
6720
+ },
6721
+ {
6722
+ "epoch": 0.5433854907539118,
6723
+ "grad_norm": 6.681099891662598,
6724
+ "learning_rate": 5.526064699265753e-07,
6725
+ "loss": 2.5284,
6726
+ "step": 955
6727
+ },
6728
+ {
6729
+ "epoch": 0.543954480796586,
6730
+ "grad_norm": 8.253862380981445,
6731
+ "learning_rate": 5.283619423401998e-07,
6732
+ "loss": 2.5049,
6733
+ "step": 956
6734
+ },
6735
+ {
6736
+ "epoch": 0.5445234708392603,
6737
+ "grad_norm": 6.301835536956787,
6738
+ "learning_rate": 5.046584286615697e-07,
6739
+ "loss": 2.0345,
6740
+ "step": 957
6741
+ },
6742
+ {
6743
+ "epoch": 0.5450924608819345,
6744
+ "grad_norm": 8.807053565979004,
6745
+ "learning_rate": 4.814961881085045e-07,
6746
+ "loss": 2.1812,
6747
+ "step": 958
6748
+ },
6749
+ {
6750
+ "epoch": 0.5456614509246088,
6751
+ "grad_norm": 6.5801897048950195,
6752
+ "learning_rate": 4.5887547397955864e-07,
6753
+ "loss": 2.3601,
6754
+ "step": 959
6755
+ },
6756
+ {
6757
+ "epoch": 0.5462304409672831,
6758
+ "grad_norm": 6.495395660400391,
6759
+ "learning_rate": 4.367965336512403e-07,
6760
+ "loss": 2.0076,
6761
+ "step": 960
6762
+ },
6763
+ {
6764
+ "epoch": 0.5467994310099573,
6765
+ "grad_norm": 11.701818466186523,
6766
+ "learning_rate": 4.1525960857530243e-07,
6767
+ "loss": 2.6091,
6768
+ "step": 961
6769
+ },
6770
+ {
6771
+ "epoch": 0.5473684210526316,
6772
+ "grad_norm": 10.334817886352539,
6773
+ "learning_rate": 3.9426493427611177e-07,
6774
+ "loss": 1.5223,
6775
+ "step": 962
6776
+ },
6777
+ {
6778
+ "epoch": 0.5479374110953058,
6779
+ "grad_norm": 3.968838691711426,
6780
+ "learning_rate": 3.738127403480507e-07,
6781
+ "loss": 1.8091,
6782
+ "step": 963
6783
+ },
6784
+ {
6785
+ "epoch": 0.5485064011379801,
6786
+ "grad_norm": 5.9291486740112305,
6787
+ "learning_rate": 3.5390325045304706e-07,
6788
+ "loss": 1.5164,
6789
+ "step": 964
6790
+ },
6791
+ {
6792
+ "epoch": 0.5490753911806543,
6793
+ "grad_norm": 5.5870842933654785,
6794
+ "learning_rate": 3.3453668231809286e-07,
6795
+ "loss": 1.7174,
6796
+ "step": 965
6797
+ },
6798
+ {
6799
+ "epoch": 0.5496443812233286,
6800
+ "grad_norm": 7.495179653167725,
6801
+ "learning_rate": 3.157132477328628e-07,
6802
+ "loss": 1.5558,
6803
+ "step": 966
6804
+ },
6805
+ {
6806
+ "epoch": 0.5502133712660029,
6807
+ "grad_norm": 8.937362670898438,
6808
+ "learning_rate": 2.9743315254743833e-07,
6809
+ "loss": 2.2747,
6810
+ "step": 967
6811
+ },
6812
+ {
6813
+ "epoch": 0.5507823613086771,
6814
+ "grad_norm": 4.983654975891113,
6815
+ "learning_rate": 2.796965966699927e-07,
6816
+ "loss": 2.3196,
6817
+ "step": 968
6818
+ },
6819
+ {
6820
+ "epoch": 0.5513513513513514,
6821
+ "grad_norm": 5.2556915283203125,
6822
+ "learning_rate": 2.625037740646763e-07,
6823
+ "loss": 1.9528,
6824
+ "step": 969
6825
+ },
6826
+ {
6827
+ "epoch": 0.5519203413940256,
6828
+ "grad_norm": 5.226326942443848,
6829
+ "learning_rate": 2.458548727494292e-07,
6830
+ "loss": 2.6639,
6831
+ "step": 970
6832
+ },
6833
+ {
6834
+ "epoch": 0.5524893314366999,
6835
+ "grad_norm": 7.9373345375061035,
6836
+ "learning_rate": 2.2975007479397738e-07,
6837
+ "loss": 1.8186,
6838
+ "step": 971
6839
+ },
6840
+ {
6841
+ "epoch": 0.5530583214793741,
6842
+ "grad_norm": 4.213002681732178,
6843
+ "learning_rate": 2.1418955631781202e-07,
6844
+ "loss": 1.3597,
6845
+ "step": 972
6846
+ },
6847
+ {
6848
+ "epoch": 0.5536273115220484,
6849
+ "grad_norm": 7.157125949859619,
6850
+ "learning_rate": 1.9917348748826335e-07,
6851
+ "loss": 2.8583,
6852
+ "step": 973
6853
+ },
6854
+ {
6855
+ "epoch": 0.5541963015647227,
6856
+ "grad_norm": 6.626620292663574,
6857
+ "learning_rate": 1.847020325186577e-07,
6858
+ "loss": 2.1275,
6859
+ "step": 974
6860
+ },
6861
+ {
6862
+ "epoch": 0.5547652916073968,
6863
+ "grad_norm": 7.9781317710876465,
6864
+ "learning_rate": 1.7077534966650766e-07,
6865
+ "loss": 1.6131,
6866
+ "step": 975
6867
+ },
6868
+ {
6869
+ "epoch": 0.5553342816500711,
6870
+ "grad_norm": 4.986821174621582,
6871
+ "learning_rate": 1.5739359123178587e-07,
6872
+ "loss": 2.0745,
6873
+ "step": 976
6874
+ },
6875
+ {
6876
+ "epoch": 0.5559032716927453,
6877
+ "grad_norm": 6.511902809143066,
6878
+ "learning_rate": 1.4455690355525964e-07,
6879
+ "loss": 2.6664,
6880
+ "step": 977
6881
+ },
6882
+ {
6883
+ "epoch": 0.5564722617354196,
6884
+ "grad_norm": 3.586979627609253,
6885
+ "learning_rate": 1.3226542701689215e-07,
6886
+ "loss": 2.1587,
6887
+ "step": 978
6888
+ },
6889
+ {
6890
+ "epoch": 0.5570412517780939,
6891
+ "grad_norm": 6.518825054168701,
6892
+ "learning_rate": 1.2051929603428825e-07,
6893
+ "loss": 2.3117,
6894
+ "step": 979
6895
+ },
6896
+ {
6897
+ "epoch": 0.5576102418207681,
6898
+ "grad_norm": 6.538543224334717,
6899
+ "learning_rate": 1.0931863906127327e-07,
6900
+ "loss": 1.6998,
6901
+ "step": 980
6902
+ },
6903
+ {
6904
+ "epoch": 0.5581792318634424,
6905
+ "grad_norm": 5.692102432250977,
6906
+ "learning_rate": 9.866357858642205e-08,
6907
+ "loss": 2.5719,
6908
+ "step": 981
6909
+ },
6910
+ {
6911
+ "epoch": 0.5587482219061166,
6912
+ "grad_norm": 7.857847690582275,
6913
+ "learning_rate": 8.855423113177664e-08,
6914
+ "loss": 2.4416,
6915
+ "step": 982
6916
+ },
6917
+ {
6918
+ "epoch": 0.5593172119487909,
6919
+ "grad_norm": 4.545175552368164,
6920
+ "learning_rate": 7.899070725153613e-08,
6921
+ "loss": 2.098,
6922
+ "step": 983
6923
+ },
6924
+ {
6925
+ "epoch": 0.5598862019914651,
6926
+ "grad_norm": 6.184070110321045,
6927
+ "learning_rate": 6.997311153086883e-08,
6928
+ "loss": 2.8567,
6929
+ "step": 984
6930
+ },
6931
+ {
6932
+ "epoch": 0.5604551920341394,
6933
+ "grad_norm": 6.378092288970947,
6934
+ "learning_rate": 6.150154258476315e-08,
6935
+ "loss": 1.4213,
6936
+ "step": 985
6937
+ },
6938
+ {
6939
+ "epoch": 0.5610241820768137,
6940
+ "grad_norm": 6.844626426696777,
6941
+ "learning_rate": 5.3576093056922906e-08,
6942
+ "loss": 2.7032,
6943
+ "step": 986
6944
+ },
6945
+ {
6946
+ "epoch": 0.5615931721194879,
6947
+ "grad_norm": 7.550478935241699,
6948
+ "learning_rate": 4.619684961881254e-08,
6949
+ "loss": 1.3663,
6950
+ "step": 987
6951
+ },
6952
+ {
6953
+ "epoch": 0.5621621621621622,
6954
+ "grad_norm": 8.875478744506836,
6955
+ "learning_rate": 3.936389296864129e-08,
6956
+ "loss": 3.3068,
6957
+ "step": 988
6958
+ },
6959
+ {
6960
+ "epoch": 0.5627311522048364,
6961
+ "grad_norm": 4.764933109283447,
6962
+ "learning_rate": 3.3077297830541584e-08,
6963
+ "loss": 1.3713,
6964
+ "step": 989
6965
+ },
6966
+ {
6967
+ "epoch": 0.5633001422475107,
6968
+ "grad_norm": 7.228865146636963,
6969
+ "learning_rate": 2.7337132953697554e-08,
6970
+ "loss": 2.1043,
6971
+ "step": 990
6972
+ },
6973
+ {
6974
+ "epoch": 0.5638691322901849,
6975
+ "grad_norm": 7.009644031524658,
6976
+ "learning_rate": 2.214346111164556e-08,
6977
+ "loss": 1.6978,
6978
+ "step": 991
6979
+ },
6980
+ {
6981
+ "epoch": 0.5644381223328592,
6982
+ "grad_norm": 8.948090553283691,
6983
+ "learning_rate": 1.749633910153592e-08,
6984
+ "loss": 2.9902,
6985
+ "step": 992
6986
+ },
6987
+ {
6988
+ "epoch": 0.5650071123755335,
6989
+ "grad_norm": 8.406102180480957,
6990
+ "learning_rate": 1.3395817743561134e-08,
6991
+ "loss": 2.3606,
6992
+ "step": 993
6993
+ },
6994
+ {
6995
+ "epoch": 0.5655761024182077,
6996
+ "grad_norm": 6.994669437408447,
6997
+ "learning_rate": 9.841941880361916e-09,
6998
+ "loss": 2.2296,
6999
+ "step": 994
7000
+ },
7001
+ {
7002
+ "epoch": 0.566145092460882,
7003
+ "grad_norm": 8.03478717803955,
7004
+ "learning_rate": 6.834750376549792e-09,
7005
+ "loss": 1.2956,
7006
+ "step": 995
7007
+ },
7008
+ {
7009
+ "epoch": 0.5667140825035561,
7010
+ "grad_norm": 4.596794128417969,
7011
+ "learning_rate": 4.3742761183018784e-09,
7012
+ "loss": 1.3163,
7013
+ "step": 996
7014
+ },
7015
+ {
7016
+ "epoch": 0.5672830725462304,
7017
+ "grad_norm": 5.452564716339111,
7018
+ "learning_rate": 2.4605460129556445e-09,
7019
+ "loss": 1.6505,
7020
+ "step": 997
7021
+ },
7022
+ {
7023
+ "epoch": 0.5678520625889047,
7024
+ "grad_norm": 8.177964210510254,
7025
+ "learning_rate": 1.0935809887702154e-09,
7026
+ "loss": 1.8445,
7027
+ "step": 998
7028
+ },
7029
+ {
7030
+ "epoch": 0.5684210526315789,
7031
+ "grad_norm": 6.218096733093262,
7032
+ "learning_rate": 2.7339599464326627e-10,
7033
+ "loss": 1.7874,
7034
+ "step": 999
7035
+ },
7036
+ {
7037
+ "epoch": 0.5689900426742532,
7038
+ "grad_norm": 5.743197917938232,
7039
+ "learning_rate": 0.0,
7040
+ "loss": 2.6632,
7041
+ "step": 1000
7042
+ },
7043
+ {
7044
+ "epoch": 0.5689900426742532,
7045
+ "eval_loss": 2.037670850753784,
7046
+ "eval_runtime": 13.4751,
7047
+ "eval_samples_per_second": 54.916,
7048
+ "eval_steps_per_second": 27.458,
7049
+ "step": 1000
7050
  }
7051
  ],
7052
  "logging_steps": 1,
 
7061
  "should_evaluate": false,
7062
  "should_log": false,
7063
  "should_save": true,
7064
+ "should_training_stop": true
7065
  },
7066
  "attributes": {}
7067
  }
7068
  },
7069
+ "total_flos": 9388052623392768.0,
7070
  "train_batch_size": 2,
7071
  "trial_name": null,
7072
  "trial_params": null