Training in progress, step 1000, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:995b27b879c7b0c136c4b26a6ff6d99d1336153e978506224b7adb7687eeedb6
 size 2264640
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fef7346f36b906aad9f1617baee5297e2cf0e70d4d1ea5de0dc4c973364b2240
 size 1183674
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e29c007d609a592b9d9b4d096992334f09ca211d70d03d461df92b792f8cae36
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4077036d99500a708f700f75da24d51b5300e184ad35fda49dc5a4df5596cca2
 size 1064
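Each pointer file above follows the Git LFS pointer format visible in the diffs: a version line, a sha256 oid, and a byte size. As a minimal sketch of checking a downloaded blob against its pointer (the helper names and the local path are illustrative, not part of this repo):

import hashlib
from pathlib import Path

def read_lfs_pointer(pointer_text: str) -> dict:
    # Parse the "key value" lines of a Git LFS pointer (version/oid/size).
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_text: str, blob_path: str) -> bool:
    # Compare the file's byte size and sha256 digest against the pointer.
    fields = read_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# Example with the adapter pointer from this commit (path is hypothetical):
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:995b27b879c7b0c136c4b26a6ff6d99d1336153e978506224b7adb7687eeedb6
size 2264640"""
# print(verify_blob(pointer, "last-checkpoint/adapter_model.safetensors"))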
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.5689900426742532,
   "eval_steps": 250,
-  "global_step":
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6339,6 +6339,714 @@
       "learning_rate": 2.7091379149682685e-06,
       "loss": 1.598,
       "step": 900
+    },
+    {
+      "epoch": 0.5126600284495021,
+      "grad_norm": 6.939188480377197,
+      "learning_rate": 2.6557085182532582e-06,
+      "loss": 1.4069,
+      "step": 901
+    },
+    {
+      "epoch": 0.5132290184921764,
+      "grad_norm": 6.613868236541748,
+      "learning_rate": 2.602796871124663e-06,
+      "loss": 2.6769,
+      "step": 902
+    },
+    {
+      "epoch": 0.5137980085348507,
+      "grad_norm": 4.967800617218018,
+      "learning_rate": 2.5504035522157854e-06,
+      "loss": 1.7265,
+      "step": 903
+    },
+    {
+      "epoch": 0.5143669985775249,
+      "grad_norm": 7.544163227081299,
+      "learning_rate": 2.4985291344915674e-06,
+      "loss": 3.8019,
+      "step": 904
+    },
+    {
+      "epoch": 0.5149359886201992,
+      "grad_norm": 6.729382514953613,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.1223,
+      "step": 905
+    },
+    {
+      "epoch": 0.5155049786628734,
+      "grad_norm": 5.194341659545898,
+      "learning_rate": 2.3963392660775575e-06,
+      "loss": 2.6233,
+      "step": 906
+    },
+    {
+      "epoch": 0.5160739687055477,
+      "grad_norm": 5.60222864151001,
+      "learning_rate": 2.3460249329197824e-06,
+      "loss": 1.6183,
+      "step": 907
+    },
+    {
+      "epoch": 0.5166429587482219,
+      "grad_norm": 5.169523239135742,
+      "learning_rate": 2.296231735998511e-06,
+      "loss": 1.4671,
+      "step": 908
+    },
+    {
+      "epoch": 0.5172119487908962,
+      "grad_norm": 8.426031112670898,
+      "learning_rate": 2.2469602198441573e-06,
+      "loss": 2.8763,
+      "step": 909
+    },
+    {
+      "epoch": 0.5177809388335705,
+      "grad_norm": 6.08635139465332,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 2.1274,
+      "step": 910
+    },
+    {
+      "epoch": 0.5183499288762446,
+      "grad_norm": 8.61262321472168,
+      "learning_rate": 2.149984379426906e-06,
+      "loss": 1.795,
+      "step": 911
+    },
+    {
+      "epoch": 0.518918918918919,
+      "grad_norm": 12.403318405151367,
+      "learning_rate": 2.102281115676258e-06,
+      "loss": 1.608,
+      "step": 912
+    },
+    {
+      "epoch": 0.5194879089615931,
+      "grad_norm": 8.934358596801758,
+      "learning_rate": 2.0551016537054493e-06,
+      "loss": 3.1318,
+      "step": 913
+    },
+    {
+      "epoch": 0.5200568990042674,
+      "grad_norm": 7.507859230041504,
+      "learning_rate": 2.008446509461498e-06,
+      "loss": 1.3386,
+      "step": 914
+    },
+    {
+      "epoch": 0.5206258890469416,
+      "grad_norm": 10.2330904006958,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 1.6304,
+      "step": 915
+    },
+    {
+      "epoch": 0.5211948790896159,
+      "grad_norm": 10.44956111907959,
+      "learning_rate": 1.91671120926748e-06,
+      "loss": 2.102,
+      "step": 916
+    },
+    {
+      "epoch": 0.5217638691322902,
+      "grad_norm": 5.6211652755737305,
+      "learning_rate": 1.8716320565199618e-06,
+      "loss": 2.347,
+      "step": 917
+    },
+    {
+      "epoch": 0.5223328591749644,
+      "grad_norm": 6.046701431274414,
+      "learning_rate": 1.8270792278934302e-06,
+      "loss": 1.8603,
+      "step": 918
+    },
+    {
+      "epoch": 0.5229018492176387,
+      "grad_norm": 6.489639759063721,
+      "learning_rate": 1.7830532106104747e-06,
+      "loss": 1.5991,
+      "step": 919
+    },
+    {
+      "epoch": 0.5234708392603129,
+      "grad_norm": 8.459025382995605,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 2.8278,
+      "step": 920
+    },
+    {
+      "epoch": 0.5240398293029872,
+      "grad_norm": 4.396022796630859,
+      "learning_rate": 1.696583530154794e-06,
+      "loss": 1.2431,
+      "step": 921
+    },
+    {
+      "epoch": 0.5246088193456615,
+      "grad_norm": 6.775049686431885,
+      "learning_rate": 1.6541408126006463e-06,
+      "loss": 2.4385,
+      "step": 922
+    },
+    {
+      "epoch": 0.5251778093883357,
+      "grad_norm": 4.970674514770508,
+      "learning_rate": 1.6122267976168781e-06,
+      "loss": 1.9198,
+      "step": 923
+    },
+    {
+      "epoch": 0.52574679943101,
+      "grad_norm": 4.357985019683838,
+      "learning_rate": 1.5708419435684462e-06,
+      "loss": 1.8314,
+      "step": 924
+    },
+    {
+      "epoch": 0.5263157894736842,
+      "grad_norm": 4.673070430755615,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.6432,
+      "step": 925
+    },
+    {
+      "epoch": 0.5268847795163585,
+      "grad_norm": 5.390334129333496,
+      "learning_rate": 1.4896615227983468e-06,
+      "loss": 3.1497,
+      "step": 926
+    },
+    {
+      "epoch": 0.5274537695590327,
+      "grad_norm": 7.182600021362305,
+      "learning_rate": 1.4498668438527597e-06,
+      "loss": 1.9414,
+      "step": 927
+    },
+    {
+      "epoch": 0.528022759601707,
+      "grad_norm": 4.548110485076904,
+      "learning_rate": 1.4106031013849496e-06,
+      "loss": 1.3904,
+      "step": 928
+    },
+    {
+      "epoch": 0.5285917496443813,
+      "grad_norm": 7.250638961791992,
+      "learning_rate": 1.3718707247769135e-06,
+      "loss": 1.5528,
+      "step": 929
+    },
+    {
+      "epoch": 0.5291607396870555,
+      "grad_norm": 5.74232816696167,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 1.868,
+      "step": 930
+    },
+    {
+      "epoch": 0.5297297297297298,
+      "grad_norm": 9.61638069152832,
+      "learning_rate": 1.2960017576088446e-06,
+      "loss": 1.9816,
+      "step": 931
+    },
+    {
+      "epoch": 0.530298719772404,
+      "grad_norm": 4.908766746520996,
+      "learning_rate": 1.2588659967397e-06,
+      "loss": 2.0533,
+      "step": 932
+    },
+    {
+      "epoch": 0.5308677098150782,
+      "grad_norm": 4.330400466918945,
+      "learning_rate": 1.222263261102985e-06,
+      "loss": 2.0314,
+      "step": 933
+    },
+    {
+      "epoch": 0.5314366998577524,
+      "grad_norm": 3.70294189453125,
+      "learning_rate": 1.1861939509803687e-06,
+      "loss": 2.0148,
+      "step": 934
+    },
+    {
+      "epoch": 0.5320056899004267,
+      "grad_norm": 5.016382217407227,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 2.3735,
+      "step": 935
+    },
+    {
+      "epoch": 0.532574679943101,
+      "grad_norm": 10.169556617736816,
+      "learning_rate": 1.1156571792324211e-06,
+      "loss": 1.8206,
+      "step": 936
+    },
+    {
+      "epoch": 0.5331436699857752,
+      "grad_norm": 6.3581085205078125,
+      "learning_rate": 1.0811904889859336e-06,
+      "loss": 2.533,
+      "step": 937
+    },
+    {
+      "epoch": 0.5337126600284495,
+      "grad_norm": 6.080915451049805,
+      "learning_rate": 1.0472587670027678e-06,
+      "loss": 1.598,
+      "step": 938
+    },
+    {
+      "epoch": 0.5342816500711237,
+      "grad_norm": 6.210170745849609,
+      "learning_rate": 1.0138623843548078e-06,
+      "loss": 1.828,
+      "step": 939
+    },
+    {
+      "epoch": 0.534850640113798,
+      "grad_norm": 6.622677803039551,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 1.7117,
+      "step": 940
+    },
+    {
+      "epoch": 0.5354196301564723,
+      "grad_norm": 5.1109819412231445,
+      "learning_rate": 9.486770920760668e-07,
+      "loss": 1.8321,
+      "step": 941
+    },
+    {
+      "epoch": 0.5359886201991465,
+      "grad_norm": 6.059126853942871,
+      "learning_rate": 9.168888953011989e-07,
+      "loss": 2.9244,
+      "step": 942
+    },
+    {
+      "epoch": 0.5365576102418208,
+      "grad_norm": 6.728851318359375,
+      "learning_rate": 8.856374635655695e-07,
+      "loss": 1.5337,
+      "step": 943
+    },
+    {
+      "epoch": 0.537126600284495,
+      "grad_norm": 6.851590156555176,
+      "learning_rate": 8.549231386298151e-07,
+      "loss": 2.2449,
+      "step": 944
+    },
+    {
+      "epoch": 0.5376955903271693,
+      "grad_norm": 5.476071357727051,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 1.999,
+      "step": 945
+    },
+    {
+      "epoch": 0.5382645803698435,
+      "grad_norm": 6.682757377624512,
+      "learning_rate": 7.951071468283167e-07,
+      "loss": 1.5688,
+      "step": 946
+    },
+    {
+      "epoch": 0.5388335704125178,
+      "grad_norm": 6.973927021026611,
+      "learning_rate": 7.66006134100672e-07,
+      "loss": 1.7305,
+      "step": 947
+    },
+    {
+      "epoch": 0.5394025604551921,
+      "grad_norm": 6.132145404815674,
+      "learning_rate": 7.374435364419674e-07,
+      "loss": 2.6093,
+      "step": 948
+    },
+    {
+      "epoch": 0.5399715504978663,
+      "grad_norm": 3.4302048683166504,
+      "learning_rate": 7.094196662081831e-07,
+      "loss": 1.5475,
+      "step": 949
+    },
+    {
+      "epoch": 0.5405405405405406,
+      "grad_norm": 6.01040506362915,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 1.9851,
+      "step": 950
+    },
+    {
+      "epoch": 0.5411095305832148,
+      "grad_norm": 5.79357385635376,
+      "learning_rate": 6.549893279788277e-07,
+      "loss": 1.2785,
+      "step": 951
+    },
+    {
+      "epoch": 0.5416785206258891,
+      "grad_norm": 6.42232084274292,
+      "learning_rate": 6.285834552247128e-07,
+      "loss": 1.6813,
+      "step": 952
+    },
+    {
+      "epoch": 0.5422475106685632,
+      "grad_norm": 6.847588539123535,
+      "learning_rate": 6.027175003719354e-07,
+      "loss": 1.7042,
+      "step": 953
+    },
+    {
+      "epoch": 0.5428165007112375,
+      "grad_norm": 8.760133743286133,
+      "learning_rate": 5.773917462864264e-07,
+      "loss": 2.6909,
+      "step": 954
+    },
+    {
+      "epoch": 0.5433854907539118,
+      "grad_norm": 6.681099891662598,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 2.5284,
+      "step": 955
+    },
+    {
+      "epoch": 0.543954480796586,
+      "grad_norm": 8.253862380981445,
+      "learning_rate": 5.283619423401998e-07,
+      "loss": 2.5049,
+      "step": 956
+    },
+    {
+      "epoch": 0.5445234708392603,
+      "grad_norm": 6.301835536956787,
+      "learning_rate": 5.046584286615697e-07,
+      "loss": 2.0345,
+      "step": 957
+    },
+    {
+      "epoch": 0.5450924608819345,
+      "grad_norm": 8.807053565979004,
+      "learning_rate": 4.814961881085045e-07,
+      "loss": 2.1812,
+      "step": 958
+    },
+    {
+      "epoch": 0.5456614509246088,
+      "grad_norm": 6.5801897048950195,
+      "learning_rate": 4.5887547397955864e-07,
+      "loss": 2.3601,
+      "step": 959
+    },
+    {
+      "epoch": 0.5462304409672831,
+      "grad_norm": 6.495395660400391,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 2.0076,
+      "step": 960
+    },
+    {
+      "epoch": 0.5467994310099573,
+      "grad_norm": 11.701818466186523,
+      "learning_rate": 4.1525960857530243e-07,
+      "loss": 2.6091,
+      "step": 961
+    },
+    {
+      "epoch": 0.5473684210526316,
+      "grad_norm": 10.334817886352539,
+      "learning_rate": 3.9426493427611177e-07,
+      "loss": 1.5223,
+      "step": 962
+    },
+    {
+      "epoch": 0.5479374110953058,
+      "grad_norm": 3.968838691711426,
+      "learning_rate": 3.738127403480507e-07,
+      "loss": 1.8091,
+      "step": 963
+    },
+    {
+      "epoch": 0.5485064011379801,
+      "grad_norm": 5.9291486740112305,
+      "learning_rate": 3.5390325045304706e-07,
+      "loss": 1.5164,
+      "step": 964
+    },
+    {
+      "epoch": 0.5490753911806543,
+      "grad_norm": 5.5870842933654785,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 1.7174,
+      "step": 965
+    },
+    {
+      "epoch": 0.5496443812233286,
+      "grad_norm": 7.495179653167725,
+      "learning_rate": 3.157132477328628e-07,
+      "loss": 1.5558,
+      "step": 966
+    },
+    {
+      "epoch": 0.5502133712660029,
+      "grad_norm": 8.937362670898438,
+      "learning_rate": 2.9743315254743833e-07,
+      "loss": 2.2747,
+      "step": 967
+    },
+    {
+      "epoch": 0.5507823613086771,
+      "grad_norm": 4.983654975891113,
+      "learning_rate": 2.796965966699927e-07,
+      "loss": 2.3196,
+      "step": 968
+    },
+    {
+      "epoch": 0.5513513513513514,
+      "grad_norm": 5.2556915283203125,
+      "learning_rate": 2.625037740646763e-07,
+      "loss": 1.9528,
+      "step": 969
+    },
+    {
+      "epoch": 0.5519203413940256,
+      "grad_norm": 5.226326942443848,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 2.6639,
+      "step": 970
+    },
+    {
+      "epoch": 0.5524893314366999,
+      "grad_norm": 7.9373345375061035,
+      "learning_rate": 2.2975007479397738e-07,
+      "loss": 1.8186,
+      "step": 971
+    },
+    {
+      "epoch": 0.5530583214793741,
+      "grad_norm": 4.213002681732178,
+      "learning_rate": 2.1418955631781202e-07,
+      "loss": 1.3597,
+      "step": 972
+    },
+    {
+      "epoch": 0.5536273115220484,
+      "grad_norm": 7.157125949859619,
+      "learning_rate": 1.9917348748826335e-07,
+      "loss": 2.8583,
+      "step": 973
+    },
+    {
+      "epoch": 0.5541963015647227,
+      "grad_norm": 6.626620292663574,
+      "learning_rate": 1.847020325186577e-07,
+      "loss": 2.1275,
+      "step": 974
+    },
+    {
+      "epoch": 0.5547652916073968,
+      "grad_norm": 7.9781317710876465,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 1.6131,
+      "step": 975
+    },
+    {
+      "epoch": 0.5553342816500711,
+      "grad_norm": 4.986821174621582,
+      "learning_rate": 1.5739359123178587e-07,
+      "loss": 2.0745,
+      "step": 976
+    },
+    {
+      "epoch": 0.5559032716927453,
+      "grad_norm": 6.511902809143066,
+      "learning_rate": 1.4455690355525964e-07,
+      "loss": 2.6664,
+      "step": 977
+    },
+    {
+      "epoch": 0.5564722617354196,
+      "grad_norm": 3.586979627609253,
+      "learning_rate": 1.3226542701689215e-07,
+      "loss": 2.1587,
+      "step": 978
+    },
+    {
+      "epoch": 0.5570412517780939,
+      "grad_norm": 6.518825054168701,
+      "learning_rate": 1.2051929603428825e-07,
+      "loss": 2.3117,
+      "step": 979
+    },
+    {
+      "epoch": 0.5576102418207681,
+      "grad_norm": 6.538543224334717,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 1.6998,
+      "step": 980
+    },
+    {
+      "epoch": 0.5581792318634424,
+      "grad_norm": 5.692102432250977,
+      "learning_rate": 9.866357858642205e-08,
+      "loss": 2.5719,
+      "step": 981
+    },
+    {
+      "epoch": 0.5587482219061166,
+      "grad_norm": 7.857847690582275,
+      "learning_rate": 8.855423113177664e-08,
+      "loss": 2.4416,
+      "step": 982
+    },
+    {
+      "epoch": 0.5593172119487909,
+      "grad_norm": 4.545175552368164,
+      "learning_rate": 7.899070725153613e-08,
+      "loss": 2.098,
+      "step": 983
+    },
+    {
+      "epoch": 0.5598862019914651,
+      "grad_norm": 6.184070110321045,
+      "learning_rate": 6.997311153086883e-08,
+      "loss": 2.8567,
+      "step": 984
+    },
+    {
+      "epoch": 0.5604551920341394,
+      "grad_norm": 6.378092288970947,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 1.4213,
+      "step": 985
+    },
+    {
+      "epoch": 0.5610241820768137,
+      "grad_norm": 6.844626426696777,
+      "learning_rate": 5.3576093056922906e-08,
+      "loss": 2.7032,
+      "step": 986
+    },
+    {
+      "epoch": 0.5615931721194879,
+      "grad_norm": 7.550478935241699,
+      "learning_rate": 4.619684961881254e-08,
+      "loss": 1.3663,
+      "step": 987
+    },
+    {
+      "epoch": 0.5621621621621622,
+      "grad_norm": 8.875478744506836,
+      "learning_rate": 3.936389296864129e-08,
+      "loss": 3.3068,
+      "step": 988
+    },
+    {
+      "epoch": 0.5627311522048364,
+      "grad_norm": 4.764933109283447,
+      "learning_rate": 3.3077297830541584e-08,
+      "loss": 1.3713,
+      "step": 989
+    },
+    {
+      "epoch": 0.5633001422475107,
+      "grad_norm": 7.228865146636963,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 2.1043,
+      "step": 990
+    },
+    {
+      "epoch": 0.5638691322901849,
+      "grad_norm": 7.009644031524658,
+      "learning_rate": 2.214346111164556e-08,
+      "loss": 1.6978,
+      "step": 991
+    },
+    {
+      "epoch": 0.5644381223328592,
+      "grad_norm": 8.948090553283691,
+      "learning_rate": 1.749633910153592e-08,
+      "loss": 2.9902,
+      "step": 992
+    },
+    {
+      "epoch": 0.5650071123755335,
+      "grad_norm": 8.406102180480957,
+      "learning_rate": 1.3395817743561134e-08,
+      "loss": 2.3606,
+      "step": 993
+    },
+    {
+      "epoch": 0.5655761024182077,
+      "grad_norm": 6.994669437408447,
+      "learning_rate": 9.841941880361916e-09,
+      "loss": 2.2296,
+      "step": 994
+    },
+    {
+      "epoch": 0.566145092460882,
+      "grad_norm": 8.03478717803955,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 1.2956,
+      "step": 995
+    },
+    {
+      "epoch": 0.5667140825035561,
+      "grad_norm": 4.596794128417969,
+      "learning_rate": 4.3742761183018784e-09,
+      "loss": 1.3163,
+      "step": 996
+    },
+    {
+      "epoch": 0.5672830725462304,
+      "grad_norm": 5.452564716339111,
+      "learning_rate": 2.4605460129556445e-09,
+      "loss": 1.6505,
+      "step": 997
+    },
+    {
+      "epoch": 0.5678520625889047,
+      "grad_norm": 8.177964210510254,
+      "learning_rate": 1.0935809887702154e-09,
+      "loss": 1.8445,
+      "step": 998
+    },
+    {
+      "epoch": 0.5684210526315789,
+      "grad_norm": 6.218096733093262,
+      "learning_rate": 2.7339599464326627e-10,
+      "loss": 1.7874,
+      "step": 999
+    },
+    {
+      "epoch": 0.5689900426742532,
+      "grad_norm": 5.743197917938232,
+      "learning_rate": 0.0,
+      "loss": 2.6632,
+      "step": 1000
+    },
+    {
+      "epoch": 0.5689900426742532,
+      "eval_loss": 2.037670850753784,
+      "eval_runtime": 13.4751,
+      "eval_samples_per_second": 54.916,
+      "eval_steps_per_second": 27.458,
+      "step": 1000
     }
   ],
   "logging_steps": 1,
@@ -6353,12 +7061,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 9388052623392768.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
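The trainer_state.json hunks above record the run reaching epoch 0.5689900426742532 at global_step 1000, a final eval_loss of 2.037670850753784, and should_training_stop flipping to true, so step 1000 ends the run. A minimal sketch of reading those fields back from a downloaded copy of the checkpoint (the local path is an assumption):

import json

# Path assumes the checkpoint directory was downloaded locally; adjust as needed.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])           # 0.5689900426742532 1000

# The last two log_history entries are the step-1000 training log and the eval log.
train_log, eval_log = state["log_history"][-2:]
print(train_log["loss"], train_log["learning_rate"])  # 2.6632 0.0
print(eval_log["eval_loss"])                          # 2.037670850753784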