Training in progress, step 940, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 259932816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0e1cc5911239f49e561557a27b5f0a85dbb7e313fc43601f1cc90c43205fc68
|
3 |
size 259932816
|
last-checkpoint/optimizer.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 520248073
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a466f86ba55f590c748d1bc349d5fe6c57546e91129c4c900d8113eb56637dd
|
3 |
size 520248073
|
last-checkpoint/pytorch_model_fsdp.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 260079091
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c5a87cde250f707ccacc19304f9de97807fe4e1340c81a39b85d565c0fb23e9
|
3 |
size 260079091
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14917
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f157be64f7256ff822ba6aa659c854e52284065797d00185f15a16bb9279903
|
3 |
size 14917
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14917
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1423736299094f0d9876f80f9f1e421064bba39236724b994e8e9c8c5510e786
|
3 |
size 14917
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1529
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d13d20eae0718d29061a425b788ef9fb122d4217938b16d41a64bc1a7b675c8
|
3 |
size 1529
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch":
|
6 |
"eval_steps": 185,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -6299,6 +6299,343 @@
|
|
6299 |
"learning_rate": 9.324213144551805e-06,
|
6300 |
"loss": 2.3854,
|
6301 |
"step": 893
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6302 |
}
|
6303 |
],
|
6304 |
"logging_steps": 1,
|
@@ -6318,7 +6655,7 @@
|
|
6318 |
"attributes": {}
|
6319 |
}
|
6320 |
},
|
6321 |
-
"total_flos": 2.
|
6322 |
"train_batch_size": 1,
|
6323 |
"trial_name": null,
|
6324 |
"trial_params": null
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 1.0151310456633342,
|
6 |
"eval_steps": 185,
|
7 |
+
"global_step": 940,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
6299 |
"learning_rate": 9.324213144551805e-06,
|
6300 |
"loss": 2.3854,
|
6301 |
"step": 893
|
6302 |
+
},
|
6303 |
+
{
|
6304 |
+
"epoch": 0.9662253445014861,
|
6305 |
+
"grad_norm": 0.3288347125053406,
|
6306 |
+
"learning_rate": 9.322864802757564e-06,
|
6307 |
+
"loss": 2.451,
|
6308 |
+
"step": 894
|
6309 |
+
},
|
6310 |
+
{
|
6311 |
+
"epoch": 0.9673061334774385,
|
6312 |
+
"grad_norm": 0.531466007232666,
|
6313 |
+
"learning_rate": 9.321514135122184e-06,
|
6314 |
+
"loss": 2.3317,
|
6315 |
+
"step": 895
|
6316 |
+
},
|
6317 |
+
{
|
6318 |
+
"epoch": 0.968386922453391,
|
6319 |
+
"grad_norm": 0.3763998746871948,
|
6320 |
+
"learning_rate": 9.320161135622483e-06,
|
6321 |
+
"loss": 2.2536,
|
6322 |
+
"step": 896
|
6323 |
+
},
|
6324 |
+
{
|
6325 |
+
"epoch": 0.9694677114293434,
|
6326 |
+
"grad_norm": 1.116055965423584,
|
6327 |
+
"learning_rate": 9.318805798214459e-06,
|
6328 |
+
"loss": 2.4407,
|
6329 |
+
"step": 897
|
6330 |
+
},
|
6331 |
+
{
|
6332 |
+
"epoch": 0.9705485004052958,
|
6333 |
+
"grad_norm": 0.49112746119499207,
|
6334 |
+
"learning_rate": 9.317448116833207e-06,
|
6335 |
+
"loss": 2.4513,
|
6336 |
+
"step": 898
|
6337 |
+
},
|
6338 |
+
{
|
6339 |
+
"epoch": 0.9716292893812483,
|
6340 |
+
"grad_norm": 0.6849557757377625,
|
6341 |
+
"learning_rate": 9.316088085392826e-06,
|
6342 |
+
"loss": 2.3513,
|
6343 |
+
"step": 899
|
6344 |
+
},
|
6345 |
+
{
|
6346 |
+
"epoch": 0.9727100783572008,
|
6347 |
+
"grad_norm": 0.5073702335357666,
|
6348 |
+
"learning_rate": 9.314725697786334e-06,
|
6349 |
+
"loss": 2.4163,
|
6350 |
+
"step": 900
|
6351 |
+
},
|
6352 |
+
{
|
6353 |
+
"epoch": 0.9737908673331532,
|
6354 |
+
"grad_norm": 0.45914849638938904,
|
6355 |
+
"learning_rate": 9.31336094788556e-06,
|
6356 |
+
"loss": 2.3871,
|
6357 |
+
"step": 901
|
6358 |
+
},
|
6359 |
+
{
|
6360 |
+
"epoch": 0.9748716563091057,
|
6361 |
+
"grad_norm": 0.33709532022476196,
|
6362 |
+
"learning_rate": 9.311993829541072e-06,
|
6363 |
+
"loss": 2.476,
|
6364 |
+
"step": 902
|
6365 |
+
},
|
6366 |
+
{
|
6367 |
+
"epoch": 0.9759524452850581,
|
6368 |
+
"grad_norm": 0.9165427684783936,
|
6369 |
+
"learning_rate": 9.310624336582073e-06,
|
6370 |
+
"loss": 2.3338,
|
6371 |
+
"step": 903
|
6372 |
+
},
|
6373 |
+
{
|
6374 |
+
"epoch": 0.9770332342610105,
|
6375 |
+
"grad_norm": 1.3027328252792358,
|
6376 |
+
"learning_rate": 9.309252462816303e-06,
|
6377 |
+
"loss": 2.4926,
|
6378 |
+
"step": 904
|
6379 |
+
},
|
6380 |
+
{
|
6381 |
+
"epoch": 0.978114023236963,
|
6382 |
+
"grad_norm": 0.3530491590499878,
|
6383 |
+
"learning_rate": 9.307878202029968e-06,
|
6384 |
+
"loss": 2.4139,
|
6385 |
+
"step": 905
|
6386 |
+
},
|
6387 |
+
{
|
6388 |
+
"epoch": 0.9791948122129154,
|
6389 |
+
"grad_norm": 0.35203471779823303,
|
6390 |
+
"learning_rate": 9.306501547987617e-06,
|
6391 |
+
"loss": 2.3944,
|
6392 |
+
"step": 906
|
6393 |
+
},
|
6394 |
+
{
|
6395 |
+
"epoch": 0.9802756011888679,
|
6396 |
+
"grad_norm": 0.3719307482242584,
|
6397 |
+
"learning_rate": 9.305122494432071e-06,
|
6398 |
+
"loss": 2.1928,
|
6399 |
+
"step": 907
|
6400 |
+
},
|
6401 |
+
{
|
6402 |
+
"epoch": 0.9813563901648203,
|
6403 |
+
"grad_norm": 1.6625559329986572,
|
6404 |
+
"learning_rate": 9.303741035084319e-06,
|
6405 |
+
"loss": 2.3813,
|
6406 |
+
"step": 908
|
6407 |
+
},
|
6408 |
+
{
|
6409 |
+
"epoch": 0.9824371791407728,
|
6410 |
+
"grad_norm": 0.39877355098724365,
|
6411 |
+
"learning_rate": 9.302357163643418e-06,
|
6412 |
+
"loss": 2.4736,
|
6413 |
+
"step": 909
|
6414 |
+
},
|
6415 |
+
{
|
6416 |
+
"epoch": 0.9835179681167252,
|
6417 |
+
"grad_norm": 0.33090370893478394,
|
6418 |
+
"learning_rate": 9.300970873786411e-06,
|
6419 |
+
"loss": 2.3231,
|
6420 |
+
"step": 910
|
6421 |
+
},
|
6422 |
+
{
|
6423 |
+
"epoch": 0.9845987570926776,
|
6424 |
+
"grad_norm": 0.3974967300891876,
|
6425 |
+
"learning_rate": 9.299582159168208e-06,
|
6426 |
+
"loss": 2.333,
|
6427 |
+
"step": 911
|
6428 |
+
},
|
6429 |
+
{
|
6430 |
+
"epoch": 0.9856795460686301,
|
6431 |
+
"grad_norm": 0.4661647379398346,
|
6432 |
+
"learning_rate": 9.298191013421514e-06,
|
6433 |
+
"loss": 2.2566,
|
6434 |
+
"step": 912
|
6435 |
+
},
|
6436 |
+
{
|
6437 |
+
"epoch": 0.9867603350445825,
|
6438 |
+
"grad_norm": 0.325780987739563,
|
6439 |
+
"learning_rate": 9.296797430156723e-06,
|
6440 |
+
"loss": 2.3431,
|
6441 |
+
"step": 913
|
6442 |
+
},
|
6443 |
+
{
|
6444 |
+
"epoch": 0.987841124020535,
|
6445 |
+
"grad_norm": 0.3106825351715088,
|
6446 |
+
"learning_rate": 9.29540140296181e-06,
|
6447 |
+
"loss": 2.2994,
|
6448 |
+
"step": 914
|
6449 |
+
},
|
6450 |
+
{
|
6451 |
+
"epoch": 0.9889219129964875,
|
6452 |
+
"grad_norm": 1.1611113548278809,
|
6453 |
+
"learning_rate": 9.294002925402243e-06,
|
6454 |
+
"loss": 2.3236,
|
6455 |
+
"step": 915
|
6456 |
+
},
|
6457 |
+
{
|
6458 |
+
"epoch": 0.9900027019724399,
|
6459 |
+
"grad_norm": 0.43262773752212524,
|
6460 |
+
"learning_rate": 9.292601991020887e-06,
|
6461 |
+
"loss": 2.3774,
|
6462 |
+
"step": 916
|
6463 |
+
},
|
6464 |
+
{
|
6465 |
+
"epoch": 0.9910834909483923,
|
6466 |
+
"grad_norm": 0.3371218740940094,
|
6467 |
+
"learning_rate": 9.291198593337894e-06,
|
6468 |
+
"loss": 2.3657,
|
6469 |
+
"step": 917
|
6470 |
+
},
|
6471 |
+
{
|
6472 |
+
"epoch": 0.9921642799243447,
|
6473 |
+
"grad_norm": 0.43215903639793396,
|
6474 |
+
"learning_rate": 9.289792725850607e-06,
|
6475 |
+
"loss": 2.4198,
|
6476 |
+
"step": 918
|
6477 |
+
},
|
6478 |
+
{
|
6479 |
+
"epoch": 0.9932450689002972,
|
6480 |
+
"grad_norm": 0.33630141615867615,
|
6481 |
+
"learning_rate": 9.288384382033469e-06,
|
6482 |
+
"loss": 2.3433,
|
6483 |
+
"step": 919
|
6484 |
+
},
|
6485 |
+
{
|
6486 |
+
"epoch": 0.9943258578762496,
|
6487 |
+
"grad_norm": 0.36779069900512695,
|
6488 |
+
"learning_rate": 9.286973555337906e-06,
|
6489 |
+
"loss": 2.4905,
|
6490 |
+
"step": 920
|
6491 |
+
},
|
6492 |
+
{
|
6493 |
+
"epoch": 0.9954066468522021,
|
6494 |
+
"grad_norm": 0.31049975752830505,
|
6495 |
+
"learning_rate": 9.285560239192237e-06,
|
6496 |
+
"loss": 2.2489,
|
6497 |
+
"step": 921
|
6498 |
+
},
|
6499 |
+
{
|
6500 |
+
"epoch": 0.9964874358281546,
|
6501 |
+
"grad_norm": 0.3580339848995209,
|
6502 |
+
"learning_rate": 9.28414442700157e-06,
|
6503 |
+
"loss": 2.3187,
|
6504 |
+
"step": 922
|
6505 |
+
},
|
6506 |
+
{
|
6507 |
+
"epoch": 0.997568224804107,
|
6508 |
+
"grad_norm": 0.45295870304107666,
|
6509 |
+
"learning_rate": 9.282726112147697e-06,
|
6510 |
+
"loss": 2.3443,
|
6511 |
+
"step": 923
|
6512 |
+
},
|
6513 |
+
{
|
6514 |
+
"epoch": 0.9986490137800594,
|
6515 |
+
"grad_norm": 0.5400623083114624,
|
6516 |
+
"learning_rate": 9.281305287988994e-06,
|
6517 |
+
"loss": 2.4031,
|
6518 |
+
"step": 924
|
6519 |
+
},
|
6520 |
+
{
|
6521 |
+
"epoch": 0.9997298027560119,
|
6522 |
+
"grad_norm": 0.32201769948005676,
|
6523 |
+
"learning_rate": 9.279881947860306e-06,
|
6524 |
+
"loss": 2.4042,
|
6525 |
+
"step": 925
|
6526 |
+
},
|
6527 |
+
{
|
6528 |
+
"epoch": 0.9997298027560119,
|
6529 |
+
"eval_loss": 2.3627305030822754,
|
6530 |
+
"eval_runtime": 441.1251,
|
6531 |
+
"eval_samples_per_second": 0.227,
|
6532 |
+
"eval_steps_per_second": 0.113,
|
6533 |
+
"step": 925
|
6534 |
+
},
|
6535 |
+
{
|
6536 |
+
"epoch": 1.0,
|
6537 |
+
"grad_norm": 0.5289644598960876,
|
6538 |
+
"learning_rate": 9.278456085072864e-06,
|
6539 |
+
"loss": 2.3884,
|
6540 |
+
"step": 926
|
6541 |
+
},
|
6542 |
+
{
|
6543 |
+
"epoch": 1.0010807889759525,
|
6544 |
+
"grad_norm": 0.5844120383262634,
|
6545 |
+
"learning_rate": 9.277027692914162e-06,
|
6546 |
+
"loss": 2.2919,
|
6547 |
+
"step": 927
|
6548 |
+
},
|
6549 |
+
{
|
6550 |
+
"epoch": 1.0021615779519049,
|
6551 |
+
"grad_norm": 0.2842770516872406,
|
6552 |
+
"learning_rate": 9.27559676464786e-06,
|
6553 |
+
"loss": 2.4023,
|
6554 |
+
"step": 928
|
6555 |
+
},
|
6556 |
+
{
|
6557 |
+
"epoch": 1.0032423669278574,
|
6558 |
+
"grad_norm": 1.1834286451339722,
|
6559 |
+
"learning_rate": 9.274163293513674e-06,
|
6560 |
+
"loss": 2.2412,
|
6561 |
+
"step": 929
|
6562 |
+
},
|
6563 |
+
{
|
6564 |
+
"epoch": 1.0043231559038097,
|
6565 |
+
"grad_norm": 0.32926666736602783,
|
6566 |
+
"learning_rate": 9.272727272727273e-06,
|
6567 |
+
"loss": 2.3672,
|
6568 |
+
"step": 930
|
6569 |
+
},
|
6570 |
+
{
|
6571 |
+
"epoch": 1.0054039448797623,
|
6572 |
+
"grad_norm": 0.30600887537002563,
|
6573 |
+
"learning_rate": 9.271288695480172e-06,
|
6574 |
+
"loss": 2.3036,
|
6575 |
+
"step": 931
|
6576 |
+
},
|
6577 |
+
{
|
6578 |
+
"epoch": 1.0064847338557146,
|
6579 |
+
"grad_norm": 0.4481301009654999,
|
6580 |
+
"learning_rate": 9.269847554939618e-06,
|
6581 |
+
"loss": 2.3731,
|
6582 |
+
"step": 932
|
6583 |
+
},
|
6584 |
+
{
|
6585 |
+
"epoch": 1.0075655228316671,
|
6586 |
+
"grad_norm": 0.6130079627037048,
|
6587 |
+
"learning_rate": 9.26840384424849e-06,
|
6588 |
+
"loss": 2.3797,
|
6589 |
+
"step": 933
|
6590 |
+
},
|
6591 |
+
{
|
6592 |
+
"epoch": 1.0086463118076197,
|
6593 |
+
"grad_norm": 1.2700423002243042,
|
6594 |
+
"learning_rate": 9.266957556525189e-06,
|
6595 |
+
"loss": 2.3293,
|
6596 |
+
"step": 934
|
6597 |
+
},
|
6598 |
+
{
|
6599 |
+
"epoch": 1.009727100783572,
|
6600 |
+
"grad_norm": 0.501125156879425,
|
6601 |
+
"learning_rate": 9.265508684863524e-06,
|
6602 |
+
"loss": 2.5028,
|
6603 |
+
"step": 935
|
6604 |
+
},
|
6605 |
+
{
|
6606 |
+
"epoch": 1.0108078897595245,
|
6607 |
+
"grad_norm": 0.4285920560359955,
|
6608 |
+
"learning_rate": 9.264057222332605e-06,
|
6609 |
+
"loss": 2.1419,
|
6610 |
+
"step": 936
|
6611 |
+
},
|
6612 |
+
{
|
6613 |
+
"epoch": 1.0118886787354768,
|
6614 |
+
"grad_norm": 0.4635486900806427,
|
6615 |
+
"learning_rate": 9.262603161976733e-06,
|
6616 |
+
"loss": 2.2757,
|
6617 |
+
"step": 937
|
6618 |
+
},
|
6619 |
+
{
|
6620 |
+
"epoch": 1.0129694677114294,
|
6621 |
+
"grad_norm": 0.3583570122718811,
|
6622 |
+
"learning_rate": 9.261146496815287e-06,
|
6623 |
+
"loss": 2.3095,
|
6624 |
+
"step": 938
|
6625 |
+
},
|
6626 |
+
{
|
6627 |
+
"epoch": 1.0140502566873817,
|
6628 |
+
"grad_norm": 0.43252095580101013,
|
6629 |
+
"learning_rate": 9.259687219842614e-06,
|
6630 |
+
"loss": 2.3978,
|
6631 |
+
"step": 939
|
6632 |
+
},
|
6633 |
+
{
|
6634 |
+
"epoch": 1.0151310456633342,
|
6635 |
+
"grad_norm": 0.6250537037849426,
|
6636 |
+
"learning_rate": 9.258225324027916e-06,
|
6637 |
+
"loss": 2.2394,
|
6638 |
+
"step": 940
|
6639 |
}
|
6640 |
],
|
6641 |
"logging_steps": 1,
|
|
|
6655 |
"attributes": {}
|
6656 |
}
|
6657 |
},
|
6658 |
+
"total_flos": 2.5537803284559954e+19,
|
6659 |
"train_batch_size": 1,
|
6660 |
"trial_name": null,
|
6661 |
"trial_params": null
|