ToastyPigeon commited on
Commit
409147f
·
verified ·
1 Parent(s): ad31335

Training in progress, step 940, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b224591b33b6f0dcca681ec8c98ebd3bc73d8678c36c6d74ef5f9772fc8dfdd
3
  size 259932816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0e1cc5911239f49e561557a27b5f0a85dbb7e313fc43601f1cc90c43205fc68
3
  size 259932816
last-checkpoint/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a84a2ee9fe524ec7424238025b6441fe2d2576088097e5a27473f77a4a30da54
3
  size 520248073
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a466f86ba55f590c748d1bc349d5fe6c57546e91129c4c900d8113eb56637dd
3
  size 520248073
last-checkpoint/pytorch_model_fsdp.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef684c624caa4dc4e8ba0d900b78b653c1096ae75c5542ba66d71ebd70bdac39
3
  size 260079091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c5a87cde250f707ccacc19304f9de97807fe4e1340c81a39b85d565c0fb23e9
3
  size 260079091
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21a9d25a3ffb87111d602f8be707f915f247940200262cfaa79a64b9962b93ca
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f157be64f7256ff822ba6aa659c854e52284065797d00185f15a16bb9279903
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1e6d333c5faed40933de287cee8ff8bb32ffa8966fbdd18b75aa3caaec14033
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1423736299094f0d9876f80f9f1e421064bba39236724b994e8e9c8c5510e786
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3fdc3543f586a4b1de6270b4a6fb197c861a47b1989bb7a635856151db43b7f
3
  size 1529
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d13d20eae0718d29061a425b788ef9fb122d4217938b16d41a64bc1a7b675c8
3
  size 1529
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9651445555255337,
6
  "eval_steps": 185,
7
- "global_step": 893,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6299,6 +6299,343 @@
6299
  "learning_rate": 9.324213144551805e-06,
6300
  "loss": 2.3854,
6301
  "step": 893
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6302
  }
6303
  ],
6304
  "logging_steps": 1,
@@ -6318,7 +6655,7 @@
6318
  "attributes": {}
6319
  }
6320
  },
6321
- "total_flos": 2.42802856887006e+19,
6322
  "train_batch_size": 1,
6323
  "trial_name": null,
6324
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0151310456633342,
6
  "eval_steps": 185,
7
+ "global_step": 940,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6299
  "learning_rate": 9.324213144551805e-06,
6300
  "loss": 2.3854,
6301
  "step": 893
6302
+ },
6303
+ {
6304
+ "epoch": 0.9662253445014861,
6305
+ "grad_norm": 0.3288347125053406,
6306
+ "learning_rate": 9.322864802757564e-06,
6307
+ "loss": 2.451,
6308
+ "step": 894
6309
+ },
6310
+ {
6311
+ "epoch": 0.9673061334774385,
6312
+ "grad_norm": 0.531466007232666,
6313
+ "learning_rate": 9.321514135122184e-06,
6314
+ "loss": 2.3317,
6315
+ "step": 895
6316
+ },
6317
+ {
6318
+ "epoch": 0.968386922453391,
6319
+ "grad_norm": 0.3763998746871948,
6320
+ "learning_rate": 9.320161135622483e-06,
6321
+ "loss": 2.2536,
6322
+ "step": 896
6323
+ },
6324
+ {
6325
+ "epoch": 0.9694677114293434,
6326
+ "grad_norm": 1.116055965423584,
6327
+ "learning_rate": 9.318805798214459e-06,
6328
+ "loss": 2.4407,
6329
+ "step": 897
6330
+ },
6331
+ {
6332
+ "epoch": 0.9705485004052958,
6333
+ "grad_norm": 0.49112746119499207,
6334
+ "learning_rate": 9.317448116833207e-06,
6335
+ "loss": 2.4513,
6336
+ "step": 898
6337
+ },
6338
+ {
6339
+ "epoch": 0.9716292893812483,
6340
+ "grad_norm": 0.6849557757377625,
6341
+ "learning_rate": 9.316088085392826e-06,
6342
+ "loss": 2.3513,
6343
+ "step": 899
6344
+ },
6345
+ {
6346
+ "epoch": 0.9727100783572008,
6347
+ "grad_norm": 0.5073702335357666,
6348
+ "learning_rate": 9.314725697786334e-06,
6349
+ "loss": 2.4163,
6350
+ "step": 900
6351
+ },
6352
+ {
6353
+ "epoch": 0.9737908673331532,
6354
+ "grad_norm": 0.45914849638938904,
6355
+ "learning_rate": 9.31336094788556e-06,
6356
+ "loss": 2.3871,
6357
+ "step": 901
6358
+ },
6359
+ {
6360
+ "epoch": 0.9748716563091057,
6361
+ "grad_norm": 0.33709532022476196,
6362
+ "learning_rate": 9.311993829541072e-06,
6363
+ "loss": 2.476,
6364
+ "step": 902
6365
+ },
6366
+ {
6367
+ "epoch": 0.9759524452850581,
6368
+ "grad_norm": 0.9165427684783936,
6369
+ "learning_rate": 9.310624336582073e-06,
6370
+ "loss": 2.3338,
6371
+ "step": 903
6372
+ },
6373
+ {
6374
+ "epoch": 0.9770332342610105,
6375
+ "grad_norm": 1.3027328252792358,
6376
+ "learning_rate": 9.309252462816303e-06,
6377
+ "loss": 2.4926,
6378
+ "step": 904
6379
+ },
6380
+ {
6381
+ "epoch": 0.978114023236963,
6382
+ "grad_norm": 0.3530491590499878,
6383
+ "learning_rate": 9.307878202029968e-06,
6384
+ "loss": 2.4139,
6385
+ "step": 905
6386
+ },
6387
+ {
6388
+ "epoch": 0.9791948122129154,
6389
+ "grad_norm": 0.35203471779823303,
6390
+ "learning_rate": 9.306501547987617e-06,
6391
+ "loss": 2.3944,
6392
+ "step": 906
6393
+ },
6394
+ {
6395
+ "epoch": 0.9802756011888679,
6396
+ "grad_norm": 0.3719307482242584,
6397
+ "learning_rate": 9.305122494432071e-06,
6398
+ "loss": 2.1928,
6399
+ "step": 907
6400
+ },
6401
+ {
6402
+ "epoch": 0.9813563901648203,
6403
+ "grad_norm": 1.6625559329986572,
6404
+ "learning_rate": 9.303741035084319e-06,
6405
+ "loss": 2.3813,
6406
+ "step": 908
6407
+ },
6408
+ {
6409
+ "epoch": 0.9824371791407728,
6410
+ "grad_norm": 0.39877355098724365,
6411
+ "learning_rate": 9.302357163643418e-06,
6412
+ "loss": 2.4736,
6413
+ "step": 909
6414
+ },
6415
+ {
6416
+ "epoch": 0.9835179681167252,
6417
+ "grad_norm": 0.33090370893478394,
6418
+ "learning_rate": 9.300970873786411e-06,
6419
+ "loss": 2.3231,
6420
+ "step": 910
6421
+ },
6422
+ {
6423
+ "epoch": 0.9845987570926776,
6424
+ "grad_norm": 0.3974967300891876,
6425
+ "learning_rate": 9.299582159168208e-06,
6426
+ "loss": 2.333,
6427
+ "step": 911
6428
+ },
6429
+ {
6430
+ "epoch": 0.9856795460686301,
6431
+ "grad_norm": 0.4661647379398346,
6432
+ "learning_rate": 9.298191013421514e-06,
6433
+ "loss": 2.2566,
6434
+ "step": 912
6435
+ },
6436
+ {
6437
+ "epoch": 0.9867603350445825,
6438
+ "grad_norm": 0.325780987739563,
6439
+ "learning_rate": 9.296797430156723e-06,
6440
+ "loss": 2.3431,
6441
+ "step": 913
6442
+ },
6443
+ {
6444
+ "epoch": 0.987841124020535,
6445
+ "grad_norm": 0.3106825351715088,
6446
+ "learning_rate": 9.29540140296181e-06,
6447
+ "loss": 2.2994,
6448
+ "step": 914
6449
+ },
6450
+ {
6451
+ "epoch": 0.9889219129964875,
6452
+ "grad_norm": 1.1611113548278809,
6453
+ "learning_rate": 9.294002925402243e-06,
6454
+ "loss": 2.3236,
6455
+ "step": 915
6456
+ },
6457
+ {
6458
+ "epoch": 0.9900027019724399,
6459
+ "grad_norm": 0.43262773752212524,
6460
+ "learning_rate": 9.292601991020887e-06,
6461
+ "loss": 2.3774,
6462
+ "step": 916
6463
+ },
6464
+ {
6465
+ "epoch": 0.9910834909483923,
6466
+ "grad_norm": 0.3371218740940094,
6467
+ "learning_rate": 9.291198593337894e-06,
6468
+ "loss": 2.3657,
6469
+ "step": 917
6470
+ },
6471
+ {
6472
+ "epoch": 0.9921642799243447,
6473
+ "grad_norm": 0.43215903639793396,
6474
+ "learning_rate": 9.289792725850607e-06,
6475
+ "loss": 2.4198,
6476
+ "step": 918
6477
+ },
6478
+ {
6479
+ "epoch": 0.9932450689002972,
6480
+ "grad_norm": 0.33630141615867615,
6481
+ "learning_rate": 9.288384382033469e-06,
6482
+ "loss": 2.3433,
6483
+ "step": 919
6484
+ },
6485
+ {
6486
+ "epoch": 0.9943258578762496,
6487
+ "grad_norm": 0.36779069900512695,
6488
+ "learning_rate": 9.286973555337906e-06,
6489
+ "loss": 2.4905,
6490
+ "step": 920
6491
+ },
6492
+ {
6493
+ "epoch": 0.9954066468522021,
6494
+ "grad_norm": 0.31049975752830505,
6495
+ "learning_rate": 9.285560239192237e-06,
6496
+ "loss": 2.2489,
6497
+ "step": 921
6498
+ },
6499
+ {
6500
+ "epoch": 0.9964874358281546,
6501
+ "grad_norm": 0.3580339848995209,
6502
+ "learning_rate": 9.28414442700157e-06,
6503
+ "loss": 2.3187,
6504
+ "step": 922
6505
+ },
6506
+ {
6507
+ "epoch": 0.997568224804107,
6508
+ "grad_norm": 0.45295870304107666,
6509
+ "learning_rate": 9.282726112147697e-06,
6510
+ "loss": 2.3443,
6511
+ "step": 923
6512
+ },
6513
+ {
6514
+ "epoch": 0.9986490137800594,
6515
+ "grad_norm": 0.5400623083114624,
6516
+ "learning_rate": 9.281305287988994e-06,
6517
+ "loss": 2.4031,
6518
+ "step": 924
6519
+ },
6520
+ {
6521
+ "epoch": 0.9997298027560119,
6522
+ "grad_norm": 0.32201769948005676,
6523
+ "learning_rate": 9.279881947860306e-06,
6524
+ "loss": 2.4042,
6525
+ "step": 925
6526
+ },
6527
+ {
6528
+ "epoch": 0.9997298027560119,
6529
+ "eval_loss": 2.3627305030822754,
6530
+ "eval_runtime": 441.1251,
6531
+ "eval_samples_per_second": 0.227,
6532
+ "eval_steps_per_second": 0.113,
6533
+ "step": 925
6534
+ },
6535
+ {
6536
+ "epoch": 1.0,
6537
+ "grad_norm": 0.5289644598960876,
6538
+ "learning_rate": 9.278456085072864e-06,
6539
+ "loss": 2.3884,
6540
+ "step": 926
6541
+ },
6542
+ {
6543
+ "epoch": 1.0010807889759525,
6544
+ "grad_norm": 0.5844120383262634,
6545
+ "learning_rate": 9.277027692914162e-06,
6546
+ "loss": 2.2919,
6547
+ "step": 927
6548
+ },
6549
+ {
6550
+ "epoch": 1.0021615779519049,
6551
+ "grad_norm": 0.2842770516872406,
6552
+ "learning_rate": 9.27559676464786e-06,
6553
+ "loss": 2.4023,
6554
+ "step": 928
6555
+ },
6556
+ {
6557
+ "epoch": 1.0032423669278574,
6558
+ "grad_norm": 1.1834286451339722,
6559
+ "learning_rate": 9.274163293513674e-06,
6560
+ "loss": 2.2412,
6561
+ "step": 929
6562
+ },
6563
+ {
6564
+ "epoch": 1.0043231559038097,
6565
+ "grad_norm": 0.32926666736602783,
6566
+ "learning_rate": 9.272727272727273e-06,
6567
+ "loss": 2.3672,
6568
+ "step": 930
6569
+ },
6570
+ {
6571
+ "epoch": 1.0054039448797623,
6572
+ "grad_norm": 0.30600887537002563,
6573
+ "learning_rate": 9.271288695480172e-06,
6574
+ "loss": 2.3036,
6575
+ "step": 931
6576
+ },
6577
+ {
6578
+ "epoch": 1.0064847338557146,
6579
+ "grad_norm": 0.4481301009654999,
6580
+ "learning_rate": 9.269847554939618e-06,
6581
+ "loss": 2.3731,
6582
+ "step": 932
6583
+ },
6584
+ {
6585
+ "epoch": 1.0075655228316671,
6586
+ "grad_norm": 0.6130079627037048,
6587
+ "learning_rate": 9.26840384424849e-06,
6588
+ "loss": 2.3797,
6589
+ "step": 933
6590
+ },
6591
+ {
6592
+ "epoch": 1.0086463118076197,
6593
+ "grad_norm": 1.2700423002243042,
6594
+ "learning_rate": 9.266957556525189e-06,
6595
+ "loss": 2.3293,
6596
+ "step": 934
6597
+ },
6598
+ {
6599
+ "epoch": 1.009727100783572,
6600
+ "grad_norm": 0.501125156879425,
6601
+ "learning_rate": 9.265508684863524e-06,
6602
+ "loss": 2.5028,
6603
+ "step": 935
6604
+ },
6605
+ {
6606
+ "epoch": 1.0108078897595245,
6607
+ "grad_norm": 0.4285920560359955,
6608
+ "learning_rate": 9.264057222332605e-06,
6609
+ "loss": 2.1419,
6610
+ "step": 936
6611
+ },
6612
+ {
6613
+ "epoch": 1.0118886787354768,
6614
+ "grad_norm": 0.4635486900806427,
6615
+ "learning_rate": 9.262603161976733e-06,
6616
+ "loss": 2.2757,
6617
+ "step": 937
6618
+ },
6619
+ {
6620
+ "epoch": 1.0129694677114294,
6621
+ "grad_norm": 0.3583570122718811,
6622
+ "learning_rate": 9.261146496815287e-06,
6623
+ "loss": 2.3095,
6624
+ "step": 938
6625
+ },
6626
+ {
6627
+ "epoch": 1.0140502566873817,
6628
+ "grad_norm": 0.43252095580101013,
6629
+ "learning_rate": 9.259687219842614e-06,
6630
+ "loss": 2.3978,
6631
+ "step": 939
6632
+ },
6633
+ {
6634
+ "epoch": 1.0151310456633342,
6635
+ "grad_norm": 0.6250537037849426,
6636
+ "learning_rate": 9.258225324027916e-06,
6637
+ "loss": 2.2394,
6638
+ "step": 940
6639
  }
6640
  ],
6641
  "logging_steps": 1,
 
6655
  "attributes": {}
6656
  }
6657
  },
6658
+ "total_flos": 2.5537803284559954e+19,
6659
  "train_batch_size": 1,
6660
  "trial_name": null,
6661
  "trial_params": null