Safetensors
English
gemma
safety
sft
gemma-7b-it_invthink / trainer_state.json
ybkim95's picture
Upload folder using huggingface_hub
e6e1ed2 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014295925661186561,
"grad_norm": 16.642337799072266,
"learning_rate": 1.9742857142857144e-05,
"loss": 4.2448,
"mean_token_accuracy": 0.44755197104532274,
"num_tokens": 63714.0,
"step": 10
},
{
"epoch": 0.028591851322373123,
"grad_norm": 12.869135856628418,
"learning_rate": 1.945714285714286e-05,
"loss": 1.9287,
"mean_token_accuracy": 0.5766903940588236,
"num_tokens": 128528.0,
"step": 20
},
{
"epoch": 0.04288777698355969,
"grad_norm": 18.376684188842773,
"learning_rate": 1.9171428571428573e-05,
"loss": 1.6956,
"mean_token_accuracy": 0.597195016592741,
"num_tokens": 191200.0,
"step": 30
},
{
"epoch": 0.057183702644746245,
"grad_norm": 17.71656036376953,
"learning_rate": 1.888571428571429e-05,
"loss": 1.6076,
"mean_token_accuracy": 0.6067132025957107,
"num_tokens": 255728.0,
"step": 40
},
{
"epoch": 0.07147962830593281,
"grad_norm": 21.026283264160156,
"learning_rate": 1.86e-05,
"loss": 1.5728,
"mean_token_accuracy": 0.612850959226489,
"num_tokens": 319058.0,
"step": 50
},
{
"epoch": 0.08577555396711938,
"grad_norm": 46.10198974609375,
"learning_rate": 1.8314285714285714e-05,
"loss": 1.5977,
"mean_token_accuracy": 0.6111391615122557,
"num_tokens": 384900.0,
"step": 60
},
{
"epoch": 0.10007147962830593,
"grad_norm": 14.742942810058594,
"learning_rate": 1.802857142857143e-05,
"loss": 1.5649,
"mean_token_accuracy": 0.6097237385809422,
"num_tokens": 450346.0,
"step": 70
},
{
"epoch": 0.11436740528949249,
"grad_norm": 43.62748718261719,
"learning_rate": 1.7742857142857143e-05,
"loss": 1.5184,
"mean_token_accuracy": 0.6210372049361468,
"num_tokens": 515018.0,
"step": 80
},
{
"epoch": 0.12866333095067906,
"grad_norm": 15.469511032104492,
"learning_rate": 1.745714285714286e-05,
"loss": 1.4736,
"mean_token_accuracy": 0.6270900748670101,
"num_tokens": 576955.0,
"step": 90
},
{
"epoch": 0.14295925661186562,
"grad_norm": 19.448793411254883,
"learning_rate": 1.717142857142857e-05,
"loss": 1.4637,
"mean_token_accuracy": 0.6368957210332156,
"num_tokens": 641295.0,
"step": 100
},
{
"epoch": 0.15725518227305219,
"grad_norm": 37.31778335571289,
"learning_rate": 1.6885714285714288e-05,
"loss": 1.5303,
"mean_token_accuracy": 0.6210926879197359,
"num_tokens": 706683.0,
"step": 110
},
{
"epoch": 0.17155110793423875,
"grad_norm": 9.722342491149902,
"learning_rate": 1.66e-05,
"loss": 1.4596,
"mean_token_accuracy": 0.6298462159931659,
"num_tokens": 771285.0,
"step": 120
},
{
"epoch": 0.18584703359542531,
"grad_norm": 9.656769752502441,
"learning_rate": 1.6314285714285716e-05,
"loss": 1.5281,
"mean_token_accuracy": 0.6251190695911646,
"num_tokens": 840678.0,
"step": 130
},
{
"epoch": 0.20014295925661185,
"grad_norm": 9.608354568481445,
"learning_rate": 1.602857142857143e-05,
"loss": 1.4438,
"mean_token_accuracy": 0.6370445918291807,
"num_tokens": 905832.0,
"step": 140
},
{
"epoch": 0.21443888491779842,
"grad_norm": 9.842904090881348,
"learning_rate": 1.5742857142857145e-05,
"loss": 1.5379,
"mean_token_accuracy": 0.6172758720815181,
"num_tokens": 972946.0,
"step": 150
},
{
"epoch": 0.22873481057898498,
"grad_norm": 18.17994499206543,
"learning_rate": 1.545714285714286e-05,
"loss": 1.4322,
"mean_token_accuracy": 0.6351331725716591,
"num_tokens": 1034427.0,
"step": 160
},
{
"epoch": 0.24303073624017155,
"grad_norm": 8.876994132995605,
"learning_rate": 1.5171428571428572e-05,
"loss": 1.4343,
"mean_token_accuracy": 0.6313620086759328,
"num_tokens": 1101359.0,
"step": 170
},
{
"epoch": 0.2573266619013581,
"grad_norm": 10.895979881286621,
"learning_rate": 1.4885714285714288e-05,
"loss": 1.4537,
"mean_token_accuracy": 0.633945481479168,
"num_tokens": 1166538.0,
"step": 180
},
{
"epoch": 0.27162258756254465,
"grad_norm": 12.30453872680664,
"learning_rate": 1.46e-05,
"loss": 1.5363,
"mean_token_accuracy": 0.6255367647856473,
"num_tokens": 1235118.0,
"step": 190
},
{
"epoch": 0.28591851322373124,
"grad_norm": 10.28065299987793,
"learning_rate": 1.4314285714285717e-05,
"loss": 1.4199,
"mean_token_accuracy": 0.635765865072608,
"num_tokens": 1300601.0,
"step": 200
},
{
"epoch": 0.3002144388849178,
"grad_norm": 11.893675804138184,
"learning_rate": 1.402857142857143e-05,
"loss": 1.3881,
"mean_token_accuracy": 0.6449477795511485,
"num_tokens": 1362732.0,
"step": 210
},
{
"epoch": 0.31451036454610437,
"grad_norm": 11.485602378845215,
"learning_rate": 1.3742857142857144e-05,
"loss": 1.3647,
"mean_token_accuracy": 0.6486451178789139,
"num_tokens": 1424780.0,
"step": 220
},
{
"epoch": 0.3288062902072909,
"grad_norm": 8.882689476013184,
"learning_rate": 1.3457142857142858e-05,
"loss": 1.3915,
"mean_token_accuracy": 0.6436454936861992,
"num_tokens": 1490808.0,
"step": 230
},
{
"epoch": 0.3431022158684775,
"grad_norm": 18.272981643676758,
"learning_rate": 1.3171428571428573e-05,
"loss": 1.4796,
"mean_token_accuracy": 0.6283955980092287,
"num_tokens": 1556933.0,
"step": 240
},
{
"epoch": 0.35739814152966404,
"grad_norm": 11.947668075561523,
"learning_rate": 1.2885714285714285e-05,
"loss": 1.4398,
"mean_token_accuracy": 0.638329004868865,
"num_tokens": 1621053.0,
"step": 250
},
{
"epoch": 0.37169406719085063,
"grad_norm": 23.547773361206055,
"learning_rate": 1.2600000000000001e-05,
"loss": 1.3801,
"mean_token_accuracy": 0.6448216594755649,
"num_tokens": 1686747.0,
"step": 260
},
{
"epoch": 0.38598999285203717,
"grad_norm": 16.82425880432129,
"learning_rate": 1.2314285714285716e-05,
"loss": 1.3891,
"mean_token_accuracy": 0.6475608512759209,
"num_tokens": 1751946.0,
"step": 270
},
{
"epoch": 0.4002859185132237,
"grad_norm": 11.931357383728027,
"learning_rate": 1.202857142857143e-05,
"loss": 1.3768,
"mean_token_accuracy": 0.6439446356147528,
"num_tokens": 1816123.0,
"step": 280
},
{
"epoch": 0.4145818441744103,
"grad_norm": 14.375319480895996,
"learning_rate": 1.1742857142857144e-05,
"loss": 1.315,
"mean_token_accuracy": 0.6517576463520527,
"num_tokens": 1879227.0,
"step": 290
},
{
"epoch": 0.42887776983559683,
"grad_norm": 10.699817657470703,
"learning_rate": 1.1457142857142857e-05,
"loss": 1.3519,
"mean_token_accuracy": 0.6487406313419342,
"num_tokens": 1944238.0,
"step": 300
},
{
"epoch": 0.4431736954967834,
"grad_norm": 12.067941665649414,
"learning_rate": 1.1171428571428573e-05,
"loss": 1.2784,
"mean_token_accuracy": 0.6627866499125957,
"num_tokens": 2007629.0,
"step": 310
},
{
"epoch": 0.45746962115796996,
"grad_norm": 15.550559997558594,
"learning_rate": 1.0885714285714286e-05,
"loss": 1.3495,
"mean_token_accuracy": 0.6514371998608113,
"num_tokens": 2076666.0,
"step": 320
},
{
"epoch": 0.47176554681915656,
"grad_norm": 30.000173568725586,
"learning_rate": 1.0600000000000002e-05,
"loss": 1.3358,
"mean_token_accuracy": 0.6507035464048385,
"num_tokens": 2140860.0,
"step": 330
},
{
"epoch": 0.4860614724803431,
"grad_norm": 7.962319850921631,
"learning_rate": 1.0314285714285715e-05,
"loss": 1.3231,
"mean_token_accuracy": 0.6570919144898653,
"num_tokens": 2204773.0,
"step": 340
},
{
"epoch": 0.5003573981415297,
"grad_norm": 24.023008346557617,
"learning_rate": 1.002857142857143e-05,
"loss": 1.3936,
"mean_token_accuracy": 0.6455658808350563,
"num_tokens": 2270923.0,
"step": 350
},
{
"epoch": 0.5146533238027162,
"grad_norm": 8.74783706665039,
"learning_rate": 9.742857142857143e-06,
"loss": 1.3383,
"mean_token_accuracy": 0.6552599217742682,
"num_tokens": 2337009.0,
"step": 360
},
{
"epoch": 0.5289492494639028,
"grad_norm": 17.01344108581543,
"learning_rate": 9.457142857142858e-06,
"loss": 1.3524,
"mean_token_accuracy": 0.6488417606800795,
"num_tokens": 2405973.0,
"step": 370
},
{
"epoch": 0.5432451751250893,
"grad_norm": 9.353411674499512,
"learning_rate": 9.171428571428572e-06,
"loss": 1.2638,
"mean_token_accuracy": 0.6636200629174709,
"num_tokens": 2469824.0,
"step": 380
},
{
"epoch": 0.557541100786276,
"grad_norm": 13.265799522399902,
"learning_rate": 8.885714285714286e-06,
"loss": 1.2254,
"mean_token_accuracy": 0.6689145911484957,
"num_tokens": 2535167.0,
"step": 390
},
{
"epoch": 0.5718370264474625,
"grad_norm": 19.46824836730957,
"learning_rate": 8.6e-06,
"loss": 1.3844,
"mean_token_accuracy": 0.6483918268233537,
"num_tokens": 2607759.0,
"step": 400
},
{
"epoch": 0.586132952108649,
"grad_norm": 15.773782730102539,
"learning_rate": 8.314285714285715e-06,
"loss": 1.2708,
"mean_token_accuracy": 0.6655503377318382,
"num_tokens": 2670580.0,
"step": 410
},
{
"epoch": 0.6004288777698356,
"grad_norm": 8.917901039123535,
"learning_rate": 8.02857142857143e-06,
"loss": 1.2726,
"mean_token_accuracy": 0.6589578501880169,
"num_tokens": 2737272.0,
"step": 420
},
{
"epoch": 0.6147248034310222,
"grad_norm": 8.988587379455566,
"learning_rate": 7.742857142857144e-06,
"loss": 1.221,
"mean_token_accuracy": 0.664219357818365,
"num_tokens": 2803875.0,
"step": 430
},
{
"epoch": 0.6290207290922087,
"grad_norm": 12.661059379577637,
"learning_rate": 7.457142857142857e-06,
"loss": 1.2658,
"mean_token_accuracy": 0.662236025184393,
"num_tokens": 2869457.0,
"step": 440
},
{
"epoch": 0.6433166547533953,
"grad_norm": 8.545147895812988,
"learning_rate": 7.1714285714285725e-06,
"loss": 1.2778,
"mean_token_accuracy": 0.6622273363173008,
"num_tokens": 2931790.0,
"step": 450
},
{
"epoch": 0.6576125804145818,
"grad_norm": 20.769514083862305,
"learning_rate": 6.885714285714287e-06,
"loss": 1.2951,
"mean_token_accuracy": 0.6606701787561178,
"num_tokens": 2997229.0,
"step": 460
},
{
"epoch": 0.6719085060757684,
"grad_norm": 12.466110229492188,
"learning_rate": 6.600000000000001e-06,
"loss": 1.1754,
"mean_token_accuracy": 0.6822692640125751,
"num_tokens": 3063485.0,
"step": 470
},
{
"epoch": 0.686204431736955,
"grad_norm": 8.45051383972168,
"learning_rate": 6.314285714285715e-06,
"loss": 1.2102,
"mean_token_accuracy": 0.6759132348001003,
"num_tokens": 3127984.0,
"step": 480
},
{
"epoch": 0.7005003573981415,
"grad_norm": 12.029594421386719,
"learning_rate": 6.028571428571429e-06,
"loss": 1.3355,
"mean_token_accuracy": 0.6649406619369984,
"num_tokens": 3194219.0,
"step": 490
},
{
"epoch": 0.7147962830593281,
"grad_norm": 8.824553489685059,
"learning_rate": 5.742857142857143e-06,
"loss": 1.2317,
"mean_token_accuracy": 0.6705160938203335,
"num_tokens": 3259068.0,
"step": 500
},
{
"epoch": 0.7290922087205146,
"grad_norm": 16.150766372680664,
"learning_rate": 5.457142857142858e-06,
"loss": 1.1558,
"mean_token_accuracy": 0.6850677601993084,
"num_tokens": 3324070.0,
"step": 510
},
{
"epoch": 0.7433881343817013,
"grad_norm": 7.721499919891357,
"learning_rate": 5.171428571428571e-06,
"loss": 1.168,
"mean_token_accuracy": 0.6747931383550168,
"num_tokens": 3386885.0,
"step": 520
},
{
"epoch": 0.7576840600428878,
"grad_norm": 9.311972618103027,
"learning_rate": 4.885714285714286e-06,
"loss": 1.1645,
"mean_token_accuracy": 0.6775478422641754,
"num_tokens": 3448602.0,
"step": 530
},
{
"epoch": 0.7719799857040743,
"grad_norm": 9.636552810668945,
"learning_rate": 4.600000000000001e-06,
"loss": 1.2542,
"mean_token_accuracy": 0.6680241461843253,
"num_tokens": 3516481.0,
"step": 540
},
{
"epoch": 0.7862759113652609,
"grad_norm": 36.31599044799805,
"learning_rate": 4.314285714285714e-06,
"loss": 1.1866,
"mean_token_accuracy": 0.6768352195620537,
"num_tokens": 3580217.0,
"step": 550
},
{
"epoch": 0.8005718370264474,
"grad_norm": 7.471230506896973,
"learning_rate": 4.028571428571429e-06,
"loss": 1.1705,
"mean_token_accuracy": 0.6818295098841191,
"num_tokens": 3643021.0,
"step": 560
},
{
"epoch": 0.8148677626876341,
"grad_norm": 48.099830627441406,
"learning_rate": 3.742857142857143e-06,
"loss": 1.1602,
"mean_token_accuracy": 0.6852999441325665,
"num_tokens": 3710116.0,
"step": 570
},
{
"epoch": 0.8291636883488206,
"grad_norm": 13.096914291381836,
"learning_rate": 3.4571428571428574e-06,
"loss": 1.1942,
"mean_token_accuracy": 0.6752621583640576,
"num_tokens": 3775926.0,
"step": 580
},
{
"epoch": 0.8434596140100071,
"grad_norm": 11.580378532409668,
"learning_rate": 3.1714285714285714e-06,
"loss": 1.1277,
"mean_token_accuracy": 0.6849311918020249,
"num_tokens": 3840218.0,
"step": 590
},
{
"epoch": 0.8577555396711937,
"grad_norm": 9.58252239227295,
"learning_rate": 2.885714285714286e-06,
"loss": 1.187,
"mean_token_accuracy": 0.6740429483354091,
"num_tokens": 3904300.0,
"step": 600
},
{
"epoch": 0.8720514653323803,
"grad_norm": 9.778560638427734,
"learning_rate": 2.6e-06,
"loss": 1.2088,
"mean_token_accuracy": 0.6759266927838326,
"num_tokens": 3970409.0,
"step": 610
},
{
"epoch": 0.8863473909935669,
"grad_norm": 9.931038856506348,
"learning_rate": 2.3142857142857145e-06,
"loss": 1.1766,
"mean_token_accuracy": 0.6766778022050858,
"num_tokens": 4038742.0,
"step": 620
},
{
"epoch": 0.9006433166547534,
"grad_norm": 7.126023769378662,
"learning_rate": 2.028571428571429e-06,
"loss": 1.0968,
"mean_token_accuracy": 0.6913008309900761,
"num_tokens": 4103374.0,
"step": 630
},
{
"epoch": 0.9149392423159399,
"grad_norm": 7.73612642288208,
"learning_rate": 1.7428571428571432e-06,
"loss": 1.1254,
"mean_token_accuracy": 0.6863209947943687,
"num_tokens": 4170239.0,
"step": 640
},
{
"epoch": 0.9292351679771265,
"grad_norm": 6.532904148101807,
"learning_rate": 1.4571428571428573e-06,
"loss": 1.1586,
"mean_token_accuracy": 0.6804635964334012,
"num_tokens": 4237810.0,
"step": 650
},
{
"epoch": 0.9435310936383131,
"grad_norm": 7.370081901550293,
"learning_rate": 1.1714285714285715e-06,
"loss": 1.174,
"mean_token_accuracy": 0.6809860028326511,
"num_tokens": 4302937.0,
"step": 660
},
{
"epoch": 0.9578270192994996,
"grad_norm": 7.471885681152344,
"learning_rate": 8.857142857142857e-07,
"loss": 1.1755,
"mean_token_accuracy": 0.6858656518161297,
"num_tokens": 4368704.0,
"step": 670
},
{
"epoch": 0.9721229449606862,
"grad_norm": 9.739863395690918,
"learning_rate": 6.000000000000001e-07,
"loss": 1.1052,
"mean_token_accuracy": 0.6904201626777648,
"num_tokens": 4431384.0,
"step": 680
},
{
"epoch": 0.9864188706218727,
"grad_norm": 11.182050704956055,
"learning_rate": 3.1428571428571433e-07,
"loss": 1.1422,
"mean_token_accuracy": 0.688240597397089,
"num_tokens": 4500182.0,
"step": 690
},
{
"epoch": 1.0,
"grad_norm": 11.066879272460938,
"learning_rate": 2.8571428571428575e-08,
"loss": 1.131,
"mean_token_accuracy": 0.6868848518321389,
"num_tokens": 4559091.0,
"step": 700
}
],
"logging_steps": 10,
"max_steps": 700,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4791381278720.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}