Agentic-Qwen2.5-7B-e2-lr05-b8 / trainer_state.json
akseljoonas's picture
akseljoonas HF Staff
Model save
fbd7e9c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 262,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03816793893129771,
"grad_norm": 9.226670913876331,
"learning_rate": 7.407407407407407e-07,
"loss": 1.0409,
"mean_token_accuracy": 0.7926116704940795,
"num_tokens": 386810.0,
"step": 5
},
{
"epoch": 0.07633587786259542,
"grad_norm": 3.804973403997504,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.0712,
"mean_token_accuracy": 0.7756210923194885,
"num_tokens": 787708.0,
"step": 10
},
{
"epoch": 0.11450381679389313,
"grad_norm": 1.7499258578222254,
"learning_rate": 2.5925925925925925e-06,
"loss": 0.8985,
"mean_token_accuracy": 0.7973829984664917,
"num_tokens": 1192940.0,
"step": 15
},
{
"epoch": 0.15267175572519084,
"grad_norm": 6.83927543051746,
"learning_rate": 3.5185185185185187e-06,
"loss": 0.8262,
"mean_token_accuracy": 0.8118989109992981,
"num_tokens": 1584559.0,
"step": 20
},
{
"epoch": 0.19083969465648856,
"grad_norm": 0.9445650007024579,
"learning_rate": 4.444444444444444e-06,
"loss": 0.5909,
"mean_token_accuracy": 0.8610062956809997,
"num_tokens": 1994159.0,
"step": 25
},
{
"epoch": 0.22900763358778625,
"grad_norm": 0.7333396651548034,
"learning_rate": 4.957446808510639e-06,
"loss": 0.6298,
"mean_token_accuracy": 0.8575117826461792,
"num_tokens": 2395528.0,
"step": 30
},
{
"epoch": 0.26717557251908397,
"grad_norm": 0.8342949171905462,
"learning_rate": 4.851063829787234e-06,
"loss": 0.4509,
"mean_token_accuracy": 0.8964329123497009,
"num_tokens": 2800227.0,
"step": 35
},
{
"epoch": 0.3053435114503817,
"grad_norm": 0.7517210923147133,
"learning_rate": 4.7446808510638305e-06,
"loss": 0.6186,
"mean_token_accuracy": 0.8617437362670899,
"num_tokens": 3193615.0,
"step": 40
},
{
"epoch": 0.3435114503816794,
"grad_norm": 0.7224468196652443,
"learning_rate": 4.638297872340426e-06,
"loss": 0.5298,
"mean_token_accuracy": 0.8760863780975342,
"num_tokens": 3602519.0,
"step": 45
},
{
"epoch": 0.3816793893129771,
"grad_norm": 9.152121654360869,
"learning_rate": 4.5319148936170215e-06,
"loss": 0.6868,
"mean_token_accuracy": 0.8515465021133423,
"num_tokens": 4001905.0,
"step": 50
},
{
"epoch": 0.4198473282442748,
"grad_norm": 3.892592763846139,
"learning_rate": 4.425531914893617e-06,
"loss": 0.6565,
"mean_token_accuracy": 0.858378803730011,
"num_tokens": 4396347.0,
"step": 55
},
{
"epoch": 0.4580152671755725,
"grad_norm": 0.9697380164764114,
"learning_rate": 4.319148936170213e-06,
"loss": 0.5786,
"mean_token_accuracy": 0.8708330988883972,
"num_tokens": 4805947.0,
"step": 60
},
{
"epoch": 0.4961832061068702,
"grad_norm": 0.5790341294192543,
"learning_rate": 4.212765957446809e-06,
"loss": 0.4529,
"mean_token_accuracy": 0.889950966835022,
"num_tokens": 5212394.0,
"step": 65
},
{
"epoch": 0.5343511450381679,
"grad_norm": 0.7713566871833059,
"learning_rate": 4.106382978723404e-06,
"loss": 0.5353,
"mean_token_accuracy": 0.8747799396514893,
"num_tokens": 5613999.0,
"step": 70
},
{
"epoch": 0.5725190839694656,
"grad_norm": 0.5654210829824995,
"learning_rate": 4.000000000000001e-06,
"loss": 0.5676,
"mean_token_accuracy": 0.8743081212043762,
"num_tokens": 6023295.0,
"step": 75
},
{
"epoch": 0.6106870229007634,
"grad_norm": 0.5378495172846931,
"learning_rate": 3.893617021276596e-06,
"loss": 0.6124,
"mean_token_accuracy": 0.8694724321365357,
"num_tokens": 6423827.0,
"step": 80
},
{
"epoch": 0.648854961832061,
"grad_norm": 4.6481796647437275,
"learning_rate": 3.7872340425531917e-06,
"loss": 0.5453,
"mean_token_accuracy": 0.8780820608139038,
"num_tokens": 6830002.0,
"step": 85
},
{
"epoch": 0.6870229007633588,
"grad_norm": 0.6013868777206862,
"learning_rate": 3.680851063829787e-06,
"loss": 0.4699,
"mean_token_accuracy": 0.8892535328865051,
"num_tokens": 7239602.0,
"step": 90
},
{
"epoch": 0.7251908396946565,
"grad_norm": 0.5982251388238545,
"learning_rate": 3.5744680851063835e-06,
"loss": 0.5096,
"mean_token_accuracy": 0.8798805952072144,
"num_tokens": 7639508.0,
"step": 95
},
{
"epoch": 0.7633587786259542,
"grad_norm": 0.6181420027899801,
"learning_rate": 3.468085106382979e-06,
"loss": 0.4994,
"mean_token_accuracy": 0.8826330661773681,
"num_tokens": 8010548.0,
"step": 100
},
{
"epoch": 0.8015267175572519,
"grad_norm": 0.6930619383810027,
"learning_rate": 3.3617021276595745e-06,
"loss": 0.4875,
"mean_token_accuracy": 0.8840958476066589,
"num_tokens": 8420148.0,
"step": 105
},
{
"epoch": 0.8396946564885496,
"grad_norm": 0.584421267267461,
"learning_rate": 3.255319148936171e-06,
"loss": 0.6218,
"mean_token_accuracy": 0.8632862210273743,
"num_tokens": 8811142.0,
"step": 110
},
{
"epoch": 0.8778625954198473,
"grad_norm": 0.6910785224299197,
"learning_rate": 3.1489361702127664e-06,
"loss": 0.5439,
"mean_token_accuracy": 0.8773741364479065,
"num_tokens": 9207371.0,
"step": 115
},
{
"epoch": 0.916030534351145,
"grad_norm": 0.6529252723486935,
"learning_rate": 3.042553191489362e-06,
"loss": 0.5855,
"mean_token_accuracy": 0.8638482332229614,
"num_tokens": 9606796.0,
"step": 120
},
{
"epoch": 0.9541984732824428,
"grad_norm": 0.8472446417517443,
"learning_rate": 2.9361702127659574e-06,
"loss": 0.5128,
"mean_token_accuracy": 0.8809929728507996,
"num_tokens": 10009722.0,
"step": 125
},
{
"epoch": 0.9923664122137404,
"grad_norm": 0.551644409578081,
"learning_rate": 2.8297872340425537e-06,
"loss": 0.4421,
"mean_token_accuracy": 0.8924627542495728,
"num_tokens": 10403148.0,
"step": 130
},
{
"epoch": 1.0305343511450382,
"grad_norm": 0.5625472220277399,
"learning_rate": 2.7234042553191492e-06,
"loss": 0.4711,
"mean_token_accuracy": 0.8930499076843261,
"num_tokens": 10754393.0,
"step": 135
},
{
"epoch": 1.0687022900763359,
"grad_norm": 0.7509720129792777,
"learning_rate": 2.6170212765957447e-06,
"loss": 0.6222,
"mean_token_accuracy": 0.8663328528404236,
"num_tokens": 11130826.0,
"step": 140
},
{
"epoch": 1.1068702290076335,
"grad_norm": 0.618538531749113,
"learning_rate": 2.5106382978723402e-06,
"loss": 0.3556,
"mean_token_accuracy": 0.9134134173393249,
"num_tokens": 11539730.0,
"step": 145
},
{
"epoch": 1.1450381679389312,
"grad_norm": 0.5959373365925987,
"learning_rate": 2.404255319148936e-06,
"loss": 0.5278,
"mean_token_accuracy": 0.8801125884056091,
"num_tokens": 11935963.0,
"step": 150
},
{
"epoch": 1.183206106870229,
"grad_norm": 0.5865155175564103,
"learning_rate": 2.297872340425532e-06,
"loss": 0.4265,
"mean_token_accuracy": 0.8947603702545166,
"num_tokens": 12343667.0,
"step": 155
},
{
"epoch": 1.2213740458015268,
"grad_norm": 0.5659455399478734,
"learning_rate": 2.191489361702128e-06,
"loss": 0.383,
"mean_token_accuracy": 0.9063637614250183,
"num_tokens": 12738487.0,
"step": 160
},
{
"epoch": 1.2595419847328244,
"grad_norm": 0.5226402464959303,
"learning_rate": 2.0851063829787235e-06,
"loss": 0.397,
"mean_token_accuracy": 0.9050293564796448,
"num_tokens": 13144435.0,
"step": 165
},
{
"epoch": 1.297709923664122,
"grad_norm": 0.6417118035542916,
"learning_rate": 1.9787234042553194e-06,
"loss": 0.5203,
"mean_token_accuracy": 0.8780357480049134,
"num_tokens": 13552365.0,
"step": 170
},
{
"epoch": 1.33587786259542,
"grad_norm": 0.5899140268106529,
"learning_rate": 1.872340425531915e-06,
"loss": 0.4622,
"mean_token_accuracy": 0.894351315498352,
"num_tokens": 13952271.0,
"step": 175
},
{
"epoch": 1.3740458015267176,
"grad_norm": 0.6961173596879818,
"learning_rate": 1.7659574468085109e-06,
"loss": 0.448,
"mean_token_accuracy": 0.8905507564544678,
"num_tokens": 14347634.0,
"step": 180
},
{
"epoch": 1.4122137404580153,
"grad_norm": 0.7939108466614023,
"learning_rate": 1.6595744680851064e-06,
"loss": 0.5382,
"mean_token_accuracy": 0.8792445421218872,
"num_tokens": 14749885.0,
"step": 185
},
{
"epoch": 1.450381679389313,
"grad_norm": 0.6478170620292795,
"learning_rate": 1.5531914893617023e-06,
"loss": 0.398,
"mean_token_accuracy": 0.9026367902755738,
"num_tokens": 15159485.0,
"step": 190
},
{
"epoch": 1.4885496183206106,
"grad_norm": 0.6144372382488293,
"learning_rate": 1.4468085106382978e-06,
"loss": 0.3856,
"mean_token_accuracy": 0.9071364164352417,
"num_tokens": 15555175.0,
"step": 195
},
{
"epoch": 1.5267175572519083,
"grad_norm": 0.5792827372554384,
"learning_rate": 1.3404255319148937e-06,
"loss": 0.3427,
"mean_token_accuracy": 0.9158633589744568,
"num_tokens": 15961426.0,
"step": 200
},
{
"epoch": 1.5648854961832062,
"grad_norm": 0.6055405255828548,
"learning_rate": 1.2340425531914894e-06,
"loss": 0.4136,
"mean_token_accuracy": 0.9000585198402404,
"num_tokens": 16365746.0,
"step": 205
},
{
"epoch": 1.6030534351145038,
"grad_norm": 0.6399791944894293,
"learning_rate": 1.1276595744680851e-06,
"loss": 0.4336,
"mean_token_accuracy": 0.8964980363845825,
"num_tokens": 16756037.0,
"step": 210
},
{
"epoch": 1.6412213740458015,
"grad_norm": 0.7671848929989695,
"learning_rate": 1.0212765957446809e-06,
"loss": 0.5051,
"mean_token_accuracy": 0.8887869715690613,
"num_tokens": 17141806.0,
"step": 215
},
{
"epoch": 1.6793893129770994,
"grad_norm": 0.4980858703360447,
"learning_rate": 9.148936170212766e-07,
"loss": 0.3698,
"mean_token_accuracy": 0.9083195209503174,
"num_tokens": 17539462.0,
"step": 220
},
{
"epoch": 1.717557251908397,
"grad_norm": 0.643707317701808,
"learning_rate": 8.085106382978725e-07,
"loss": 0.5255,
"mean_token_accuracy": 0.8803560018539429,
"num_tokens": 17946504.0,
"step": 225
},
{
"epoch": 1.7557251908396947,
"grad_norm": 0.6085852220164623,
"learning_rate": 7.021276595744682e-07,
"loss": 0.4066,
"mean_token_accuracy": 0.8994758605957032,
"num_tokens": 18356104.0,
"step": 230
},
{
"epoch": 1.7938931297709924,
"grad_norm": 0.5913104137597855,
"learning_rate": 5.957446808510639e-07,
"loss": 0.4863,
"mean_token_accuracy": 0.8849827289581299,
"num_tokens": 18757709.0,
"step": 235
},
{
"epoch": 1.83206106870229,
"grad_norm": 0.8448497829484911,
"learning_rate": 4.893617021276596e-07,
"loss": 0.5258,
"mean_token_accuracy": 0.8826202154159546,
"num_tokens": 19165468.0,
"step": 240
},
{
"epoch": 1.8702290076335877,
"grad_norm": 0.5878320328654364,
"learning_rate": 3.8297872340425535e-07,
"loss": 0.4478,
"mean_token_accuracy": 0.8924641013145447,
"num_tokens": 19562227.0,
"step": 245
},
{
"epoch": 1.9083969465648853,
"grad_norm": 0.6221314800813964,
"learning_rate": 2.7659574468085106e-07,
"loss": 0.579,
"mean_token_accuracy": 0.8665671706199646,
"num_tokens": 19958088.0,
"step": 250
},
{
"epoch": 1.9465648854961832,
"grad_norm": 0.4957957383410633,
"learning_rate": 1.7021276595744683e-07,
"loss": 0.3479,
"mean_token_accuracy": 0.9116143703460693,
"num_tokens": 20348442.0,
"step": 255
},
{
"epoch": 1.984732824427481,
"grad_norm": 0.5216187078326494,
"learning_rate": 6.382978723404255e-08,
"loss": 0.3969,
"mean_token_accuracy": 0.9038890838623047,
"num_tokens": 20755096.0,
"step": 260
},
{
"epoch": 2.0,
"mean_token_accuracy": 0.9072179198265076,
"num_tokens": 20867736.0,
"step": 262,
"total_flos": 41921614839808.0,
"train_loss": 0.5300418632176086,
"train_runtime": 511.2418,
"train_samples_per_second": 4.08,
"train_steps_per_second": 0.512
}
],
"logging_steps": 5,
"max_steps": 262,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 41921614839808.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}