Fas1's picture
Add Space config for Hugging Face6
f278544
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 493,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02028397565922921,
"grad_norm": 1.1712552309036255,
"learning_rate": 0.0001993238674780257,
"loss": 14.4092,
"step": 10
},
{
"epoch": 0.04056795131845842,
"grad_norm": 0.48309916257858276,
"learning_rate": 0.0001979716024340771,
"loss": 0.2359,
"step": 20
},
{
"epoch": 0.060851926977687626,
"grad_norm": 0.2885732352733612,
"learning_rate": 0.00019661933739012849,
"loss": 0.1483,
"step": 30
},
{
"epoch": 0.08113590263691683,
"grad_norm": 0.2794722318649292,
"learning_rate": 0.00019526707234617988,
"loss": 0.1758,
"step": 40
},
{
"epoch": 0.10141987829614604,
"grad_norm": 0.21984700858592987,
"learning_rate": 0.00019391480730223125,
"loss": 0.248,
"step": 50
},
{
"epoch": 0.12170385395537525,
"grad_norm": 0.32381802797317505,
"learning_rate": 0.00019256254225828262,
"loss": 0.1618,
"step": 60
},
{
"epoch": 0.14198782961460446,
"grad_norm": 0.28723254799842834,
"learning_rate": 0.00019121027721433401,
"loss": 0.1171,
"step": 70
},
{
"epoch": 0.16227180527383367,
"grad_norm": 0.2172032743692398,
"learning_rate": 0.0001898580121703854,
"loss": 0.1621,
"step": 80
},
{
"epoch": 0.18255578093306288,
"grad_norm": 0.23619882762432098,
"learning_rate": 0.00018850574712643678,
"loss": 0.1198,
"step": 90
},
{
"epoch": 0.2028397565922921,
"grad_norm": 0.22938676178455353,
"learning_rate": 0.00018715348208248818,
"loss": 0.1546,
"step": 100
},
{
"epoch": 0.2231237322515213,
"grad_norm": 0.30202436447143555,
"learning_rate": 0.00018580121703853957,
"loss": 0.0521,
"step": 110
},
{
"epoch": 0.2434077079107505,
"grad_norm": 0.9085210561752319,
"learning_rate": 0.00018444895199459097,
"loss": 0.1844,
"step": 120
},
{
"epoch": 0.26369168356997974,
"grad_norm": 0.33736127614974976,
"learning_rate": 0.00018309668695064234,
"loss": 0.0827,
"step": 130
},
{
"epoch": 0.2839756592292089,
"grad_norm": 0.13767044246196747,
"learning_rate": 0.00018174442190669373,
"loss": 0.1092,
"step": 140
},
{
"epoch": 0.30425963488843816,
"grad_norm": 0.2765495181083679,
"learning_rate": 0.0001803921568627451,
"loss": 0.1144,
"step": 150
},
{
"epoch": 0.32454361054766734,
"grad_norm": 0.13134893774986267,
"learning_rate": 0.0001790398918187965,
"loss": 0.0914,
"step": 160
},
{
"epoch": 0.3448275862068966,
"grad_norm": 0.15877078473567963,
"learning_rate": 0.00017768762677484786,
"loss": 0.0787,
"step": 170
},
{
"epoch": 0.36511156186612576,
"grad_norm": 0.11392869800329208,
"learning_rate": 0.00017633536173089926,
"loss": 0.0483,
"step": 180
},
{
"epoch": 0.385395537525355,
"grad_norm": 0.230561301112175,
"learning_rate": 0.00017498309668695066,
"loss": 0.0601,
"step": 190
},
{
"epoch": 0.4056795131845842,
"grad_norm": 0.20257052779197693,
"learning_rate": 0.00017363083164300205,
"loss": 0.1253,
"step": 200
},
{
"epoch": 0.4259634888438134,
"grad_norm": 0.25743618607521057,
"learning_rate": 0.00017227856659905342,
"loss": 0.0684,
"step": 210
},
{
"epoch": 0.4462474645030426,
"grad_norm": 0.4015647768974304,
"learning_rate": 0.00017092630155510482,
"loss": 0.0734,
"step": 220
},
{
"epoch": 0.4665314401622718,
"grad_norm": 1.1545509099960327,
"learning_rate": 0.00016957403651115619,
"loss": 0.1491,
"step": 230
},
{
"epoch": 0.486815415821501,
"grad_norm": 0.3187839984893799,
"learning_rate": 0.00016822177146720758,
"loss": 0.101,
"step": 240
},
{
"epoch": 0.5070993914807302,
"grad_norm": 0.1046043112874031,
"learning_rate": 0.00016686950642325895,
"loss": 0.0917,
"step": 250
},
{
"epoch": 0.5273833671399595,
"grad_norm": 0.13540491461753845,
"learning_rate": 0.00016551724137931035,
"loss": 0.0516,
"step": 260
},
{
"epoch": 0.5476673427991886,
"grad_norm": 0.33530837297439575,
"learning_rate": 0.00016416497633536174,
"loss": 0.1529,
"step": 270
},
{
"epoch": 0.5679513184584178,
"grad_norm": 0.7428917288780212,
"learning_rate": 0.00016281271129141314,
"loss": 0.1771,
"step": 280
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.11154314875602722,
"learning_rate": 0.0001614604462474645,
"loss": 0.0386,
"step": 290
},
{
"epoch": 0.6085192697768763,
"grad_norm": 0.09267372637987137,
"learning_rate": 0.0001601081812035159,
"loss": 0.0625,
"step": 300
},
{
"epoch": 0.6288032454361054,
"grad_norm": 0.24139204621315002,
"learning_rate": 0.0001587559161595673,
"loss": 0.1699,
"step": 310
},
{
"epoch": 0.6490872210953347,
"grad_norm": 0.6370412111282349,
"learning_rate": 0.00015740365111561867,
"loss": 0.1857,
"step": 320
},
{
"epoch": 0.6693711967545639,
"grad_norm": 0.07758647203445435,
"learning_rate": 0.00015605138607167004,
"loss": 0.1178,
"step": 330
},
{
"epoch": 0.6896551724137931,
"grad_norm": 0.2090945690870285,
"learning_rate": 0.00015469912102772143,
"loss": 0.1247,
"step": 340
},
{
"epoch": 0.7099391480730223,
"grad_norm": 0.24821089208126068,
"learning_rate": 0.00015334685598377283,
"loss": 0.1128,
"step": 350
},
{
"epoch": 0.7302231237322515,
"grad_norm": 0.123480886220932,
"learning_rate": 0.00015199459093982422,
"loss": 0.1625,
"step": 360
},
{
"epoch": 0.7505070993914807,
"grad_norm": 0.3176027238368988,
"learning_rate": 0.0001506423258958756,
"loss": 0.1215,
"step": 370
},
{
"epoch": 0.77079107505071,
"grad_norm": 0.17533642053604126,
"learning_rate": 0.000149290060851927,
"loss": 0.1062,
"step": 380
},
{
"epoch": 0.7910750507099391,
"grad_norm": 0.2660926878452301,
"learning_rate": 0.00014793779580797838,
"loss": 0.0877,
"step": 390
},
{
"epoch": 0.8113590263691683,
"grad_norm": 0.5134332180023193,
"learning_rate": 0.00014658553076402975,
"loss": 0.0596,
"step": 400
},
{
"epoch": 0.8316430020283976,
"grad_norm": 0.09723437577486038,
"learning_rate": 0.00014523326572008115,
"loss": 0.0448,
"step": 410
},
{
"epoch": 0.8519269776876268,
"grad_norm": 0.40922313928604126,
"learning_rate": 0.00014388100067613252,
"loss": 0.0898,
"step": 420
},
{
"epoch": 0.8722109533468559,
"grad_norm": 0.14394080638885498,
"learning_rate": 0.0001425287356321839,
"loss": 0.1304,
"step": 430
},
{
"epoch": 0.8924949290060852,
"grad_norm": 0.578838050365448,
"learning_rate": 0.0001411764705882353,
"loss": 0.1038,
"step": 440
},
{
"epoch": 0.9127789046653144,
"grad_norm": 0.46690645813941956,
"learning_rate": 0.00013982420554428668,
"loss": 0.2278,
"step": 450
},
{
"epoch": 0.9330628803245437,
"grad_norm": 0.29226213693618774,
"learning_rate": 0.00013847194050033807,
"loss": 0.0747,
"step": 460
},
{
"epoch": 0.9533468559837728,
"grad_norm": 0.14866693317890167,
"learning_rate": 0.00013711967545638947,
"loss": 0.1133,
"step": 470
},
{
"epoch": 0.973630831643002,
"grad_norm": 0.08057406544685364,
"learning_rate": 0.00013576741041244087,
"loss": 0.0955,
"step": 480
},
{
"epoch": 0.9939148073022313,
"grad_norm": 0.0708722174167633,
"learning_rate": 0.00013441514536849223,
"loss": 0.1018,
"step": 490
}
],
"logging_steps": 10,
"max_steps": 1479,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8109068113674240.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}