Spaces:

Fas1
/

capybara_fas_ai

Runtime error

App Files Files Community

capybara_fas_ai / capybara-finetuned /checkpoint-493 /trainer_state.json

Fas1

Add Space config for Hugging Face6

f278544 7 months ago

raw

history blame contribute delete

9.31 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 493,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.02028397565922921,
	"grad_norm": 1.1712552309036255,
	"learning_rate": 0.0001993238674780257,
	"loss": 14.4092,
	"step": 10
	},
	{
	"epoch": 0.04056795131845842,
	"grad_norm": 0.48309916257858276,
	"learning_rate": 0.0001979716024340771,
	"loss": 0.2359,
	"step": 20
	},
	{
	"epoch": 0.060851926977687626,
	"grad_norm": 0.2885732352733612,
	"learning_rate": 0.00019661933739012849,
	"loss": 0.1483,
	"step": 30
	},
	{
	"epoch": 0.08113590263691683,
	"grad_norm": 0.2794722318649292,
	"learning_rate": 0.00019526707234617988,
	"loss": 0.1758,
	"step": 40
	},
	{
	"epoch": 0.10141987829614604,
	"grad_norm": 0.21984700858592987,
	"learning_rate": 0.00019391480730223125,
	"loss": 0.248,
	"step": 50
	},
	{
	"epoch": 0.12170385395537525,
	"grad_norm": 0.32381802797317505,
	"learning_rate": 0.00019256254225828262,
	"loss": 0.1618,
	"step": 60
	},
	{
	"epoch": 0.14198782961460446,
	"grad_norm": 0.28723254799842834,
	"learning_rate": 0.00019121027721433401,
	"loss": 0.1171,
	"step": 70
	},
	{
	"epoch": 0.16227180527383367,
	"grad_norm": 0.2172032743692398,
	"learning_rate": 0.0001898580121703854,
	"loss": 0.1621,
	"step": 80
	},
	{
	"epoch": 0.18255578093306288,
	"grad_norm": 0.23619882762432098,
	"learning_rate": 0.00018850574712643678,
	"loss": 0.1198,
	"step": 90
	},
	{
	"epoch": 0.2028397565922921,
	"grad_norm": 0.22938676178455353,
	"learning_rate": 0.00018715348208248818,
	"loss": 0.1546,
	"step": 100
	},
	{
	"epoch": 0.2231237322515213,
	"grad_norm": 0.30202436447143555,
	"learning_rate": 0.00018580121703853957,
	"loss": 0.0521,
	"step": 110
	},
	{
	"epoch": 0.2434077079107505,
	"grad_norm": 0.9085210561752319,
	"learning_rate": 0.00018444895199459097,
	"loss": 0.1844,
	"step": 120
	},
	{
	"epoch": 0.26369168356997974,
	"grad_norm": 0.33736127614974976,
	"learning_rate": 0.00018309668695064234,
	"loss": 0.0827,
	"step": 130
	},
	{
	"epoch": 0.2839756592292089,
	"grad_norm": 0.13767044246196747,
	"learning_rate": 0.00018174442190669373,
	"loss": 0.1092,
	"step": 140
	},
	{
	"epoch": 0.30425963488843816,
	"grad_norm": 0.2765495181083679,
	"learning_rate": 0.0001803921568627451,
	"loss": 0.1144,
	"step": 150
	},
	{
	"epoch": 0.32454361054766734,
	"grad_norm": 0.13134893774986267,
	"learning_rate": 0.0001790398918187965,
	"loss": 0.0914,
	"step": 160
	},
	{
	"epoch": 0.3448275862068966,
	"grad_norm": 0.15877078473567963,
	"learning_rate": 0.00017768762677484786,
	"loss": 0.0787,
	"step": 170
	},
	{
	"epoch": 0.36511156186612576,
	"grad_norm": 0.11392869800329208,
	"learning_rate": 0.00017633536173089926,
	"loss": 0.0483,
	"step": 180
	},
	{
	"epoch": 0.385395537525355,
	"grad_norm": 0.230561301112175,
	"learning_rate": 0.00017498309668695066,
	"loss": 0.0601,
	"step": 190
	},
	{
	"epoch": 0.4056795131845842,
	"grad_norm": 0.20257052779197693,
	"learning_rate": 0.00017363083164300205,
	"loss": 0.1253,
	"step": 200
	},
	{
	"epoch": 0.4259634888438134,
	"grad_norm": 0.25743618607521057,
	"learning_rate": 0.00017227856659905342,
	"loss": 0.0684,
	"step": 210
	},
	{
	"epoch": 0.4462474645030426,
	"grad_norm": 0.4015647768974304,
	"learning_rate": 0.00017092630155510482,
	"loss": 0.0734,
	"step": 220
	},
	{
	"epoch": 0.4665314401622718,
	"grad_norm": 1.1545509099960327,
	"learning_rate": 0.00016957403651115619,
	"loss": 0.1491,
	"step": 230
	},
	{
	"epoch": 0.486815415821501,
	"grad_norm": 0.3187839984893799,
	"learning_rate": 0.00016822177146720758,
	"loss": 0.101,
	"step": 240
	},
	{
	"epoch": 0.5070993914807302,
	"grad_norm": 0.1046043112874031,
	"learning_rate": 0.00016686950642325895,
	"loss": 0.0917,
	"step": 250
	},
	{
	"epoch": 0.5273833671399595,
	"grad_norm": 0.13540491461753845,
	"learning_rate": 0.00016551724137931035,
	"loss": 0.0516,
	"step": 260
	},
	{
	"epoch": 0.5476673427991886,
	"grad_norm": 0.33530837297439575,
	"learning_rate": 0.00016416497633536174,
	"loss": 0.1529,
	"step": 270
	},
	{
	"epoch": 0.5679513184584178,
	"grad_norm": 0.7428917288780212,
	"learning_rate": 0.00016281271129141314,
	"loss": 0.1771,
	"step": 280
	},
	{
	"epoch": 0.5882352941176471,
	"grad_norm": 0.11154314875602722,
	"learning_rate": 0.0001614604462474645,
	"loss": 0.0386,
	"step": 290
	},
	{
	"epoch": 0.6085192697768763,
	"grad_norm": 0.09267372637987137,
	"learning_rate": 0.0001601081812035159,
	"loss": 0.0625,
	"step": 300
	},
	{
	"epoch": 0.6288032454361054,
	"grad_norm": 0.24139204621315002,
	"learning_rate": 0.0001587559161595673,
	"loss": 0.1699,
	"step": 310
	},
	{
	"epoch": 0.6490872210953347,
	"grad_norm": 0.6370412111282349,
	"learning_rate": 0.00015740365111561867,
	"loss": 0.1857,
	"step": 320
	},
	{
	"epoch": 0.6693711967545639,
	"grad_norm": 0.07758647203445435,
	"learning_rate": 0.00015605138607167004,
	"loss": 0.1178,
	"step": 330
	},
	{
	"epoch": 0.6896551724137931,
	"grad_norm": 0.2090945690870285,
	"learning_rate": 0.00015469912102772143,
	"loss": 0.1247,
	"step": 340
	},
	{
	"epoch": 0.7099391480730223,
	"grad_norm": 0.24821089208126068,
	"learning_rate": 0.00015334685598377283,
	"loss": 0.1128,
	"step": 350
	},
	{
	"epoch": 0.7302231237322515,
	"grad_norm": 0.123480886220932,
	"learning_rate": 0.00015199459093982422,
	"loss": 0.1625,
	"step": 360
	},
	{
	"epoch": 0.7505070993914807,
	"grad_norm": 0.3176027238368988,
	"learning_rate": 0.0001506423258958756,
	"loss": 0.1215,
	"step": 370
	},
	{
	"epoch": 0.77079107505071,
	"grad_norm": 0.17533642053604126,
	"learning_rate": 0.000149290060851927,
	"loss": 0.1062,
	"step": 380
	},
	{
	"epoch": 0.7910750507099391,
	"grad_norm": 0.2660926878452301,
	"learning_rate": 0.00014793779580797838,
	"loss": 0.0877,
	"step": 390
	},
	{
	"epoch": 0.8113590263691683,
	"grad_norm": 0.5134332180023193,
	"learning_rate": 0.00014658553076402975,
	"loss": 0.0596,
	"step": 400
	},
	{
	"epoch": 0.8316430020283976,
	"grad_norm": 0.09723437577486038,
	"learning_rate": 0.00014523326572008115,
	"loss": 0.0448,
	"step": 410
	},
	{
	"epoch": 0.8519269776876268,
	"grad_norm": 0.40922313928604126,
	"learning_rate": 0.00014388100067613252,
	"loss": 0.0898,
	"step": 420
	},
	{
	"epoch": 0.8722109533468559,
	"grad_norm": 0.14394080638885498,
	"learning_rate": 0.0001425287356321839,
	"loss": 0.1304,
	"step": 430
	},
	{
	"epoch": 0.8924949290060852,
	"grad_norm": 0.578838050365448,
	"learning_rate": 0.0001411764705882353,
	"loss": 0.1038,
	"step": 440
	},
	{
	"epoch": 0.9127789046653144,
	"grad_norm": 0.46690645813941956,
	"learning_rate": 0.00013982420554428668,
	"loss": 0.2278,
	"step": 450
	},
	{
	"epoch": 0.9330628803245437,
	"grad_norm": 0.29226213693618774,
	"learning_rate": 0.00013847194050033807,
	"loss": 0.0747,
	"step": 460
	},
	{
	"epoch": 0.9533468559837728,
	"grad_norm": 0.14866693317890167,
	"learning_rate": 0.00013711967545638947,
	"loss": 0.1133,
	"step": 470
	},
	{
	"epoch": 0.973630831643002,
	"grad_norm": 0.08057406544685364,
	"learning_rate": 0.00013576741041244087,
	"loss": 0.0955,
	"step": 480
	},
	{
	"epoch": 0.9939148073022313,
	"grad_norm": 0.0708722174167633,
	"learning_rate": 0.00013441514536849223,
	"loss": 0.1018,
	"step": 490
	}
	],
	"logging_steps": 10,
	"max_steps": 1479,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 8109068113674240.0,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}