Frederick's picture
Add clause model
cf95399
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 3729,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 9.919549477071602e-06,
"loss": 2.5542,
"step": 100
},
{
"epoch": 0.16,
"learning_rate": 9.839098954143203e-06,
"loss": 2.0524,
"step": 200
},
{
"epoch": 0.24,
"learning_rate": 9.758648431214803e-06,
"loss": 1.7355,
"step": 300
},
{
"epoch": 0.32,
"learning_rate": 9.678197908286404e-06,
"loss": 1.5379,
"step": 400
},
{
"epoch": 0.4,
"learning_rate": 9.597747385358007e-06,
"loss": 1.4171,
"step": 500
},
{
"epoch": 0.48,
"learning_rate": 9.517296862429608e-06,
"loss": 1.3007,
"step": 600
},
{
"epoch": 0.56,
"learning_rate": 9.436846339501209e-06,
"loss": 1.2555,
"step": 700
},
{
"epoch": 0.64,
"learning_rate": 9.35639581657281e-06,
"loss": 1.1933,
"step": 800
},
{
"epoch": 0.72,
"learning_rate": 9.27594529364441e-06,
"loss": 1.211,
"step": 900
},
{
"epoch": 0.8,
"learning_rate": 9.195494770716011e-06,
"loss": 1.1573,
"step": 1000
},
{
"epoch": 0.88,
"learning_rate": 9.11504424778761e-06,
"loss": 1.1225,
"step": 1100
},
{
"epoch": 0.97,
"learning_rate": 9.034593724859211e-06,
"loss": 1.0805,
"step": 1200
},
{
"epoch": 1.05,
"learning_rate": 8.954143201930812e-06,
"loss": 1.0202,
"step": 1300
},
{
"epoch": 1.13,
"learning_rate": 8.873692679002415e-06,
"loss": 1.0192,
"step": 1400
},
{
"epoch": 1.21,
"learning_rate": 8.793242156074015e-06,
"loss": 0.9795,
"step": 1500
},
{
"epoch": 1.29,
"learning_rate": 8.712791633145616e-06,
"loss": 1.0127,
"step": 1600
},
{
"epoch": 1.37,
"learning_rate": 8.632341110217217e-06,
"loss": 0.9978,
"step": 1700
},
{
"epoch": 1.45,
"learning_rate": 8.551890587288818e-06,
"loss": 0.9469,
"step": 1800
},
{
"epoch": 1.53,
"learning_rate": 8.471440064360419e-06,
"loss": 1.0574,
"step": 1900
},
{
"epoch": 1.61,
"learning_rate": 8.39098954143202e-06,
"loss": 0.9992,
"step": 2000
},
{
"epoch": 1.69,
"learning_rate": 8.31053901850362e-06,
"loss": 0.9614,
"step": 2100
},
{
"epoch": 1.77,
"learning_rate": 8.230088495575221e-06,
"loss": 0.9803,
"step": 2200
},
{
"epoch": 1.85,
"learning_rate": 8.149637972646824e-06,
"loss": 1.0217,
"step": 2300
},
{
"epoch": 1.93,
"learning_rate": 8.069187449718425e-06,
"loss": 0.981,
"step": 2400
},
{
"epoch": 2.01,
"learning_rate": 7.988736926790026e-06,
"loss": 0.9652,
"step": 2500
},
{
"epoch": 2.09,
"learning_rate": 7.908286403861627e-06,
"loss": 0.8977,
"step": 2600
},
{
"epoch": 2.17,
"learning_rate": 7.827835880933227e-06,
"loss": 0.8963,
"step": 2700
},
{
"epoch": 2.25,
"learning_rate": 7.747385358004828e-06,
"loss": 0.8518,
"step": 2800
},
{
"epoch": 2.33,
"learning_rate": 7.666934835076429e-06,
"loss": 0.9323,
"step": 2900
},
{
"epoch": 2.41,
"learning_rate": 7.586484312148029e-06,
"loss": 0.8418,
"step": 3000
},
{
"epoch": 2.49,
"learning_rate": 7.50603378921963e-06,
"loss": 0.8833,
"step": 3100
},
{
"epoch": 2.57,
"learning_rate": 7.425583266291232e-06,
"loss": 0.8693,
"step": 3200
},
{
"epoch": 2.65,
"learning_rate": 7.3451327433628326e-06,
"loss": 0.8958,
"step": 3300
},
{
"epoch": 2.74,
"learning_rate": 7.2646822204344334e-06,
"loss": 0.906,
"step": 3400
},
{
"epoch": 2.82,
"learning_rate": 7.184231697506034e-06,
"loss": 0.8296,
"step": 3500
},
{
"epoch": 2.9,
"learning_rate": 7.103781174577635e-06,
"loss": 0.879,
"step": 3600
},
{
"epoch": 2.98,
"learning_rate": 7.023330651649236e-06,
"loss": 0.8278,
"step": 3700
}
],
"max_steps": 12430,
"num_train_epochs": 10,
"total_flos": 3923746763567616.0,
"trial_name": null,
"trial_params": null
}