| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.5360983102918588, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07680491551459294, | |
| "grad_norm": 0.15998965501785278, | |
| "learning_rate": 0.0002, | |
| "loss": 0.8173, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15360983102918588, | |
| "grad_norm": 0.1599128544330597, | |
| "learning_rate": 0.00019965952472163515, | |
| "loss": 0.5618, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2304147465437788, | |
| "grad_norm": 0.1505613625049591, | |
| "learning_rate": 0.0001986404173548442, | |
| "loss": 0.5308, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.30721966205837176, | |
| "grad_norm": 0.13830986618995667, | |
| "learning_rate": 0.0001969496175169149, | |
| "loss": 0.5155, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.38402457757296465, | |
| "grad_norm": 0.12936702370643616, | |
| "learning_rate": 0.00019459863871875693, | |
| "loss": 0.5085, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4608294930875576, | |
| "grad_norm": 0.1287979781627655, | |
| "learning_rate": 0.00019160348996358483, | |
| "loss": 0.4953, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5376344086021505, | |
| "grad_norm": 0.13770239055156708, | |
| "learning_rate": 0.0001879845667335219, | |
| "loss": 0.4883, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6144393241167435, | |
| "grad_norm": 0.12349069118499756, | |
| "learning_rate": 0.00018376651210645086, | |
| "loss": 0.4828, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6912442396313364, | |
| "grad_norm": 0.1296069324016571, | |
| "learning_rate": 0.0001789780489488379, | |
| "loss": 0.4793, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7680491551459293, | |
| "grad_norm": 0.14504997432231903, | |
| "learning_rate": 0.0001736517843272136, | |
| "loss": 0.4772, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8448540706605223, | |
| "grad_norm": 0.13662400841712952, | |
| "learning_rate": 0.00016782398747017174, | |
| "loss": 0.4714, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9216589861751152, | |
| "grad_norm": 0.13271653652191162, | |
| "learning_rate": 0.0001615343427928555, | |
| "loss": 0.4658, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9984639016897081, | |
| "grad_norm": 0.13312594592571259, | |
| "learning_rate": 0.00015482567966571136, | |
| "loss": 0.464, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.075268817204301, | |
| "grad_norm": 0.13460350036621094, | |
| "learning_rate": 0.00014774368076765274, | |
| "loss": 0.4452, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1520737327188941, | |
| "grad_norm": 0.12317976355552673, | |
| "learning_rate": 0.00014033657100960355, | |
| "loss": 0.446, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.228878648233487, | |
| "grad_norm": 0.12744882702827454, | |
| "learning_rate": 0.0001326547891466988, | |
| "loss": 0.443, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.30568356374808, | |
| "grad_norm": 0.13042095303535461, | |
| "learning_rate": 0.00012475064431530065, | |
| "loss": 0.4436, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.3824884792626728, | |
| "grad_norm": 0.12759792804718018, | |
| "learning_rate": 0.00011667795983364332, | |
| "loss": 0.4436, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.4592933947772657, | |
| "grad_norm": 0.1353318691253662, | |
| "learning_rate": 0.00010849170669164763, | |
| "loss": 0.4385, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.5360983102918588, | |
| "grad_norm": 0.13270655274391174, | |
| "learning_rate": 0.00010024762922565933, | |
| "loss": 0.4358, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1953, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.8344214986752e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |