Transformers
PyTorch
TensorBoard
t5
text2text-generation
Generated from Trainer
text-generation-inference
Instructions to use abletobetable/rut5-base-absum-tech-support-calls with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use abletobetable/rut5-base-absum-tech-support-calls with Transformers:
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("abletobetable/rut5-base-absum-tech-support-calls")
model = AutoModelForSeq2SeqLM.from_pretrained("abletobetable/rut5-base-absum-tech-support-calls")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.3778, | |
| "best_model_checkpoint": "rut5-base-absum-tech-support-calls/checkpoint-2000", | |
| "epoch": 144.44444444444446, | |
| "global_step": 2600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 5e-06, | |
| "loss": 2.7022, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_avg_rouge_f": 0.0, | |
| "eval_gen_len": 6.875, | |
| "eval_loss": 2.296959400177002, | |
| "eval_rouge-1": 0.0, | |
| "eval_rouge-2": 0.0, | |
| "eval_rouge-l": 0.0, | |
| "eval_runtime": 3.1977, | |
| "eval_samples_per_second": 2.502, | |
| "eval_steps_per_second": 2.502, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 1e-05, | |
| "loss": 2.2932, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "eval_avg_rouge_f": 0.0, | |
| "eval_gen_len": 10.375, | |
| "eval_loss": 1.8183443546295166, | |
| "eval_rouge-1": 0.0, | |
| "eval_rouge-2": 0.0, | |
| "eval_rouge-l": 0.0, | |
| "eval_runtime": 1.7212, | |
| "eval_samples_per_second": 4.648, | |
| "eval_steps_per_second": 4.648, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 1.8234, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "eval_avg_rouge_f": 0.3018, | |
| "eval_gen_len": 14.0, | |
| "eval_loss": 1.4890449047088623, | |
| "eval_rouge-1": 0.3588, | |
| "eval_rouge-2": 0.2205, | |
| "eval_rouge-l": 0.3262, | |
| "eval_runtime": 2.5527, | |
| "eval_samples_per_second": 3.134, | |
| "eval_steps_per_second": 3.134, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3727, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "eval_avg_rouge_f": 0.2771, | |
| "eval_gen_len": 12.375, | |
| "eval_loss": 1.373950719833374, | |
| "eval_rouge-1": 0.3493, | |
| "eval_rouge-2": 0.1653, | |
| "eval_rouge-l": 0.3167, | |
| "eval_runtime": 2.0484, | |
| "eval_samples_per_second": 3.906, | |
| "eval_steps_per_second": 3.906, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "learning_rate": 1.9767441860465116e-05, | |
| "loss": 1.0367, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "eval_avg_rouge_f": 0.1974, | |
| "eval_gen_len": 15.375, | |
| "eval_loss": 1.3832566738128662, | |
| "eval_rouge-1": 0.2607, | |
| "eval_rouge-2": 0.0984, | |
| "eval_rouge-l": 0.2331, | |
| "eval_runtime": 3.0521, | |
| "eval_samples_per_second": 2.621, | |
| "eval_steps_per_second": 2.621, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 16.67, | |
| "learning_rate": 1.9534883720930235e-05, | |
| "loss": 0.841, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 16.67, | |
| "eval_avg_rouge_f": 0.3055, | |
| "eval_gen_len": 16.0, | |
| "eval_loss": 1.3515713214874268, | |
| "eval_rouge-1": 0.3713, | |
| "eval_rouge-2": 0.1857, | |
| "eval_rouge-l": 0.3594, | |
| "eval_runtime": 2.4808, | |
| "eval_samples_per_second": 3.225, | |
| "eval_steps_per_second": 3.225, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "learning_rate": 1.9302325581395353e-05, | |
| "loss": 0.7182, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "eval_avg_rouge_f": 0.2672, | |
| "eval_gen_len": 16.125, | |
| "eval_loss": 1.3606797456741333, | |
| "eval_rouge-1": 0.3352, | |
| "eval_rouge-2": 0.143, | |
| "eval_rouge-l": 0.3233, | |
| "eval_runtime": 2.5469, | |
| "eval_samples_per_second": 3.141, | |
| "eval_steps_per_second": 3.141, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 22.22, | |
| "learning_rate": 1.9069767441860468e-05, | |
| "loss": 0.5102, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 22.22, | |
| "eval_avg_rouge_f": 0.2849, | |
| "eval_gen_len": 16.625, | |
| "eval_loss": 1.3673444986343384, | |
| "eval_rouge-1": 0.36, | |
| "eval_rouge-2": 0.1597, | |
| "eval_rouge-l": 0.3349, | |
| "eval_runtime": 2.6789, | |
| "eval_samples_per_second": 2.986, | |
| "eval_steps_per_second": 2.986, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 1.8837209302325582e-05, | |
| "loss": 0.4595, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_avg_rouge_f": 0.3228, | |
| "eval_gen_len": 17.125, | |
| "eval_loss": 1.371541976928711, | |
| "eval_rouge-1": 0.3892, | |
| "eval_rouge-2": 0.2153, | |
| "eval_rouge-l": 0.3641, | |
| "eval_runtime": 2.6184, | |
| "eval_samples_per_second": 3.055, | |
| "eval_steps_per_second": 3.055, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 27.78, | |
| "learning_rate": 1.86046511627907e-05, | |
| "loss": 0.3886, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 27.78, | |
| "eval_avg_rouge_f": 0.3252, | |
| "eval_gen_len": 16.375, | |
| "eval_loss": 1.4634039402008057, | |
| "eval_rouge-1": 0.3801, | |
| "eval_rouge-2": 0.2274, | |
| "eval_rouge-l": 0.3682, | |
| "eval_runtime": 2.8205, | |
| "eval_samples_per_second": 2.836, | |
| "eval_steps_per_second": 2.836, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 30.56, | |
| "learning_rate": 1.8372093023255815e-05, | |
| "loss": 0.3158, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 30.56, | |
| "eval_avg_rouge_f": 0.331, | |
| "eval_gen_len": 16.75, | |
| "eval_loss": 1.5123608112335205, | |
| "eval_rouge-1": 0.3938, | |
| "eval_rouge-2": 0.2319, | |
| "eval_rouge-l": 0.3672, | |
| "eval_runtime": 3.0478, | |
| "eval_samples_per_second": 2.625, | |
| "eval_steps_per_second": 2.625, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 33.33, | |
| "learning_rate": 1.813953488372093e-05, | |
| "loss": 0.2687, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 33.33, | |
| "eval_avg_rouge_f": 0.3468, | |
| "eval_gen_len": 16.5, | |
| "eval_loss": 1.5868151187896729, | |
| "eval_rouge-1": 0.3987, | |
| "eval_rouge-2": 0.2568, | |
| "eval_rouge-l": 0.3848, | |
| "eval_runtime": 2.5696, | |
| "eval_samples_per_second": 3.113, | |
| "eval_steps_per_second": 3.113, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 36.11, | |
| "learning_rate": 1.790697674418605e-05, | |
| "loss": 0.2361, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 36.11, | |
| "eval_avg_rouge_f": 0.3163, | |
| "eval_gen_len": 17.75, | |
| "eval_loss": 1.6459990739822388, | |
| "eval_rouge-1": 0.375, | |
| "eval_rouge-2": 0.2107, | |
| "eval_rouge-l": 0.3631, | |
| "eval_runtime": 2.8706, | |
| "eval_samples_per_second": 2.787, | |
| "eval_steps_per_second": 2.787, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 38.89, | |
| "learning_rate": 1.7674418604651163e-05, | |
| "loss": 0.1991, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 38.89, | |
| "eval_avg_rouge_f": 0.3085, | |
| "eval_gen_len": 16.25, | |
| "eval_loss": 1.6946561336517334, | |
| "eval_rouge-1": 0.3605, | |
| "eval_rouge-2": 0.2177, | |
| "eval_rouge-l": 0.3474, | |
| "eval_runtime": 2.519, | |
| "eval_samples_per_second": 3.176, | |
| "eval_steps_per_second": 3.176, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 41.67, | |
| "learning_rate": 1.744186046511628e-05, | |
| "loss": 0.151, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 41.67, | |
| "eval_avg_rouge_f": 0.3222, | |
| "eval_gen_len": 16.5, | |
| "eval_loss": 1.8248298168182373, | |
| "eval_rouge-1": 0.3832, | |
| "eval_rouge-2": 0.2274, | |
| "eval_rouge-l": 0.3559, | |
| "eval_runtime": 2.6923, | |
| "eval_samples_per_second": 2.971, | |
| "eval_steps_per_second": 2.971, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 44.44, | |
| "learning_rate": 1.7209302325581396e-05, | |
| "loss": 0.1517, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 44.44, | |
| "eval_avg_rouge_f": 0.3811, | |
| "eval_gen_len": 16.875, | |
| "eval_loss": 1.7883902788162231, | |
| "eval_rouge-1": 0.4309, | |
| "eval_rouge-2": 0.294, | |
| "eval_rouge-l": 0.4184, | |
| "eval_runtime": 2.5559, | |
| "eval_samples_per_second": 3.13, | |
| "eval_steps_per_second": 3.13, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 47.22, | |
| "learning_rate": 1.697674418604651e-05, | |
| "loss": 0.1444, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 47.22, | |
| "eval_avg_rouge_f": 0.322, | |
| "eval_gen_len": 17.125, | |
| "eval_loss": 1.8518762588500977, | |
| "eval_rouge-1": 0.3843, | |
| "eval_rouge-2": 0.2107, | |
| "eval_rouge-l": 0.3711, | |
| "eval_runtime": 2.7195, | |
| "eval_samples_per_second": 2.942, | |
| "eval_steps_per_second": 2.942, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 1.674418604651163e-05, | |
| "loss": 0.1106, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_avg_rouge_f": 0.3209, | |
| "eval_gen_len": 17.5, | |
| "eval_loss": 1.9637408256530762, | |
| "eval_rouge-1": 0.383, | |
| "eval_rouge-2": 0.2107, | |
| "eval_rouge-l": 0.3691, | |
| "eval_runtime": 2.6625, | |
| "eval_samples_per_second": 3.005, | |
| "eval_steps_per_second": 3.005, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 52.78, | |
| "learning_rate": 1.6511627906976747e-05, | |
| "loss": 0.0961, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 52.78, | |
| "eval_avg_rouge_f": 0.3103, | |
| "eval_gen_len": 16.75, | |
| "eval_loss": 2.07181715965271, | |
| "eval_rouge-1": 0.3645, | |
| "eval_rouge-2": 0.2177, | |
| "eval_rouge-l": 0.3488, | |
| "eval_runtime": 2.7383, | |
| "eval_samples_per_second": 2.921, | |
| "eval_steps_per_second": 2.921, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 55.56, | |
| "learning_rate": 1.6279069767441862e-05, | |
| "loss": 0.1131, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 55.56, | |
| "eval_avg_rouge_f": 0.3067, | |
| "eval_gen_len": 16.75, | |
| "eval_loss": 1.9934816360473633, | |
| "eval_rouge-1": 0.3602, | |
| "eval_rouge-2": 0.2153, | |
| "eval_rouge-l": 0.3446, | |
| "eval_runtime": 2.6295, | |
| "eval_samples_per_second": 3.042, | |
| "eval_steps_per_second": 3.042, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 58.33, | |
| "learning_rate": 1.6046511627906977e-05, | |
| "loss": 0.0996, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 58.33, | |
| "eval_avg_rouge_f": 0.3712, | |
| "eval_gen_len": 16.0, | |
| "eval_loss": 2.06162166595459, | |
| "eval_rouge-1": 0.4153, | |
| "eval_rouge-2": 0.2986, | |
| "eval_rouge-l": 0.3996, | |
| "eval_runtime": 3.0388, | |
| "eval_samples_per_second": 2.633, | |
| "eval_steps_per_second": 2.633, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 61.11, | |
| "learning_rate": 1.5813953488372095e-05, | |
| "loss": 0.0663, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 61.11, | |
| "eval_avg_rouge_f": 0.3786, | |
| "eval_gen_len": 14.625, | |
| "eval_loss": 2.1466333866119385, | |
| "eval_rouge-1": 0.4257, | |
| "eval_rouge-2": 0.301, | |
| "eval_rouge-l": 0.409, | |
| "eval_runtime": 3.1902, | |
| "eval_samples_per_second": 2.508, | |
| "eval_steps_per_second": 2.508, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 63.89, | |
| "learning_rate": 1.558139534883721e-05, | |
| "loss": 0.0789, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 63.89, | |
| "eval_avg_rouge_f": 0.3728, | |
| "eval_gen_len": 16.0, | |
| "eval_loss": 2.1657214164733887, | |
| "eval_rouge-1": 0.4166, | |
| "eval_rouge-2": 0.301, | |
| "eval_rouge-l": 0.4009, | |
| "eval_runtime": 2.4781, | |
| "eval_samples_per_second": 3.228, | |
| "eval_steps_per_second": 3.228, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 66.67, | |
| "learning_rate": 1.5348837209302328e-05, | |
| "loss": 0.073, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 66.67, | |
| "eval_avg_rouge_f": 0.3713, | |
| "eval_gen_len": 16.25, | |
| "eval_loss": 2.251979351043701, | |
| "eval_rouge-1": 0.4131, | |
| "eval_rouge-2": 0.301, | |
| "eval_rouge-l": 0.3999, | |
| "eval_runtime": 3.4503, | |
| "eval_samples_per_second": 2.319, | |
| "eval_steps_per_second": 2.319, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 69.44, | |
| "learning_rate": 1.5116279069767443e-05, | |
| "loss": 0.0739, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 69.44, | |
| "eval_avg_rouge_f": 0.3051, | |
| "eval_gen_len": 17.0, | |
| "eval_loss": 2.260244369506836, | |
| "eval_rouge-1": 0.3582, | |
| "eval_rouge-2": 0.2145, | |
| "eval_rouge-l": 0.3426, | |
| "eval_runtime": 2.5948, | |
| "eval_samples_per_second": 3.083, | |
| "eval_steps_per_second": 3.083, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 72.22, | |
| "learning_rate": 1.488372093023256e-05, | |
| "loss": 0.0799, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 72.22, | |
| "eval_avg_rouge_f": 0.3156, | |
| "eval_gen_len": 16.75, | |
| "eval_loss": 2.3278074264526367, | |
| "eval_rouge-1": 0.369, | |
| "eval_rouge-2": 0.2242, | |
| "eval_rouge-l": 0.3534, | |
| "eval_runtime": 3.3341, | |
| "eval_samples_per_second": 2.399, | |
| "eval_steps_per_second": 2.399, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "learning_rate": 1.4651162790697674e-05, | |
| "loss": 0.0546, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_avg_rouge_f": 0.3164, | |
| "eval_gen_len": 16.5, | |
| "eval_loss": 2.402118444442749, | |
| "eval_rouge-1": 0.369, | |
| "eval_rouge-2": 0.2242, | |
| "eval_rouge-l": 0.3559, | |
| "eval_runtime": 2.5497, | |
| "eval_samples_per_second": 3.138, | |
| "eval_steps_per_second": 3.138, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 77.78, | |
| "learning_rate": 1.441860465116279e-05, | |
| "loss": 0.0674, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 77.78, | |
| "eval_avg_rouge_f": 0.3697, | |
| "eval_gen_len": 17.25, | |
| "eval_loss": 2.3492679595947266, | |
| "eval_rouge-1": 0.4149, | |
| "eval_rouge-2": 0.2924, | |
| "eval_rouge-l": 0.4017, | |
| "eval_runtime": 3.1613, | |
| "eval_samples_per_second": 2.531, | |
| "eval_steps_per_second": 2.531, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 80.56, | |
| "learning_rate": 1.4186046511627909e-05, | |
| "loss": 0.0459, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 80.56, | |
| "eval_avg_rouge_f": 0.3839, | |
| "eval_gen_len": 16.125, | |
| "eval_loss": 2.3503048419952393, | |
| "eval_rouge-1": 0.426, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.4104, | |
| "eval_runtime": 2.4756, | |
| "eval_samples_per_second": 3.232, | |
| "eval_steps_per_second": 3.232, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 83.33, | |
| "learning_rate": 1.3953488372093025e-05, | |
| "loss": 0.0501, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 83.33, | |
| "eval_avg_rouge_f": 0.3732, | |
| "eval_gen_len": 15.375, | |
| "eval_loss": 2.371870517730713, | |
| "eval_rouge-1": 0.4172, | |
| "eval_rouge-2": 0.301, | |
| "eval_rouge-l": 0.4016, | |
| "eval_runtime": 2.8658, | |
| "eval_samples_per_second": 2.792, | |
| "eval_steps_per_second": 2.792, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 86.11, | |
| "learning_rate": 1.372093023255814e-05, | |
| "loss": 0.0509, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 86.11, | |
| "eval_avg_rouge_f": 0.3926, | |
| "eval_gen_len": 16.375, | |
| "eval_loss": 2.4419479370117188, | |
| "eval_rouge-1": 0.4361, | |
| "eval_rouge-2": 0.3188, | |
| "eval_rouge-l": 0.4229, | |
| "eval_runtime": 3.0315, | |
| "eval_samples_per_second": 2.639, | |
| "eval_steps_per_second": 2.639, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 88.89, | |
| "learning_rate": 1.3488372093023257e-05, | |
| "loss": 0.0449, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 88.89, | |
| "eval_avg_rouge_f": 0.4026, | |
| "eval_gen_len": 16.375, | |
| "eval_loss": 2.3171658515930176, | |
| "eval_rouge-1": 0.4514, | |
| "eval_rouge-2": 0.3188, | |
| "eval_rouge-l": 0.4375, | |
| "eval_runtime": 3.636, | |
| "eval_samples_per_second": 2.2, | |
| "eval_steps_per_second": 2.2, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 91.67, | |
| "learning_rate": 1.3255813953488372e-05, | |
| "loss": 0.0408, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 91.67, | |
| "eval_avg_rouge_f": 0.3906, | |
| "eval_gen_len": 16.25, | |
| "eval_loss": 2.4437549114227295, | |
| "eval_rouge-1": 0.4349, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.4217, | |
| "eval_runtime": 2.4939, | |
| "eval_samples_per_second": 3.208, | |
| "eval_steps_per_second": 3.208, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 94.44, | |
| "learning_rate": 1.302325581395349e-05, | |
| "loss": 0.0357, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 94.44, | |
| "eval_avg_rouge_f": 0.3831, | |
| "eval_gen_len": 16.25, | |
| "eval_loss": 2.540635108947754, | |
| "eval_rouge-1": 0.4236, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.4104, | |
| "eval_runtime": 3.5767, | |
| "eval_samples_per_second": 2.237, | |
| "eval_steps_per_second": 2.237, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 97.22, | |
| "learning_rate": 1.2790697674418606e-05, | |
| "loss": 0.0403, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 97.22, | |
| "eval_avg_rouge_f": 0.3748, | |
| "eval_gen_len": 16.375, | |
| "eval_loss": 2.4441065788269043, | |
| "eval_rouge-1": 0.4111, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.398, | |
| "eval_runtime": 2.5203, | |
| "eval_samples_per_second": 3.174, | |
| "eval_steps_per_second": 3.174, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "learning_rate": 1.2558139534883723e-05, | |
| "loss": 0.0489, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_avg_rouge_f": 0.3768, | |
| "eval_gen_len": 16.125, | |
| "eval_loss": 2.459872245788574, | |
| "eval_rouge-1": 0.4154, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.3997, | |
| "eval_runtime": 3.4884, | |
| "eval_samples_per_second": 2.293, | |
| "eval_steps_per_second": 2.293, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 102.78, | |
| "learning_rate": 1.2325581395348838e-05, | |
| "loss": 0.032, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 102.78, | |
| "eval_avg_rouge_f": 0.407, | |
| "eval_gen_len": 15.0, | |
| "eval_loss": 2.623534679412842, | |
| "eval_rouge-1": 0.4515, | |
| "eval_rouge-2": 0.3335, | |
| "eval_rouge-l": 0.4359, | |
| "eval_runtime": 2.3389, | |
| "eval_samples_per_second": 3.42, | |
| "eval_steps_per_second": 3.42, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 105.56, | |
| "learning_rate": 1.2093023255813954e-05, | |
| "loss": 0.0379, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 105.56, | |
| "eval_avg_rouge_f": 0.407, | |
| "eval_gen_len": 15.125, | |
| "eval_loss": 2.6058127880096436, | |
| "eval_rouge-1": 0.4515, | |
| "eval_rouge-2": 0.3335, | |
| "eval_rouge-l": 0.4359, | |
| "eval_runtime": 3.2938, | |
| "eval_samples_per_second": 2.429, | |
| "eval_steps_per_second": 2.429, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 108.33, | |
| "learning_rate": 1.1860465116279072e-05, | |
| "loss": 0.0466, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 108.33, | |
| "eval_avg_rouge_f": 0.3768, | |
| "eval_gen_len": 16.125, | |
| "eval_loss": 2.5748019218444824, | |
| "eval_rouge-1": 0.4154, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.3997, | |
| "eval_runtime": 2.4802, | |
| "eval_samples_per_second": 3.226, | |
| "eval_steps_per_second": 3.226, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 111.11, | |
| "learning_rate": 1.1627906976744187e-05, | |
| "loss": 0.0317, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 111.11, | |
| "eval_avg_rouge_f": 0.3778, | |
| "eval_gen_len": 16.125, | |
| "eval_loss": 2.663809299468994, | |
| "eval_rouge-1": 0.4169, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.4013, | |
| "eval_runtime": 3.2301, | |
| "eval_samples_per_second": 2.477, | |
| "eval_steps_per_second": 2.477, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 113.89, | |
| "learning_rate": 1.1395348837209304e-05, | |
| "loss": 0.0234, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 113.89, | |
| "eval_avg_rouge_f": 0.3888, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.740657091140747, | |
| "eval_rouge-1": 0.4334, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.4178, | |
| "eval_runtime": 4.1851, | |
| "eval_samples_per_second": 1.912, | |
| "eval_steps_per_second": 1.912, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 116.67, | |
| "learning_rate": 1.116279069767442e-05, | |
| "loss": 0.0308, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 116.67, | |
| "eval_avg_rouge_f": 0.3799, | |
| "eval_gen_len": 16.125, | |
| "eval_loss": 2.70857572555542, | |
| "eval_rouge-1": 0.4201, | |
| "eval_rouge-2": 0.3153, | |
| "eval_rouge-l": 0.4044, | |
| "eval_runtime": 2.5313, | |
| "eval_samples_per_second": 3.16, | |
| "eval_steps_per_second": 3.16, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 119.44, | |
| "learning_rate": 1.0930232558139535e-05, | |
| "loss": 0.0305, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 119.44, | |
| "eval_avg_rouge_f": 0.3598, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.7068228721618652, | |
| "eval_rouge-1": 0.4059, | |
| "eval_rouge-2": 0.2831, | |
| "eval_rouge-l": 0.3902, | |
| "eval_runtime": 2.7634, | |
| "eval_samples_per_second": 2.895, | |
| "eval_steps_per_second": 2.895, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 122.22, | |
| "learning_rate": 1.0697674418604651e-05, | |
| "loss": 0.0289, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 122.22, | |
| "eval_avg_rouge_f": 0.3598, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.8503403663635254, | |
| "eval_rouge-1": 0.4059, | |
| "eval_rouge-2": 0.2831, | |
| "eval_rouge-l": 0.3902, | |
| "eval_runtime": 2.4338, | |
| "eval_samples_per_second": 3.287, | |
| "eval_steps_per_second": 3.287, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 125.0, | |
| "learning_rate": 1.046511627906977e-05, | |
| "loss": 0.0555, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 125.0, | |
| "eval_avg_rouge_f": 0.3598, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.8522231578826904, | |
| "eval_rouge-1": 0.4059, | |
| "eval_rouge-2": 0.2831, | |
| "eval_rouge-l": 0.3902, | |
| "eval_runtime": 2.8814, | |
| "eval_samples_per_second": 2.776, | |
| "eval_steps_per_second": 2.776, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 127.78, | |
| "learning_rate": 1.0232558139534884e-05, | |
| "loss": 0.022, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 127.78, | |
| "eval_avg_rouge_f": 0.3598, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.9057185649871826, | |
| "eval_rouge-1": 0.4059, | |
| "eval_rouge-2": 0.2831, | |
| "eval_rouge-l": 0.3902, | |
| "eval_runtime": 2.43, | |
| "eval_samples_per_second": 3.292, | |
| "eval_steps_per_second": 3.292, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 130.56, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0369, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 130.56, | |
| "eval_avg_rouge_f": 0.3598, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.8735642433166504, | |
| "eval_rouge-1": 0.4059, | |
| "eval_rouge-2": 0.2831, | |
| "eval_rouge-l": 0.3902, | |
| "eval_runtime": 3.0784, | |
| "eval_samples_per_second": 2.599, | |
| "eval_steps_per_second": 2.599, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 133.33, | |
| "learning_rate": 9.767441860465117e-06, | |
| "loss": 0.0195, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 133.33, | |
| "eval_avg_rouge_f": 0.3598, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.7636728286743164, | |
| "eval_rouge-1": 0.4059, | |
| "eval_rouge-2": 0.2831, | |
| "eval_rouge-l": 0.3902, | |
| "eval_runtime": 2.4413, | |
| "eval_samples_per_second": 3.277, | |
| "eval_steps_per_second": 3.277, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 136.11, | |
| "learning_rate": 9.534883720930234e-06, | |
| "loss": 0.0387, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 136.11, | |
| "eval_avg_rouge_f": 0.3598, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.743685007095337, | |
| "eval_rouge-1": 0.4059, | |
| "eval_rouge-2": 0.2831, | |
| "eval_rouge-l": 0.3902, | |
| "eval_runtime": 2.9809, | |
| "eval_samples_per_second": 2.684, | |
| "eval_steps_per_second": 2.684, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 138.89, | |
| "learning_rate": 9.30232558139535e-06, | |
| "loss": 0.0298, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 138.89, | |
| "eval_avg_rouge_f": 0.3443, | |
| "eval_gen_len": 16.25, | |
| "eval_loss": 2.8817646503448486, | |
| "eval_rouge-1": 0.391, | |
| "eval_rouge-2": 0.2665, | |
| "eval_rouge-l": 0.3754, | |
| "eval_runtime": 2.5341, | |
| "eval_samples_per_second": 3.157, | |
| "eval_steps_per_second": 3.157, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 141.67, | |
| "learning_rate": 9.069767441860465e-06, | |
| "loss": 0.0265, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 141.67, | |
| "eval_avg_rouge_f": 0.3353, | |
| "eval_gen_len": 16.5, | |
| "eval_loss": 2.834005355834961, | |
| "eval_rouge-1": 0.3776, | |
| "eval_rouge-2": 0.2665, | |
| "eval_rouge-l": 0.362, | |
| "eval_runtime": 3.3656, | |
| "eval_samples_per_second": 2.377, | |
| "eval_steps_per_second": 2.377, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 144.44, | |
| "learning_rate": 8.837209302325582e-06, | |
| "loss": 0.0182, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 144.44, | |
| "eval_avg_rouge_f": 0.3598, | |
| "eval_gen_len": 15.5, | |
| "eval_loss": 2.873906135559082, | |
| "eval_rouge-1": 0.4059, | |
| "eval_rouge-2": 0.2831, | |
| "eval_rouge-l": 0.3902, | |
| "eval_runtime": 2.4328, | |
| "eval_samples_per_second": 3.288, | |
| "eval_steps_per_second": 3.288, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 144.44, | |
| "step": 2600, | |
| "total_flos": 3409770731258880.0, | |
| "train_loss": 0.29600492647060983, | |
| "train_runtime": 1522.0987, | |
| "train_samples_per_second": 8.869, | |
| "train_steps_per_second": 2.956 | |
| } | |
| ], | |
| "max_steps": 4500, | |
| "num_train_epochs": 250, | |
| "total_flos": 3409770731258880.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |