Rakancorle1
/

ThinkGuard

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Llama-Guard-3-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: ThinkGuard_27k_3epochs_gas16_1.5e
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # ThinkGuard_27k_3epochs_gas16_1.5e
-This model is a fine-tuned version of [meta-llama/Llama-Guard-3-8B](https://huggingface.co/meta-llama/Llama-Guard-3-8B) on an unknown dataset.
 ## Model description

 base_model: meta-llama/Llama-Guard-3-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: ThinkGuard_27k_3epochs_gas16_1.5e
 # ThinkGuard_27k_3epochs_gas16_1.5e
+This model is a fine-tuned version of [meta-llama/Llama-Guard-3-8B](https://huggingface.co/meta-llama/Llama-Guard-3-8B) on the beavertails_27k_GenExplanation dataset.
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.994262174488745,
+    "total_flos": 76511791349760.0,
+    "train_loss": 0.29241873632015297,
+    "train_runtime": 70414.7871,
+    "train_samples_per_second": 1.158,
+    "train_steps_per_second": 0.018
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.994262174488745,
+    "total_flos": 76511791349760.0,
+    "train_loss": 0.29241873632015297,
+    "train_runtime": 70414.7871,
+    "train_samples_per_second": 1.158,
+    "train_steps_per_second": 0.018
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,931 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.994262174488745,
+  "eval_steps": 500,
+  "global_step": 1272,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02353979696925114,
+      "grad_norm": 8.782275747399924,
+      "learning_rate": 3.846153846153846e-06,
+      "loss": 1.5306,
+      "step": 10
+    },
+    {
+      "epoch": 0.04707959393850228,
+      "grad_norm": 4.3210560861300085,
+      "learning_rate": 7.692307692307692e-06,
+      "loss": 0.8302,
+      "step": 20
+    },
+    {
+      "epoch": 0.07061939090775342,
+      "grad_norm": 3.023277309045724,
+      "learning_rate": 1.153846153846154e-05,
+      "loss": 0.6217,
+      "step": 30
+    },
+    {
+      "epoch": 0.09415918787700456,
+      "grad_norm": 2.654024929474284,
+      "learning_rate": 1.4999975655288908e-05,
+      "loss": 0.5967,
+      "step": 40
+    },
+    {
+      "epoch": 0.1176989848462557,
+      "grad_norm": 2.8603296276880723,
+      "learning_rate": 1.4997054481186253e-05,
+      "loss": 0.5732,
+      "step": 50
+    },
+    {
+      "epoch": 0.14123878181550684,
+      "grad_norm": 2.4252498205382347,
+      "learning_rate": 1.4989266537738068e-05,
+      "loss": 0.5573,
+      "step": 60
+    },
+    {
+      "epoch": 0.164778578784758,
+      "grad_norm": 2.0008212035639787,
+      "learning_rate": 1.4976616880546462e-05,
+      "loss": 0.5488,
+      "step": 70
+    },
+    {
+      "epoch": 0.18831837575400912,
+      "grad_norm": 2.1468051355877065,
+      "learning_rate": 1.4959113721231613e-05,
+      "loss": 0.5378,
+      "step": 80
+    },
+    {
+      "epoch": 0.21185817272326027,
+      "grad_norm": 2.09758107733118,
+      "learning_rate": 1.4936768422101134e-05,
+      "loss": 0.524,
+      "step": 90
+    },
+    {
+      "epoch": 0.2353979696925114,
+      "grad_norm": 1.9891368777701572,
+      "learning_rate": 1.4909595488774145e-05,
+      "loss": 0.5238,
+      "step": 100
+    },
+    {
+      "epoch": 0.25893776666176255,
+      "grad_norm": 1.8856649935655327,
+      "learning_rate": 1.487761256076484e-05,
+      "loss": 0.51,
+      "step": 110
+    },
+    {
+      "epoch": 0.2824775636310137,
+      "grad_norm": 1.9946450768120385,
+      "learning_rate": 1.4840840400031667e-05,
+      "loss": 0.5045,
+      "step": 120
+    },
+    {
+      "epoch": 0.3060173606002648,
+      "grad_norm": 2.02887277527658,
+      "learning_rate": 1.4799302877499543e-05,
+      "loss": 0.4993,
+      "step": 130
+    },
+    {
+      "epoch": 0.329557157569516,
+      "grad_norm": 1.7846741494593932,
+      "learning_rate": 1.475302695756387e-05,
+      "loss": 0.4959,
+      "step": 140
+    },
+    {
+      "epoch": 0.3530969545387671,
+      "grad_norm": 1.7910316946805966,
+      "learning_rate": 1.4702042680586378e-05,
+      "loss": 0.4903,
+      "step": 150
+    },
+    {
+      "epoch": 0.37663675150801823,
+      "grad_norm": 1.583559495202325,
+      "learning_rate": 1.4646383143394222e-05,
+      "loss": 0.4852,
+      "step": 160
+    },
+    {
+      "epoch": 0.40017654847726936,
+      "grad_norm": 1.8258988651876884,
+      "learning_rate": 1.458608447779491e-05,
+      "loss": 0.4835,
+      "step": 170
+    },
+    {
+      "epoch": 0.42371634544652054,
+      "grad_norm": 1.7923596937902033,
+      "learning_rate": 1.4521185827121071e-05,
+      "loss": 0.474,
+      "step": 180
+    },
+    {
+      "epoch": 0.44725614241577166,
+      "grad_norm": 1.8160198247756183,
+      "learning_rate": 1.4451729320820273e-05,
+      "loss": 0.4721,
+      "step": 190
+    },
+    {
+      "epoch": 0.4707959393850228,
+      "grad_norm": 1.7150789518558318,
+      "learning_rate": 1.437776004710637e-05,
+      "loss": 0.4729,
+      "step": 200
+    },
+    {
+      "epoch": 0.49433573635427397,
+      "grad_norm": 1.7400033525862848,
+      "learning_rate": 1.4299326023690146e-05,
+      "loss": 0.4607,
+      "step": 210
+    },
+    {
+      "epoch": 0.5178755333235251,
+      "grad_norm": 1.7002289663882648,
+      "learning_rate": 1.4216478166608262e-05,
+      "loss": 0.4613,
+      "step": 220
+    },
+    {
+      "epoch": 0.5414153302927762,
+      "grad_norm": 1.534972777417761,
+      "learning_rate": 1.4129270257170722e-05,
+      "loss": 0.4567,
+      "step": 230
+    },
+    {
+      "epoch": 0.5649551272620273,
+      "grad_norm": 1.6489960881148158,
+      "learning_rate": 1.403775890704834e-05,
+      "loss": 0.4559,
+      "step": 240
+    },
+    {
+      "epoch": 0.5884949242312785,
+      "grad_norm": 1.7846842786992185,
+      "learning_rate": 1.3942003521522837e-05,
+      "loss": 0.4624,
+      "step": 250
+    },
+    {
+      "epoch": 0.6120347212005296,
+      "grad_norm": 1.4351473554704604,
+      "learning_rate": 1.3842066260923462e-05,
+      "loss": 0.4496,
+      "step": 260
+    },
+    {
+      "epoch": 0.6355745181697808,
+      "grad_norm": 1.541683257911224,
+      "learning_rate": 1.3738012000275142e-05,
+      "loss": 0.4516,
+      "step": 270
+    },
+    {
+      "epoch": 0.659114315139032,
+      "grad_norm": 1.5879015034427175,
+      "learning_rate": 1.3629908287184381e-05,
+      "loss": 0.4475,
+      "step": 280
+    },
+    {
+      "epoch": 0.6826541121082831,
+      "grad_norm": 1.728309878285744,
+      "learning_rate": 1.3517825297990205e-05,
+      "loss": 0.4446,
+      "step": 290
+    },
+    {
+      "epoch": 0.7061939090775342,
+      "grad_norm": 1.4690146169046678,
+      "learning_rate": 1.340183579220868e-05,
+      "loss": 0.4491,
+      "step": 300
+    },
+    {
+      "epoch": 0.7297337060467853,
+      "grad_norm": 1.4969895208337947,
+      "learning_rate": 1.328201506530052e-05,
+      "loss": 0.4436,
+      "step": 310
+    },
+    {
+      "epoch": 0.7532735030160365,
+      "grad_norm": 1.4385449709842155,
+      "learning_rate": 1.3158440899792465e-05,
+      "loss": 0.4344,
+      "step": 320
+    },
+    {
+      "epoch": 0.7768132999852876,
+      "grad_norm": 1.4819229027772451,
+      "learning_rate": 1.3031193514784178e-05,
+      "loss": 0.4387,
+      "step": 330
+    },
+    {
+      "epoch": 0.8003530969545387,
+      "grad_norm": 1.5138328167674073,
+      "learning_rate": 1.2900355513873408e-05,
+      "loss": 0.4374,
+      "step": 340
+    },
+    {
+      "epoch": 0.82389289392379,
+      "grad_norm": 1.5486731806719658,
+      "learning_rate": 1.276601183153324e-05,
+      "loss": 0.4268,
+      "step": 350
+    },
+    {
+      "epoch": 0.8474326908930411,
+      "grad_norm": 1.3966287731811042,
+      "learning_rate": 1.2628249677976246e-05,
+      "loss": 0.4222,
+      "step": 360
+    },
+    {
+      "epoch": 0.8709724878622922,
+      "grad_norm": 1.4851117277296138,
+      "learning_rate": 1.2487158482541324e-05,
+      "loss": 0.4304,
+      "step": 370
+    },
+    {
+      "epoch": 0.8945122848315433,
+      "grad_norm": 1.4886042812511937,
+      "learning_rate": 1.2342829835639957e-05,
+      "loss": 0.4327,
+      "step": 380
+    },
+    {
+      "epoch": 0.9180520818007945,
+      "grad_norm": 1.5916975841301912,
+      "learning_rate": 1.219535742929962e-05,
+      "loss": 0.4312,
+      "step": 390
+    },
+    {
+      "epoch": 0.9415918787700456,
+      "grad_norm": 1.4359537454947529,
+      "learning_rate": 1.204483699634289e-05,
+      "loss": 0.418,
+      "step": 400
+    },
+    {
+      "epoch": 0.9651316757392967,
+      "grad_norm": 1.5039520247360716,
+      "learning_rate": 1.1891366248241758e-05,
+      "loss": 0.4169,
+      "step": 410
+    },
+    {
+      "epoch": 0.9886714727085479,
+      "grad_norm": 1.4396432279411446,
+      "learning_rate": 1.1735044811687508e-05,
+      "loss": 0.4165,
+      "step": 420
+    },
+    {
+      "epoch": 1.012211269677799,
+      "grad_norm": 1.4837321963480583,
+      "learning_rate": 1.1575974163917282e-05,
+      "loss": 0.3505,
+      "step": 430
+    },
+    {
+      "epoch": 1.0357510666470502,
+      "grad_norm": 1.438403278814582,
+      "learning_rate": 1.1414257566839376e-05,
+      "loss": 0.2795,
+      "step": 440
+    },
+    {
+      "epoch": 1.0592908636163012,
+      "grad_norm": 1.510877062877757,
+      "learning_rate": 1.125e-05,
+      "loss": 0.2814,
+      "step": 450
+    },
+    {
+      "epoch": 1.0828306605855524,
+      "grad_norm": 1.519349451859131,
+      "learning_rate": 1.1083308092435006e-05,
+      "loss": 0.2773,
+      "step": 460
+    },
+    {
+      "epoch": 1.1063704575548037,
+      "grad_norm": 1.3106222683180944,
+      "learning_rate": 1.091429005345085e-05,
+      "loss": 0.2706,
+      "step": 470
+    },
+    {
+      "epoch": 1.1299102545240547,
+      "grad_norm": 1.3984600885535579,
+      "learning_rate": 1.0743055602379712e-05,
+      "loss": 0.2754,
+      "step": 480
+    },
+    {
+      "epoch": 1.153450051493306,
+      "grad_norm": 1.4833593619381575,
+      "learning_rate": 1.0569715897354354e-05,
+      "loss": 0.2756,
+      "step": 490
+    },
+    {
+      "epoch": 1.176989848462557,
+      "grad_norm": 1.3932629119237105,
+      "learning_rate": 1.0394383463148993e-05,
+      "loss": 0.27,
+      "step": 500
+    },
+    {
+      "epoch": 1.2005296454318082,
+      "grad_norm": 1.4062493836827707,
+      "learning_rate": 1.0217172118132994e-05,
+      "loss": 0.2703,
+      "step": 510
+    },
+    {
+      "epoch": 1.2240694424010594,
+      "grad_norm": 1.4874549162201474,
+      "learning_rate": 1.003819690038481e-05,
+      "loss": 0.2739,
+      "step": 520
+    },
+    {
+      "epoch": 1.2476092393703104,
+      "grad_norm": 1.3708361425521698,
+      "learning_rate": 9.857573993014161e-06,
+      "loss": 0.2754,
+      "step": 530
+    },
+    {
+      "epoch": 1.2711490363395614,
+      "grad_norm": 1.3978327948674458,
+      "learning_rate": 9.675420648740886e-06,
+      "loss": 0.2692,
+      "step": 540
+    },
+    {
+      "epoch": 1.2946888333088127,
+      "grad_norm": 1.4715538000650579,
+      "learning_rate": 9.491855113779456e-06,
+      "loss": 0.2694,
+      "step": 550
+    },
+    {
+      "epoch": 1.318228630278064,
+      "grad_norm": 1.3468394374097452,
+      "learning_rate": 9.306996551078556e-06,
+      "loss": 0.2678,
+      "step": 560
+    },
+    {
+      "epoch": 1.341768427247315,
+      "grad_norm": 1.5175300555093134,
+      "learning_rate": 9.120964962965565e-06,
+      "loss": 0.266,
+      "step": 570
+    },
+    {
+      "epoch": 1.3653082242165662,
+      "grad_norm": 1.317724218624981,
+      "learning_rate": 8.933881113246134e-06,
+      "loss": 0.2666,
+      "step": 580
+    },
+    {
+      "epoch": 1.3888480211858174,
+      "grad_norm": 1.3809839276376867,
+      "learning_rate": 8.74586644880946e-06,
+      "loss": 0.2672,
+      "step": 590
+    },
+    {
+      "epoch": 1.4123878181550684,
+      "grad_norm": 1.3120307514577167,
+      "learning_rate": 8.557043020790113e-06,
+      "loss": 0.2671,
+      "step": 600
+    },
+    {
+      "epoch": 1.4359276151243194,
+      "grad_norm": 1.5865169519570839,
+      "learning_rate": 8.367533405337635e-06,
+      "loss": 0.2669,
+      "step": 610
+    },
+    {
+      "epoch": 1.4594674120935707,
+      "grad_norm": 1.315530128518467,
+      "learning_rate": 8.177460624045303e-06,
+      "loss": 0.2619,
+      "step": 620
+    },
+    {
+      "epoch": 1.483007209062822,
+      "grad_norm": 1.2729315130214236,
+      "learning_rate": 7.986948064089741e-06,
+      "loss": 0.2625,
+      "step": 630
+    },
+    {
+      "epoch": 1.506547006032073,
+      "grad_norm": 1.2890769997858393,
+      "learning_rate": 7.7961193981332e-06,
+      "loss": 0.2624,
+      "step": 640
+    },
+    {
+      "epoch": 1.5300868030013242,
+      "grad_norm": 1.344250246429933,
+      "learning_rate": 7.605098504040519e-06,
+      "loss": 0.2571,
+      "step": 650
+    },
+    {
+      "epoch": 1.5536265999705754,
+      "grad_norm": 1.3112449299655193,
+      "learning_rate": 7.414009384462882e-06,
+      "loss": 0.2603,
+      "step": 660
+    },
+    {
+      "epoch": 1.5771663969398264,
+      "grad_norm": 1.3722682469514345,
+      "learning_rate": 7.22297608634056e-06,
+      "loss": 0.2592,
+      "step": 670
+    },
+    {
+      "epoch": 1.6007061939090774,
+      "grad_norm": 1.3170860077023523,
+      "learning_rate": 7.032122620376899e-06,
+      "loss": 0.2535,
+      "step": 680
+    },
+    {
+      "epoch": 1.6242459908783287,
+      "grad_norm": 1.3106743856940115,
+      "learning_rate": 6.841572880535854e-06,
+      "loss": 0.2531,
+      "step": 690
+    },
+    {
+      "epoch": 1.64778578784758,
+      "grad_norm": 1.2798600011553958,
+      "learning_rate": 6.6514505636152814e-06,
+      "loss": 0.2548,
+      "step": 700
+    },
+    {
+      "epoch": 1.671325584816831,
+      "grad_norm": 1.3968866134114841,
+      "learning_rate": 6.461879088948227e-06,
+      "loss": 0.2507,
+      "step": 710
+    },
+    {
+      "epoch": 1.6948653817860821,
+      "grad_norm": 1.378986621794313,
+      "learning_rate": 6.272981518284342e-06,
+      "loss": 0.249,
+      "step": 720
+    },
+    {
+      "epoch": 1.7184051787553334,
+      "grad_norm": 1.2776388271568753,
+      "learning_rate": 6.084880475903424e-06,
+      "loss": 0.2451,
+      "step": 730
+    },
+    {
+      "epoch": 1.7419449757245844,
+      "grad_norm": 1.317908633074014,
+      "learning_rate": 5.897698069012938e-06,
+      "loss": 0.2484,
+      "step": 740
+    },
+    {
+      "epoch": 1.7654847726938354,
+      "grad_norm": 1.2702586608772435,
+      "learning_rate": 5.711555808481213e-06,
+      "loss": 0.2486,
+      "step": 750
+    },
+    {
+      "epoch": 1.7890245696630867,
+      "grad_norm": 1.3790950940287556,
+      "learning_rate": 5.526574529957729e-06,
+      "loss": 0.2451,
+      "step": 760
+    },
+    {
+      "epoch": 1.8125643666323379,
+      "grad_norm": 1.417794065597115,
+      "learning_rate": 5.342874315431765e-06,
+      "loss": 0.2442,
+      "step": 770
+    },
+    {
+      "epoch": 1.836104163601589,
+      "grad_norm": 1.3118057182766703,
+      "learning_rate": 5.160574415280257e-06,
+      "loss": 0.2438,
+      "step": 780
+    },
+    {
+      "epoch": 1.85964396057084,
+      "grad_norm": 1.380491802365371,
+      "learning_rate": 4.979793170855537e-06,
+      "loss": 0.2368,
+      "step": 790
+    },
+    {
+      "epoch": 1.8831837575400914,
+      "grad_norm": 1.3404717450970274,
+      "learning_rate": 4.800647937663126e-06,
+      "loss": 0.2393,
+      "step": 800
+    },
+    {
+      "epoch": 1.9067235545093424,
+      "grad_norm": 1.3084085289097462,
+      "learning_rate": 4.623255009179547e-06,
+      "loss": 0.2402,
+      "step": 810
+    },
+    {
+      "epoch": 1.9302633514785934,
+      "grad_norm": 1.2819422568439027,
+      "learning_rate": 4.4477295413595385e-06,
+      "loss": 0.2359,
+      "step": 820
+    },
+    {
+      "epoch": 1.9538031484478446,
+      "grad_norm": 1.2506810831080168,
+      "learning_rate": 4.274185477881681e-06,
+      "loss": 0.2365,
+      "step": 830
+    },
+    {
+      "epoch": 1.9773429454170959,
+      "grad_norm": 1.236732854007973,
+      "learning_rate": 4.102735476181019e-06,
+      "loss": 0.2308,
+      "step": 840
+    },
+    {
+      "epoch": 2.000882742386347,
+      "grad_norm": 1.2110215418076606,
+      "learning_rate": 3.933490834316633e-06,
+      "loss": 0.2284,
+      "step": 850
+    },
+    {
+      "epoch": 2.024422539355598,
+      "grad_norm": 1.4341116240165481,
+      "learning_rate": 3.7665614187216603e-06,
+      "loss": 0.128,
+      "step": 860
+    },
+    {
+      "epoch": 2.0479623363248494,
+      "grad_norm": 1.1448959546708688,
+      "learning_rate": 3.6020555928826935e-06,
+      "loss": 0.1201,
+      "step": 870
+    },
+    {
+      "epoch": 2.0715021332941004,
+      "grad_norm": 1.1891044799932367,
+      "learning_rate": 3.4400801469947734e-06,
+      "loss": 0.1196,
+      "step": 880
+    },
+    {
+      "epoch": 2.0950419302633514,
+      "grad_norm": 1.2226413834941532,
+      "learning_rate": 3.2807402286377675e-06,
+      "loss": 0.1183,
+      "step": 890
+    },
+    {
+      "epoch": 2.1185817272326024,
+      "grad_norm": 1.3069856648534286,
+      "learning_rate": 3.1241392745189985e-06,
+      "loss": 0.1181,
+      "step": 900
+    },
+    {
+      "epoch": 2.142121524201854,
+      "grad_norm": 1.1702804083458354,
+      "learning_rate": 2.970378943326543e-06,
+      "loss": 0.1166,
+      "step": 910
+    },
+    {
+      "epoch": 2.165661321171105,
+      "grad_norm": 1.2124612400300803,
+      "learning_rate": 2.8195590497367222e-06,
+      "loss": 0.1155,
+      "step": 920
+    },
+    {
+      "epoch": 2.189201118140356,
+      "grad_norm": 1.1611863697964633,
+      "learning_rate": 2.671777499618652e-06,
+      "loss": 0.1159,
+      "step": 930
+    },
+    {
+      "epoch": 2.2127409151096074,
+      "grad_norm": 1.1087717944945865,
+      "learning_rate": 2.527130226477926e-06,
+      "loss": 0.1168,
+      "step": 940
+    },
+    {
+      "epoch": 2.2362807120788584,
+      "grad_norm": 1.2482164891383611,
+      "learning_rate": 2.3857111291806348e-06,
+      "loss": 0.114,
+      "step": 950
+    },
+    {
+      "epoch": 2.2598205090481094,
+      "grad_norm": 1.1704503748862236,
+      "learning_rate": 2.2476120109982267e-06,
+      "loss": 0.1146,
+      "step": 960
+    },
+    {
+      "epoch": 2.283360306017361,
+      "grad_norm": 1.1831598264423941,
+      "learning_rate": 2.1129225200127196e-06,
+      "loss": 0.1142,
+      "step": 970
+    },
+    {
+      "epoch": 2.306900102986612,
+      "grad_norm": 1.1923513771478957,
+      "learning_rate": 1.981730090920969e-06,
+      "loss": 0.1121,
+      "step": 980
+    },
+    {
+      "epoch": 2.330439899955863,
+      "grad_norm": 1.1650306486853608,
+      "learning_rate": 1.8541198882757892e-06,
+      "loss": 0.1103,
+      "step": 990
+    },
+    {
+      "epoch": 2.353979696925114,
+      "grad_norm": 1.2474184378379198,
+      "learning_rate": 1.73017475120072e-06,
+      "loss": 0.1111,
+      "step": 1000
+    },
+    {
+      "epoch": 2.3775194938943653,
+      "grad_norm": 1.1410121185645685,
+      "learning_rate": 1.6099751396144142e-06,
+      "loss": 0.1101,
+      "step": 1010
+    },
+    {
+      "epoch": 2.4010592908636164,
+      "grad_norm": 1.1781449006163716,
+      "learning_rate": 1.493599081999453e-06,
+      "loss": 0.1085,
+      "step": 1020
+    },
+    {
+      "epoch": 2.4245990878328674,
+      "grad_norm": 1.095164172281657,
+      "learning_rate": 1.3811221247495784e-06,
+      "loss": 0.109,
+      "step": 1030
+    },
+    {
+      "epoch": 2.448138884802119,
+      "grad_norm": 1.1290274158456068,
+      "learning_rate": 1.2726172831281898e-06,
+      "loss": 0.1083,
+      "step": 1040
+    },
+    {
+      "epoch": 2.47167868177137,
+      "grad_norm": 1.1205826114027153,
+      "learning_rate": 1.1681549938699157e-06,
+      "loss": 0.1079,
+      "step": 1050
+    },
+    {
+      "epoch": 2.495218478740621,
+      "grad_norm": 1.237936841602485,
+      "learning_rate": 1.0678030694561044e-06,
+      "loss": 0.1084,
+      "step": 1060
+    },
+    {
+      "epoch": 2.518758275709872,
+      "grad_norm": 1.2748079747165908,
+      "learning_rate": 9.716266540938183e-07,
+      "loss": 0.1084,
+      "step": 1070
+    },
+    {
+      "epoch": 2.542298072679123,
+      "grad_norm": 1.1620787715820426,
+      "learning_rate": 8.796881814269941e-07,
+      "loss": 0.106,
+      "step": 1080
+    },
+    {
+      "epoch": 2.5658378696483743,
+      "grad_norm": 1.1814024146181927,
+      "learning_rate": 7.92047334007169e-07,
+      "loss": 0.1037,
+      "step": 1090
+    },
+    {
+      "epoch": 2.5893776666176254,
+      "grad_norm": 1.1411929584644598,
+      "learning_rate": 7.087610045501023e-07,
+      "loss": 0.1044,
+      "step": 1100
+    },
+    {
+      "epoch": 2.612917463586877,
+      "grad_norm": 1.1770255689497389,
+      "learning_rate": 6.29883259003445e-07,
+      "loss": 0.1051,
+      "step": 1110
+    },
+    {
+      "epoch": 2.636457260556128,
+      "grad_norm": 1.1667654713458508,
+      "learning_rate": 5.554653014494176e-07,
+      "loss": 0.1031,
+      "step": 1120
+    },
+    {
+      "epoch": 2.659997057525379,
+      "grad_norm": 1.238846596793772,
+      "learning_rate": 4.855554408652985e-07,
+      "loss": 0.1039,
+      "step": 1130
+    },
+    {
+      "epoch": 2.68353685449463,
+      "grad_norm": 1.0962748439341052,
+      "learning_rate": 4.201990597632832e-07,
+      "loss": 0.1032,
+      "step": 1140
+    },
+    {
+      "epoch": 2.707076651463881,
+      "grad_norm": 1.1228066231093363,
+      "learning_rate": 3.5943858473009237e-07,
+      "loss": 0.105,
+      "step": 1150
+    },
+    {
+      "epoch": 2.7306164484331323,
+      "grad_norm": 1.1681919060053207,
+      "learning_rate": 3.033134588854289e-07,
+      "loss": 0.1029,
+      "step": 1160
+    },
+    {
+      "epoch": 2.7541562454023834,
+      "grad_norm": 1.107535834090668,
+      "learning_rate": 2.518601162771755e-07,
+      "loss": 0.1048,
+      "step": 1170
+    },
+    {
+      "epoch": 2.777696042371635,
+      "grad_norm": 1.0985797806813495,
+      "learning_rate": 2.0511195822997013e-07,
+      "loss": 0.102,
+      "step": 1180
+    },
+    {
+      "epoch": 2.801235839340886,
+      "grad_norm": 1.1563052202999995,
+      "learning_rate": 1.6309933166247403e-07,
+      "loss": 0.1028,
+      "step": 1190
+    },
+    {
+      "epoch": 2.824775636310137,
+      "grad_norm": 1.1299722934234167,
+      "learning_rate": 1.258495093874454e-07,
+      "loss": 0.1006,
+      "step": 1200
+    },
+    {
+      "epoch": 2.848315433279388,
+      "grad_norm": 1.0731524173833442,
+      "learning_rate": 9.338667240738619e-08,
+      "loss": 0.1029,
+      "step": 1210
+    },
+    {
+      "epoch": 2.871855230248639,
+      "grad_norm": 1.0991693593720597,
+      "learning_rate": 6.573189421726466e-08,
+      "loss": 0.102,
+      "step": 1220
+    },
+    {
+      "epoch": 2.8953950272178903,
+      "grad_norm": 1.1501966264186627,
+      "learning_rate": 4.2903127124496454e-08,
+      "loss": 0.1032,
+      "step": 1230
+    },
+    {
+      "epoch": 2.9189348241871413,
+      "grad_norm": 1.1357187349706255,
+      "learning_rate": 2.4915190595068415e-08,
+      "loss": 0.1031,
+      "step": 1240
+    },
+    {
+      "epoch": 2.942474621156393,
+      "grad_norm": 1.1136651822369734,
+      "learning_rate": 1.1779761633370633e-08,
+      "loss": 0.0998,
+      "step": 1250
+    },
+    {
+      "epoch": 2.966014418125644,
+      "grad_norm": 1.1212278780119938,
+      "learning_rate": 3.50536720197997e-09,
+      "loss": 0.1005,
+      "step": 1260
+    },
+    {
+      "epoch": 2.989554215094895,
+      "grad_norm": 1.1503104789107468,
+      "learning_rate": 9.73786863237014e-11,
+      "loss": 0.1019,
+      "step": 1270
+    },
+    {
+      "epoch": 2.994262174488745,
+      "step": 1272,
+      "total_flos": 76511791349760.0,
+      "train_loss": 0.29241873632015297,
+      "train_runtime": 70414.7871,
+      "train_samples_per_second": 1.158,
+      "train_steps_per_second": 0.018
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1272,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 76511791349760.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_loss.png ADDED Viewed