Model save

Browse files

Files changed (5) hide show

README.md +4 -4
adapter_config.json +5 -5
adapter_model.safetensors +1 -1
trainer_state.json +597 -44
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -34,11 +34,11 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
-- train_batch_size: 16
 - eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 16
-- total_train_batch_size: 256
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
@@ -50,7 +50,7 @@ The following hyperparameters were used during training:
 ### Framework versions
-- PEFT 0.13.1
 - Transformers 4.45.2
 - Pytorch 2.4.1
-- Tokenizers 0.20.0

 The following hyperparameters were used during training:
 - learning_rate: 0.0001
+- train_batch_size: 6
 - eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 16
+- total_train_batch_size: 96
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
 ### Framework versions
+- PEFT 0.13.2
 - Transformers 4.45.2
 - Pytorch 2.4.1
+- Tokenizers 0.20.1

adapter_config.json CHANGED Viewed

@@ -20,14 +20,14 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "v_proj",
     "o_proj",
     "up_proj",
     "q_proj",
-    "down_proj",
-    "embed_tokens",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "down_proj",
     "up_proj",
     "q_proj",
+    "k_proj",
+    "gate_proj",
+    "v_proj",
+    "embed_tokens"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a0a06819e001b9eefbd94c71227e66d3db99f193baea73fa2ee74b95454fef5
 size 555869984

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa56b2cb584e33c3b9e45c3a6a47be96d6dfe9276366a19a61f491823c915764
 size 555869984

trainer_state.json CHANGED Viewed

@@ -1,81 +1,634 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.0,
   "eval_steps": 500,
-  "global_step": 2060,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.4854368932038835,
-      "grad_norm": 2.4547414779663086,
-      "learning_rate": 8.794946550048592e-05,
-      "loss": 0.8255,
       "step": 250
     },
     {
-      "epoch": 0.970873786407767,
-      "grad_norm": 1.9669880867004395,
-      "learning_rate": 7.580174927113704e-05,
-      "loss": 0.4218,
       "step": 500
     },
     {
-      "epoch": 1.4563106796116505,
-      "grad_norm": 1.8506665229797363,
-      "learning_rate": 6.365403304178815e-05,
-      "loss": 0.381,
       "step": 750
     },
     {
-      "epoch": 1.941747572815534,
-      "grad_norm": 1.784641146659851,
-      "learning_rate": 5.150631681243926e-05,
-      "loss": 0.3659,
       "step": 1000
     },
     {
-      "epoch": 2.4271844660194173,
-      "grad_norm": 1.7636547088623047,
-      "learning_rate": 3.9358600583090386e-05,
-      "loss": 0.3546,
       "step": 1250
     },
     {
-      "epoch": 2.912621359223301,
-      "grad_norm": 1.9939128160476685,
-      "learning_rate": 2.72108843537415e-05,
-      "loss": 0.3462,
       "step": 1500
     },
     {
-      "epoch": 3.3980582524271843,
-      "grad_norm": 2.054591178894043,
-      "learning_rate": 1.5063168124392615e-05,
-      "loss": 0.3333,
       "step": 1750
     },
     {
-      "epoch": 3.883495145631068,
-      "grad_norm": 2.025630474090576,
-      "learning_rate": 2.915451895043732e-06,
-      "loss": 0.3259,
       "step": 2000
     },
     {
-      "epoch": 4.0,
-      "step": 2060,
-      "total_flos": 4.9999787325650566e+17,
-      "train_loss": 0.4165122569186016,
-      "train_runtime": 13883.9171,
-      "train_samples_per_second": 37.983,
-      "train_steps_per_second": 0.148
     }
   ],
   "logging_steps": 250,
-  "max_steps": 2060,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 500,
@@ -91,8 +644,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.9999787325650566e+17,
-  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.9998179584034954,
   "eval_steps": 500,
+  "global_step": 21972,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.045510399126200335,
+      "grad_norm": 2.0404157638549805,
+      "learning_rate": 9.887118798361402e-05,
+      "loss": 0.8444,
       "step": 250
     },
     {
+      "epoch": 0.09102079825240067,
+      "grad_norm": 2.918970823287964,
+      "learning_rate": 9.773327264451525e-05,
+      "loss": 0.546,
       "step": 500
     },
     {
+      "epoch": 0.136531197378601,
+      "grad_norm": 2.195496082305908,
+      "learning_rate": 9.659535730541647e-05,
+      "loss": 0.5034,
       "step": 750
     },
     {
+      "epoch": 0.18204159650480134,
+      "grad_norm": 2.3301496505737305,
+      "learning_rate": 9.54574419663177e-05,
+      "loss": 0.48,
       "step": 1000
     },
     {
+      "epoch": 0.22755199563100167,
+      "grad_norm": 2.254934549331665,
+      "learning_rate": 9.431952662721894e-05,
+      "loss": 0.462,
       "step": 1250
     },
     {
+      "epoch": 0.273062394757202,
+      "grad_norm": 2.273568630218506,
+      "learning_rate": 9.318161128812017e-05,
+      "loss": 0.4531,
       "step": 1500
     },
     {
+      "epoch": 0.31857279388340237,
+      "grad_norm": 2.2925281524658203,
+      "learning_rate": 9.204369594902141e-05,
+      "loss": 0.4442,
       "step": 1750
     },
     {
+      "epoch": 0.3640831930096027,
+      "grad_norm": 1.9923436641693115,
+      "learning_rate": 9.090578060992263e-05,
+      "loss": 0.4346,
       "step": 2000
     },
     {
+      "epoch": 0.40959359213580304,
+      "grad_norm": 2.253584623336792,
+      "learning_rate": 8.976786527082386e-05,
+      "loss": 0.4286,
+      "step": 2250
+    },
+    {
+      "epoch": 0.45510399126200335,
+      "grad_norm": 2.1932711601257324,
+      "learning_rate": 8.862994993172509e-05,
+      "loss": 0.4201,
+      "step": 2500
+    },
+    {
+      "epoch": 0.5006143903882037,
+      "grad_norm": 2.224640130996704,
+      "learning_rate": 8.749203459262631e-05,
+      "loss": 0.4089,
+      "step": 2750
+    },
+    {
+      "epoch": 0.546124789514404,
+      "grad_norm": 2.7368321418762207,
+      "learning_rate": 8.635411925352754e-05,
+      "loss": 0.3989,
+      "step": 3000
+    },
+    {
+      "epoch": 0.5916351886406044,
+      "grad_norm": 2.6221635341644287,
+      "learning_rate": 8.521620391442878e-05,
+      "loss": 0.3884,
+      "step": 3250
+    },
+    {
+      "epoch": 0.6371455877668047,
+      "grad_norm": 2.952885866165161,
+      "learning_rate": 8.407828857533e-05,
+      "loss": 0.3769,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6826559868930051,
+      "grad_norm": 3.0590875148773193,
+      "learning_rate": 8.294037323623123e-05,
+      "loss": 0.3713,
+      "step": 3750
+    },
+    {
+      "epoch": 0.7281663860192054,
+      "grad_norm": 2.773761034011841,
+      "learning_rate": 8.180245789713246e-05,
+      "loss": 0.3589,
+      "step": 4000
+    },
+    {
+      "epoch": 0.7736767851454057,
+      "grad_norm": 3.0175020694732666,
+      "learning_rate": 8.066454255803368e-05,
+      "loss": 0.3518,
+      "step": 4250
+    },
+    {
+      "epoch": 0.8191871842716061,
+      "grad_norm": 2.867734670639038,
+      "learning_rate": 7.952662721893491e-05,
+      "loss": 0.3433,
+      "step": 4500
+    },
+    {
+      "epoch": 0.8646975833978064,
+      "grad_norm": 2.8594553470611572,
+      "learning_rate": 7.838871187983614e-05,
+      "loss": 0.3317,
+      "step": 4750
+    },
+    {
+      "epoch": 0.9102079825240067,
+      "grad_norm": 2.9394314289093018,
+      "learning_rate": 7.725079654073737e-05,
+      "loss": 0.3259,
+      "step": 5000
+    },
+    {
+      "epoch": 0.9557183816502071,
+      "grad_norm": 2.9321157932281494,
+      "learning_rate": 7.61128812016386e-05,
+      "loss": 0.3189,
+      "step": 5250
+    },
+    {
+      "epoch": 1.0012287807764073,
+      "grad_norm": 3.3222484588623047,
+      "learning_rate": 7.497496586253983e-05,
+      "loss": 0.3149,
+      "step": 5500
+    },
+    {
+      "epoch": 1.0467391799026078,
+      "grad_norm": 2.9881997108459473,
+      "learning_rate": 7.383705052344107e-05,
+      "loss": 0.3039,
+      "step": 5750
+    },
+    {
+      "epoch": 1.092249579028808,
+      "grad_norm": 3.13385272026062,
+      "learning_rate": 7.26991351843423e-05,
+      "loss": 0.2992,
+      "step": 6000
+    },
+    {
+      "epoch": 1.1377599781550085,
+      "grad_norm": 3.6479313373565674,
+      "learning_rate": 7.156121984524352e-05,
+      "loss": 0.2918,
+      "step": 6250
+    },
+    {
+      "epoch": 1.1832703772812088,
+      "grad_norm": 3.1848132610321045,
+      "learning_rate": 7.042330450614475e-05,
+      "loss": 0.2867,
+      "step": 6500
+    },
+    {
+      "epoch": 1.228780776407409,
+      "grad_norm": 3.0174155235290527,
+      "learning_rate": 6.928538916704597e-05,
+      "loss": 0.2841,
+      "step": 6750
+    },
+    {
+      "epoch": 1.2742911755336095,
+      "grad_norm": 3.2451624870300293,
+      "learning_rate": 6.814747382794721e-05,
+      "loss": 0.2795,
+      "step": 7000
+    },
+    {
+      "epoch": 1.3198015746598097,
+      "grad_norm": 3.4281792640686035,
+      "learning_rate": 6.700955848884844e-05,
+      "loss": 0.2762,
+      "step": 7250
+    },
+    {
+      "epoch": 1.3653119737860102,
+      "grad_norm": 3.198448657989502,
+      "learning_rate": 6.587164314974966e-05,
+      "loss": 0.2713,
+      "step": 7500
+    },
+    {
+      "epoch": 1.4108223729122105,
+      "grad_norm": 3.210265636444092,
+      "learning_rate": 6.473372781065089e-05,
+      "loss": 0.2695,
+      "step": 7750
+    },
+    {
+      "epoch": 1.4563327720384107,
+      "grad_norm": 2.9175002574920654,
+      "learning_rate": 6.359581247155212e-05,
+      "loss": 0.2645,
+      "step": 8000
+    },
+    {
+      "epoch": 1.501843171164611,
+      "grad_norm": 2.8182640075683594,
+      "learning_rate": 6.245789713245334e-05,
+      "loss": 0.2646,
+      "step": 8250
+    },
+    {
+      "epoch": 1.5473535702908114,
+      "grad_norm": 2.8764665126800537,
+      "learning_rate": 6.131998179335457e-05,
+      "loss": 0.2608,
+      "step": 8500
+    },
+    {
+      "epoch": 1.592863969417012,
+      "grad_norm": 3.4266469478607178,
+      "learning_rate": 6.01820664542558e-05,
+      "loss": 0.2566,
+      "step": 8750
+    },
+    {
+      "epoch": 1.6383743685432122,
+      "grad_norm": 3.518105983734131,
+      "learning_rate": 5.904415111515703e-05,
+      "loss": 0.2526,
+      "step": 9000
+    },
+    {
+      "epoch": 1.6838847676694124,
+      "grad_norm": 2.892153739929199,
+      "learning_rate": 5.790623577605826e-05,
+      "loss": 0.2495,
+      "step": 9250
+    },
+    {
+      "epoch": 1.7293951667956127,
+      "grad_norm": 2.9470598697662354,
+      "learning_rate": 5.676832043695949e-05,
+      "loss": 0.2478,
+      "step": 9500
+    },
+    {
+      "epoch": 1.7749055659218131,
+      "grad_norm": 2.7485554218292236,
+      "learning_rate": 5.563040509786073e-05,
+      "loss": 0.2475,
+      "step": 9750
+    },
+    {
+      "epoch": 1.8204159650480136,
+      "grad_norm": 2.946807622909546,
+      "learning_rate": 5.4492489758761954e-05,
+      "loss": 0.2434,
+      "step": 10000
+    },
+    {
+      "epoch": 1.8659263641742139,
+      "grad_norm": 3.0199010372161865,
+      "learning_rate": 5.3354574419663187e-05,
+      "loss": 0.2411,
+      "step": 10250
+    },
+    {
+      "epoch": 1.9114367633004141,
+      "grad_norm": 2.9265036582946777,
+      "learning_rate": 5.221665908056441e-05,
+      "loss": 0.2388,
+      "step": 10500
+    },
+    {
+      "epoch": 1.9569471624266144,
+      "grad_norm": 3.0778729915618896,
+      "learning_rate": 5.107874374146564e-05,
+      "loss": 0.2384,
+      "step": 10750
+    },
+    {
+      "epoch": 2.0024575615528146,
+      "grad_norm": 2.8588147163391113,
+      "learning_rate": 4.9940828402366865e-05,
+      "loss": 0.2346,
+      "step": 11000
+    },
+    {
+      "epoch": 2.0479679606790153,
+      "grad_norm": 2.6789040565490723,
+      "learning_rate": 4.88029130632681e-05,
+      "loss": 0.2281,
+      "step": 11250
+    },
+    {
+      "epoch": 2.0934783598052156,
+      "grad_norm": 2.937530279159546,
+      "learning_rate": 4.7664997724169324e-05,
+      "loss": 0.2275,
+      "step": 11500
+    },
+    {
+      "epoch": 2.138988758931416,
+      "grad_norm": 3.119990587234497,
+      "learning_rate": 4.652708238507055e-05,
+      "loss": 0.2232,
+      "step": 11750
+    },
+    {
+      "epoch": 2.184499158057616,
+      "grad_norm": 3.223001480102539,
+      "learning_rate": 4.5389167045971784e-05,
+      "loss": 0.2258,
+      "step": 12000
+    },
+    {
+      "epoch": 2.2300095571838163,
+      "grad_norm": 3.1424496173858643,
+      "learning_rate": 4.425125170687301e-05,
+      "loss": 0.2208,
+      "step": 12250
+    },
+    {
+      "epoch": 2.275519956310017,
+      "grad_norm": 3.063213586807251,
+      "learning_rate": 4.3113336367774236e-05,
+      "loss": 0.2228,
+      "step": 12500
+    },
+    {
+      "epoch": 2.3210303554362173,
+      "grad_norm": 2.8787999153137207,
+      "learning_rate": 4.197542102867547e-05,
+      "loss": 0.2214,
+      "step": 12750
+    },
+    {
+      "epoch": 2.3665407545624175,
+      "grad_norm": 2.8783469200134277,
+      "learning_rate": 4.08375056895767e-05,
+      "loss": 0.22,
+      "step": 13000
+    },
+    {
+      "epoch": 2.4120511536886178,
+      "grad_norm": 3.0098447799682617,
+      "learning_rate": 3.969959035047793e-05,
+      "loss": 0.2165,
+      "step": 13250
+    },
+    {
+      "epoch": 2.457561552814818,
+      "grad_norm": 3.0888121128082275,
+      "learning_rate": 3.8561675011379155e-05,
+      "loss": 0.2169,
+      "step": 13500
+    },
+    {
+      "epoch": 2.5030719519410187,
+      "grad_norm": 2.7234771251678467,
+      "learning_rate": 3.742375967228038e-05,
+      "loss": 0.2144,
+      "step": 13750
+    },
+    {
+      "epoch": 2.548582351067219,
+      "grad_norm": 2.6636533737182617,
+      "learning_rate": 3.6285844333181614e-05,
+      "loss": 0.2116,
+      "step": 14000
+    },
+    {
+      "epoch": 2.5940927501934192,
+      "grad_norm": 2.8893346786499023,
+      "learning_rate": 3.514792899408284e-05,
+      "loss": 0.2134,
+      "step": 14250
+    },
+    {
+      "epoch": 2.6396031493196195,
+      "grad_norm": 3.0065407752990723,
+      "learning_rate": 3.4010013654984066e-05,
+      "loss": 0.2125,
+      "step": 14500
+    },
+    {
+      "epoch": 2.6851135484458197,
+      "grad_norm": 3.033083200454712,
+      "learning_rate": 3.28720983158853e-05,
+      "loss": 0.2105,
+      "step": 14750
+    },
+    {
+      "epoch": 2.7306239475720204,
+      "grad_norm": 3.2030928134918213,
+      "learning_rate": 3.173418297678653e-05,
+      "loss": 0.2101,
+      "step": 15000
+    },
+    {
+      "epoch": 2.7761343466982207,
+      "grad_norm": 2.713702917098999,
+      "learning_rate": 3.059626763768776e-05,
+      "loss": 0.2108,
+      "step": 15250
+    },
+    {
+      "epoch": 2.821644745824421,
+      "grad_norm": 3.2595670223236084,
+      "learning_rate": 2.9458352298588988e-05,
+      "loss": 0.2068,
+      "step": 15500
+    },
+    {
+      "epoch": 2.867155144950621,
+      "grad_norm": 2.702853202819824,
+      "learning_rate": 2.8320436959490214e-05,
+      "loss": 0.2074,
+      "step": 15750
+    },
+    {
+      "epoch": 2.9126655440768214,
+      "grad_norm": 2.6552271842956543,
+      "learning_rate": 2.7182521620391444e-05,
+      "loss": 0.2057,
+      "step": 16000
+    },
+    {
+      "epoch": 2.958175943203022,
+      "grad_norm": 2.71763014793396,
+      "learning_rate": 2.604460628129267e-05,
+      "loss": 0.2042,
+      "step": 16250
+    },
+    {
+      "epoch": 3.0036863423292224,
+      "grad_norm": 2.951164960861206,
+      "learning_rate": 2.4906690942193903e-05,
+      "loss": 0.2038,
+      "step": 16500
+    },
+    {
+      "epoch": 3.0491967414554226,
+      "grad_norm": 2.5258820056915283,
+      "learning_rate": 2.376877560309513e-05,
+      "loss": 0.1984,
+      "step": 16750
+    },
+    {
+      "epoch": 3.094707140581623,
+      "grad_norm": 2.263176679611206,
+      "learning_rate": 2.263086026399636e-05,
+      "loss": 0.1978,
+      "step": 17000
+    },
+    {
+      "epoch": 3.140217539707823,
+      "grad_norm": 2.466646194458008,
+      "learning_rate": 2.149294492489759e-05,
+      "loss": 0.1978,
+      "step": 17250
+    },
+    {
+      "epoch": 3.1857279388340234,
+      "grad_norm": 2.5945017337799072,
+      "learning_rate": 2.0355029585798818e-05,
+      "loss": 0.1979,
+      "step": 17500
+    },
+    {
+      "epoch": 3.231238337960224,
+      "grad_norm": 2.984452247619629,
+      "learning_rate": 1.9217114246700048e-05,
+      "loss": 0.1976,
+      "step": 17750
+    },
+    {
+      "epoch": 3.2767487370864243,
+      "grad_norm": 2.494358777999878,
+      "learning_rate": 1.8079198907601274e-05,
+      "loss": 0.1942,
+      "step": 18000
+    },
+    {
+      "epoch": 3.3222591362126246,
+      "grad_norm": 2.498244047164917,
+      "learning_rate": 1.6941283568502504e-05,
+      "loss": 0.1954,
+      "step": 18250
+    },
+    {
+      "epoch": 3.367769535338825,
+      "grad_norm": 2.7161076068878174,
+      "learning_rate": 1.5803368229403733e-05,
+      "loss": 0.1945,
+      "step": 18500
+    },
+    {
+      "epoch": 3.413279934465025,
+      "grad_norm": 2.229287624359131,
+      "learning_rate": 1.4665452890304963e-05,
+      "loss": 0.194,
+      "step": 18750
+    },
+    {
+      "epoch": 3.458790333591226,
+      "grad_norm": 2.9624757766723633,
+      "learning_rate": 1.352753755120619e-05,
+      "loss": 0.1921,
+      "step": 19000
+    },
+    {
+      "epoch": 3.504300732717426,
+      "grad_norm": 2.2083513736724854,
+      "learning_rate": 1.238962221210742e-05,
+      "loss": 0.1924,
+      "step": 19250
+    },
+    {
+      "epoch": 3.5498111318436263,
+      "grad_norm": 3.0686652660369873,
+      "learning_rate": 1.1251706873008648e-05,
+      "loss": 0.1907,
+      "step": 19500
+    },
+    {
+      "epoch": 3.5953215309698265,
+      "grad_norm": 2.542316198348999,
+      "learning_rate": 1.0113791533909878e-05,
+      "loss": 0.1918,
+      "step": 19750
+    },
+    {
+      "epoch": 3.640831930096027,
+      "grad_norm": 2.798839569091797,
+      "learning_rate": 8.975876194811106e-06,
+      "loss": 0.1903,
+      "step": 20000
+    },
+    {
+      "epoch": 3.686342329222227,
+      "grad_norm": 2.9675209522247314,
+      "learning_rate": 7.837960855712335e-06,
+      "loss": 0.1898,
+      "step": 20250
+    },
+    {
+      "epoch": 3.7318527283484277,
+      "grad_norm": 3.0449047088623047,
+      "learning_rate": 6.700045516613563e-06,
+      "loss": 0.1899,
+      "step": 20500
+    },
+    {
+      "epoch": 3.777363127474628,
+      "grad_norm": 2.863095998764038,
+      "learning_rate": 5.562130177514793e-06,
+      "loss": 0.1887,
+      "step": 20750
+    },
+    {
+      "epoch": 3.8228735266008282,
+      "grad_norm": 2.4912309646606445,
+      "learning_rate": 4.4242148384160225e-06,
+      "loss": 0.1878,
+      "step": 21000
+    },
+    {
+      "epoch": 3.8683839257270285,
+      "grad_norm": 2.7340869903564453,
+      "learning_rate": 3.286299499317251e-06,
+      "loss": 0.1889,
+      "step": 21250
+    },
+    {
+      "epoch": 3.9138943248532287,
+      "grad_norm": 2.823261022567749,
+      "learning_rate": 2.14838416021848e-06,
+      "loss": 0.1894,
+      "step": 21500
+    },
+    {
+      "epoch": 3.9594047239794294,
+      "grad_norm": 2.7040815353393555,
+      "learning_rate": 1.0104688211197086e-06,
+      "loss": 0.1874,
+      "step": 21750
+    },
+    {
+      "epoch": 3.9998179584034954,
+      "step": 21972,
+      "total_flos": 1.7842732433211018e+18,
+      "train_loss": 0.27372986671813043,
+      "train_runtime": 41504.9243,
+      "train_samples_per_second": 50.823,
+      "train_steps_per_second": 0.529
     }
   ],
   "logging_steps": 250,
+  "max_steps": 21972,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 1.7842732433211018e+18,
+  "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a167e16fc5a61712026f164c7264224fa9b0100012d73f4003cfc62ce701f790
 size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d70ad05f2dd7bb2424b5a331d1ad81a4681bc57bd29bb412e98f4fbbc6036e3
 size 5240