fine-tuned for 1 epoch

Browse files

Files changed (4) hide show

README.md +245 -0
adapter_config.json +41 -0
adapter_model.safetensors +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,245 @@

+---
+library_name: peft
+license: apache-2.0
+base_model: HuggingFaceTB/SmolVLM-Instruct
+tags:
+- generated_from_trainer
+model-index:
+- name: smolvlm-mmocr-sft-round-2
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# smolvlm-mmocr-sft-round-2
+This model is a fine-tuned version of [HuggingFaceTB/SmolVLM-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0196
+## Model description
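+This repository contains a LoRA adapter (PEFT) for [HuggingFaceTB/SmolVLM-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct), not a full set of model weights; it must be loaded on top of the base model. The adapter uses `r=16`, `lora_alpha=64`, and `lora_dropout=0.1`, applied to the target modules listed in `adapter_config.json` (`q_proj`, `k_proj`, `v_proj`, `o_proj`, `out_proj`, `proj`, `fc1`, `fc2`, `gate_proj`, `up_proj`, `down_proj`).
+
+More information needed on the task and the intended behavior of the fine-tuned model.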
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 96
+- eval_batch_size: 96
+- seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 768
+- optimizer: AdamW (`OptimizerNames.ADAMW_TORCH`) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 1000
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 0.5099        | 0.0054 | 20   | 0.4944          |
+| 0.4426        | 0.0109 | 40   | 0.4381          |
+| 0.4117        | 0.0163 | 60   | 0.3822          |
+| 0.3378        | 0.0217 | 80   | 0.3352          |
+| 0.3107        | 0.0271 | 100  | 0.2984          |
+| 0.2839        | 0.0326 | 120  | 0.2696          |
+| 0.2629        | 0.0380 | 140  | 0.2478          |
+| 0.2467        | 0.0434 | 160  | 0.2305          |
+| 0.2306        | 0.0489 | 180  | 0.2160          |
+| 0.2283        | 0.0543 | 200  | 0.2037          |
+| 0.1958        | 0.0597 | 220  | 0.1927          |
+| 0.1887        | 0.0651 | 240  | 0.1830          |
+| 0.1699        | 0.0706 | 260  | 0.1738          |
+| 0.155         | 0.0760 | 280  | 0.1655          |
+| 0.1579        | 0.0814 | 300  | 0.1575          |
+| 0.1437        | 0.0869 | 320  | 0.1506          |
+| 0.1549        | 0.0923 | 340  | 0.1437          |
+| 0.1336        | 0.0977 | 360  | 0.1371          |
+| 0.1268        | 0.1032 | 380  | 0.1314          |
+| 0.139         | 0.1086 | 400  | 0.1258          |
+| 0.1215        | 0.1140 | 420  | 0.1205          |
+| 0.1103        | 0.1194 | 440  | 0.1156          |
+| 0.1141        | 0.1249 | 460  | 0.1111          |
+| 0.1175        | 0.1303 | 480  | 0.1066          |
+| 0.0978        | 0.1357 | 500  | 0.1023          |
+| 0.1016        | 0.1412 | 520  | 0.0986          |
+| 0.0899        | 0.1466 | 540  | 0.0947          |
+| 0.0952        | 0.1520 | 560  | 0.0909          |
+| 0.0775        | 0.1574 | 580  | 0.0876          |
+| 0.0862        | 0.1629 | 600  | 0.0845          |
+| 0.0777        | 0.1683 | 620  | 0.0809          |
+| 0.077         | 0.1737 | 640  | 0.0779          |
+| 0.0757        | 0.1792 | 660  | 0.0752          |
+| 0.0627        | 0.1846 | 680  | 0.0724          |
+| 0.0635        | 0.1900 | 700  | 0.0701          |
+| 0.0619        | 0.1954 | 720  | 0.0674          |
+| 0.0593        | 0.2009 | 740  | 0.0651          |
+| 0.0686        | 0.2063 | 760  | 0.0633          |
+| 0.0626        | 0.2117 | 780  | 0.0613          |
+| 0.0596        | 0.2172 | 800  | 0.0591          |
+| 0.0447        | 0.2226 | 820  | 0.0575          |
+| 0.0548        | 0.2280 | 840  | 0.0556          |
+| 0.0555        | 0.2334 | 860  | 0.0546          |
+| 0.0547        | 0.2389 | 880  | 0.0536          |
+| 0.0461        | 0.2443 | 900  | 0.0512          |
+| 0.0445        | 0.2497 | 920  | 0.0501          |
+| 0.0386        | 0.2552 | 940  | 0.0491          |
+| 0.0334        | 0.2606 | 960  | 0.0475          |
+| 0.0498        | 0.2660 | 980  | 0.0458          |
+| 0.0365        | 0.2715 | 1000 | 0.0452          |
+| 0.0364        | 0.2769 | 1020 | 0.0442          |
+| 0.0351        | 0.2823 | 1040 | 0.0433          |
+| 0.0393        | 0.2878 | 1060 | 0.0418          |
+| 0.0359        | 0.2932 | 1080 | 0.0413          |
+| 0.0347        | 0.2986 | 1100 | 0.0405          |
+| 0.0351        | 0.3040 | 1120 | 0.0397          |
+| 0.0386        | 0.3095 | 1140 | 0.0385          |
+| 0.04          | 0.3149 | 1160 | 0.0380          |
+| 0.0288        | 0.3203 | 1180 | 0.0375          |
+| 0.0279        | 0.3258 | 1200 | 0.0370          |
+| 0.0285        | 0.3312 | 1220 | 0.0362          |
+| 0.0308        | 0.3366 | 1240 | 0.0357          |
+| 0.0302        | 0.3420 | 1260 | 0.0351          |
+| 0.026         | 0.3475 | 1280 | 0.0349          |
+| 0.0262        | 0.3529 | 1300 | 0.0345          |
+| 0.0295        | 0.3583 | 1320 | 0.0341          |
+| 0.0315        | 0.3638 | 1340 | 0.0335          |
+| 0.0325        | 0.3692 | 1360 | 0.0327          |
+| 0.0264        | 0.3746 | 1380 | 0.0324          |
+| 0.0321        | 0.3800 | 1400 | 0.0319          |
+| 0.0268        | 0.3855 | 1420 | 0.0312          |
+| 0.0248        | 0.3909 | 1440 | 0.0313          |
+| 0.0361        | 0.3963 | 1460 | 0.0312          |
+| 0.0277        | 0.4018 | 1480 | 0.0303          |
+| 0.0274        | 0.4072 | 1500 | 0.0300          |
+| 0.028         | 0.4126 | 1520 | 0.0294          |
+| 0.0223        | 0.4181 | 1540 | 0.0290          |
+| 0.0247        | 0.4235 | 1560 | 0.0289          |
+| 0.0265        | 0.4289 | 1580 | 0.0289          |
+| 0.0285        | 0.4343 | 1600 | 0.0285          |
+| 0.0284        | 0.4398 | 1620 | 0.0284          |
+| 0.0255        | 0.4452 | 1640 | 0.0279          |
+| 0.0246        | 0.4506 | 1660 | 0.0273          |
+| 0.0196        | 0.4561 | 1680 | 0.0274          |
+| 0.0272        | 0.4615 | 1700 | 0.0270          |
+| 0.0206        | 0.4669 | 1720 | 0.0268          |
+| 0.02          | 0.4723 | 1740 | 0.0268          |
+| 0.0229        | 0.4778 | 1760 | 0.0268          |
+| 0.0214        | 0.4832 | 1780 | 0.0260          |
+| 0.0246        | 0.4886 | 1800 | 0.0261          |
+| 0.0189        | 0.4941 | 1820 | 0.0254          |
+| 0.0199        | 0.4995 | 1840 | 0.0253          |
+| 0.0168        | 0.5049 | 1860 | 0.0251          |
+| 0.0297        | 0.5103 | 1880 | 0.0250          |
+| 0.0237        | 0.5158 | 1900 | 0.0245          |
+| 0.0189        | 0.5212 | 1920 | 0.0250          |
+| 0.0274        | 0.5266 | 1940 | 0.0243          |
+| 0.0143        | 0.5321 | 1960 | 0.0243          |
+| 0.0201        | 0.5375 | 1980 | 0.0241          |
+| 0.0258        | 0.5429 | 2000 | 0.0241          |
+| 0.0256        | 0.5484 | 2020 | 0.0239          |
+| 0.018         | 0.5538 | 2040 | 0.0235          |
+| 0.019         | 0.5592 | 2060 | 0.0236          |
+| 0.0252        | 0.5646 | 2080 | 0.0233          |
+| 0.0215        | 0.5701 | 2100 | 0.0234          |
+| 0.0204        | 0.5755 | 2120 | 0.0235          |
+| 0.0165        | 0.5809 | 2140 | 0.0230          |
+| 0.018         | 0.5864 | 2160 | 0.0230          |
+| 0.0236        | 0.5918 | 2180 | 0.0228          |
+| 0.0199        | 0.5972 | 2200 | 0.0228          |
+| 0.0153        | 0.6026 | 2220 | 0.0226          |
+| 0.0166        | 0.6081 | 2240 | 0.0227          |
+| 0.0212        | 0.6135 | 2260 | 0.0225          |
+| 0.0213        | 0.6189 | 2280 | 0.0224          |
+| 0.0165        | 0.6244 | 2300 | 0.0223          |
+| 0.0197        | 0.6298 | 2320 | 0.0224          |
+| 0.0219        | 0.6352 | 2340 | 0.0223          |
+| 0.0277        | 0.6407 | 2360 | 0.0222          |
+| 0.015         | 0.6461 | 2380 | 0.0220          |
+| 0.0191        | 0.6515 | 2400 | 0.0218          |
+| 0.0204        | 0.6569 | 2420 | 0.0220          |
+| 0.0162        | 0.6624 | 2440 | 0.0219          |
+| 0.0174        | 0.6678 | 2460 | 0.0219          |
+| 0.0144        | 0.6732 | 2480 | 0.0217          |
+| 0.0219        | 0.6787 | 2500 | 0.0215          |
+| 0.0152        | 0.6841 | 2520 | 0.0217          |
+| 0.0107        | 0.6895 | 2540 | 0.0215          |
+| 0.0173        | 0.6949 | 2560 | 0.0213          |
+| 0.0173        | 0.7004 | 2580 | 0.0214          |
+| 0.0147        | 0.7058 | 2600 | 0.0214          |
+| 0.015         | 0.7112 | 2620 | 0.0211          |
+| 0.0158        | 0.7167 | 2640 | 0.0211          |
+| 0.0128        | 0.7221 | 2660 | 0.0211          |
+| 0.0185        | 0.7275 | 2680 | 0.0210          |
+| 0.0214        | 0.7329 | 2700 | 0.0210          |
+| 0.0162        | 0.7384 | 2720 | 0.0209          |
+| 0.017         | 0.7438 | 2740 | 0.0210          |
+| 0.0124        | 0.7492 | 2760 | 0.0211          |
+| 0.0187        | 0.7547 | 2780 | 0.0207          |
+| 0.0166        | 0.7601 | 2800 | 0.0209          |
+| 0.0204        | 0.7655 | 2820 | 0.0207          |
+| 0.0168        | 0.7710 | 2840 | 0.0207          |
+| 0.0209        | 0.7764 | 2860 | 0.0206          |
+| 0.0166        | 0.7818 | 2880 | 0.0206          |
+| 0.0174        | 0.7872 | 2900 | 0.0207          |
+| 0.0208        | 0.7927 | 2920 | 0.0205          |
+| 0.013         | 0.7981 | 2940 | 0.0204          |
+| 0.0112        | 0.8035 | 2960 | 0.0205          |
+| 0.0174        | 0.8090 | 2980 | 0.0203          |
+| 0.0169        | 0.8144 | 3000 | 0.0204          |
+| 0.0129        | 0.8198 | 3020 | 0.0202          |
+| 0.0191        | 0.8252 | 3040 | 0.0201          |
+| 0.0166        | 0.8307 | 3060 | 0.0202          |
+| 0.0182        | 0.8361 | 3080 | 0.0201          |
+| 0.0208        | 0.8415 | 3100 | 0.0201          |
+| 0.015         | 0.8470 | 3120 | 0.0200          |
+| 0.0246        | 0.8524 | 3140 | 0.0200          |
+| 0.0158        | 0.8578 | 3160 | 0.0201          |
+| 0.0183        | 0.8633 | 3180 | 0.0201          |
+| 0.0171        | 0.8687 | 3200 | 0.0201          |
+| 0.0173        | 0.8741 | 3220 | 0.0199          |
+| 0.017         | 0.8795 | 3240 | 0.0199          |
+| 0.0123        | 0.8850 | 3260 | 0.0199          |
+| 0.0144        | 0.8904 | 3280 | 0.0200          |
+| 0.0149        | 0.8958 | 3300 | 0.0199          |
+| 0.0139        | 0.9013 | 3320 | 0.0199          |
+| 0.0199        | 0.9067 | 3340 | 0.0198          |
+| 0.0164        | 0.9121 | 3360 | 0.0197          |
+| 0.0154        | 0.9175 | 3380 | 0.0198          |
+| 0.0225        | 0.9230 | 3400 | 0.0198          |
+| 0.0232        | 0.9284 | 3420 | 0.0198          |
+| 0.0135        | 0.9338 | 3440 | 0.0197          |
+| 0.016         | 0.9393 | 3460 | 0.0197          |
+| 0.0164        | 0.9447 | 3480 | 0.0197          |
+| 0.0147        | 0.9501 | 3500 | 0.0197          |
+| 0.015         | 0.9555 | 3520 | 0.0197          |
+| 0.0215        | 0.9610 | 3540 | 0.0196          |
+| 0.0174        | 0.9664 | 3560 | 0.0197          |
+| 0.0203        | 0.9718 | 3580 | 0.0197          |
+| 0.0191        | 0.9773 | 3600 | 0.0197          |
+| 0.02          | 0.9827 | 3620 | 0.0196          |
+| 0.0128        | 0.9881 | 3640 | 0.0196          |
+| 0.0145        | 0.9936 | 3660 | 0.0197          |
+| 0.0179        | 0.9990 | 3680 | 0.0196          |
+### Framework versions
+- PEFT 0.14.0
+- Transformers 4.49.0
+- Pytorch 2.5.1+cu124
+- Datasets 3.3.2
+- Tokenizers 0.21.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "HuggingFaceTB/SmolVLM-Instruct",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "fc1",
+    "gate_proj",
+    "down_proj",
+    "out_proj",
+    "q_proj",
+    "proj",
+    "fc2",
+    "v_proj",
+    "k_proj",
+    "up_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5faac249625039309923e3d11048422314cd482bd75a9cc74ef9cff9112eb03
+size 108025880

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af56170fc1f10893c7099a94453dcf57a287298df33b0a6ae80a3986e471b3f9
+size 5304