CortexPE committed on Feb 27

Commit

cd6a6e6

verified ·

1 Parent(s): 812f3cc

Upload folder using huggingface_hub

Browse files

Files changed (30) hide show

checkpoint-1648/config.json +44 -0
checkpoint-1648/model.safetensors +3 -0
checkpoint-1648/optimizer.pt +3 -0
checkpoint-1648/rng_state.pth +3 -0
checkpoint-1648/scheduler.pt +3 -0
checkpoint-1648/trainer_state.json +166 -0
checkpoint-1648/training_args.bin +3 -0
checkpoint-3296/config.json +44 -0
checkpoint-3296/model.safetensors +3 -0
checkpoint-3296/optimizer.pt +3 -0
checkpoint-3296/rng_state.pth +3 -0
checkpoint-3296/scheduler.pt +3 -0
checkpoint-3296/trainer_state.json +290 -0
checkpoint-3296/training_args.bin +3 -0
checkpoint-4944/config.json +44 -0
checkpoint-4944/model.safetensors +3 -0
checkpoint-4944/optimizer.pt +3 -0
checkpoint-4944/rng_state.pth +3 -0
checkpoint-4944/scheduler.pt +3 -0
checkpoint-4944/trainer_state.json +421 -0
checkpoint-4944/training_args.bin +3 -0
config.json +44 -0
confusion_matrix.png +0 -0
model.safetensors +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +56 -0
training_args.bin +3 -0
type_mapping.json +1 -0
vocab.txt +0 -0

checkpoint-1648/config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "distilbert-base-multilingual-cased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 119547
+}

checkpoint-1648/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b127042ca9191b532a59773b581d6bd536541118bc87bd2d236e7180ef11bf42
+size 541332756

checkpoint-1648/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb0d2b238e0e306d6873e263a46e497e316883004aa945dbd0dbe5b3cf44d870
+size 1082727610

checkpoint-1648/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ba3b48e31073f7530281564c9ca8cea953948e8e36bf40e284519532add5360
+size 14244

checkpoint-1648/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21d4589dc98af400cfd4548dd7f95e09263e025d27f9d116be6ea3a835592735
+size 1064

checkpoint-1648/trainer_state.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+  "best_metric": 0.87963672068735,
+  "best_model_checkpoint": "distilbert-type-classifier/checkpoint-1648",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 1648,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06067961165048544,
+      "grad_norm": 7.083491325378418,
+      "learning_rate": 1.959546925566343e-05,
+      "loss": 1.3291,
+      "step": 100
+    },
+    {
+      "epoch": 0.12135922330097088,
+      "grad_norm": 5.435772895812988,
+      "learning_rate": 1.919093851132686e-05,
+      "loss": 0.7948,
+      "step": 200
+    },
+    {
+      "epoch": 0.1820388349514563,
+      "grad_norm": 5.531534671783447,
+      "learning_rate": 1.8786407766990295e-05,
+      "loss": 0.59,
+      "step": 300
+    },
+    {
+      "epoch": 0.24271844660194175,
+      "grad_norm": 5.0550031661987305,
+      "learning_rate": 1.838187702265372e-05,
+      "loss": 0.5483,
+      "step": 400
+    },
+    {
+      "epoch": 0.30339805825242716,
+      "grad_norm": 9.326889038085938,
+      "learning_rate": 1.7977346278317155e-05,
+      "loss": 0.472,
+      "step": 500
+    },
+    {
+      "epoch": 0.3640776699029126,
+      "grad_norm": 11.223947525024414,
+      "learning_rate": 1.7572815533980585e-05,
+      "loss": 0.4334,
+      "step": 600
+    },
+    {
+      "epoch": 0.42475728155339804,
+      "grad_norm": 6.310647487640381,
+      "learning_rate": 1.7168284789644015e-05,
+      "loss": 0.3993,
+      "step": 700
+    },
+    {
+      "epoch": 0.4854368932038835,
+      "grad_norm": 13.495203018188477,
+      "learning_rate": 1.6763754045307445e-05,
+      "loss": 0.3976,
+      "step": 800
+    },
+    {
+      "epoch": 0.5461165048543689,
+      "grad_norm": 9.985793113708496,
+      "learning_rate": 1.6359223300970874e-05,
+      "loss": 0.4045,
+      "step": 900
+    },
+    {
+      "epoch": 0.6067961165048543,
+      "grad_norm": 5.920831203460693,
+      "learning_rate": 1.5954692556634304e-05,
+      "loss": 0.3661,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6674757281553398,
+      "grad_norm": 7.346311092376709,
+      "learning_rate": 1.5550161812297734e-05,
+      "loss": 0.3527,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7281553398058253,
+      "grad_norm": 7.223079681396484,
+      "learning_rate": 1.5145631067961166e-05,
+      "loss": 0.361,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7888349514563107,
+      "grad_norm": 5.1326799392700195,
+      "learning_rate": 1.4741100323624598e-05,
+      "loss": 0.3892,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8495145631067961,
+      "grad_norm": 7.782454967498779,
+      "learning_rate": 1.4336569579288026e-05,
+      "loss": 0.3502,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9101941747572816,
+      "grad_norm": 8.951793670654297,
+      "learning_rate": 1.3932038834951458e-05,
+      "loss": 0.2934,
+      "step": 1500
+    },
+    {
+      "epoch": 0.970873786407767,
+      "grad_norm": 9.385202407836914,
+      "learning_rate": 1.3527508090614887e-05,
+      "loss": 0.3436,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8803116147308782,
+      "eval_f1": 0.87963672068735,
+      "eval_loss": 0.30396342277526855,
+      "eval_precision": 0.8827457335389428,
+      "eval_recall": 0.8803116147308782,
+      "eval_runtime": 21.997,
+      "eval_samples_per_second": 256.762,
+      "eval_steps_per_second": 16.048,
+      "step": 1648
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 4944,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 2,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 872905517958144.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1648/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:609d9e093ff10cb72ff2ceab0b60b959ffb5a8e69641d5f5afc035f09f45f51b
+size 5304

checkpoint-3296/config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "distilbert-base-multilingual-cased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 119547
+}

checkpoint-3296/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e5bfcf35f0a247d68dd865628094b6058cc18b89924f880ab9417eb04731ce5
+size 541332756

checkpoint-3296/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8182b3aea9a6c91bd337d14cfa4a502f5e40c5aca604a463ff7331f80862faf1
+size 1082727610

checkpoint-3296/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec96d0b8b5f52722b8b82f029bdb73b1ee05b7d00a36093eeedec1c70c5621f2
+size 14244

checkpoint-3296/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc9576b6ded02fee58504ff02bb633c9ed789e6a4b3782812ebca632813095fe
+size 1064

checkpoint-3296/trainer_state.json ADDED Viewed

	@@ -0,0 +1,290 @@

+{
+  "best_metric": 0.8996119975479232,
+  "best_model_checkpoint": "distilbert-type-classifier/checkpoint-3296",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 3296,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06067961165048544,
+      "grad_norm": 7.083491325378418,
+      "learning_rate": 1.959546925566343e-05,
+      "loss": 1.3291,
+      "step": 100
+    },
+    {
+      "epoch": 0.12135922330097088,
+      "grad_norm": 5.435772895812988,
+      "learning_rate": 1.919093851132686e-05,
+      "loss": 0.7948,
+      "step": 200
+    },
+    {
+      "epoch": 0.1820388349514563,
+      "grad_norm": 5.531534671783447,
+      "learning_rate": 1.8786407766990295e-05,
+      "loss": 0.59,
+      "step": 300
+    },
+    {
+      "epoch": 0.24271844660194175,
+      "grad_norm": 5.0550031661987305,
+      "learning_rate": 1.838187702265372e-05,
+      "loss": 0.5483,
+      "step": 400
+    },
+    {
+      "epoch": 0.30339805825242716,
+      "grad_norm": 9.326889038085938,
+      "learning_rate": 1.7977346278317155e-05,
+      "loss": 0.472,
+      "step": 500
+    },
+    {
+      "epoch": 0.3640776699029126,
+      "grad_norm": 11.223947525024414,
+      "learning_rate": 1.7572815533980585e-05,
+      "loss": 0.4334,
+      "step": 600
+    },
+    {
+      "epoch": 0.42475728155339804,
+      "grad_norm": 6.310647487640381,
+      "learning_rate": 1.7168284789644015e-05,
+      "loss": 0.3993,
+      "step": 700
+    },
+    {
+      "epoch": 0.4854368932038835,
+      "grad_norm": 13.495203018188477,
+      "learning_rate": 1.6763754045307445e-05,
+      "loss": 0.3976,
+      "step": 800
+    },
+    {
+      "epoch": 0.5461165048543689,
+      "grad_norm": 9.985793113708496,
+      "learning_rate": 1.6359223300970874e-05,
+      "loss": 0.4045,
+      "step": 900
+    },
+    {
+      "epoch": 0.6067961165048543,
+      "grad_norm": 5.920831203460693,
+      "learning_rate": 1.5954692556634304e-05,
+      "loss": 0.3661,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6674757281553398,
+      "grad_norm": 7.346311092376709,
+      "learning_rate": 1.5550161812297734e-05,
+      "loss": 0.3527,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7281553398058253,
+      "grad_norm": 7.223079681396484,
+      "learning_rate": 1.5145631067961166e-05,
+      "loss": 0.361,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7888349514563107,
+      "grad_norm": 5.1326799392700195,
+      "learning_rate": 1.4741100323624598e-05,
+      "loss": 0.3892,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8495145631067961,
+      "grad_norm": 7.782454967498779,
+      "learning_rate": 1.4336569579288026e-05,
+      "loss": 0.3502,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9101941747572816,
+      "grad_norm": 8.951793670654297,
+      "learning_rate": 1.3932038834951458e-05,
+      "loss": 0.2934,
+      "step": 1500
+    },
+    {
+      "epoch": 0.970873786407767,
+      "grad_norm": 9.385202407836914,
+      "learning_rate": 1.3527508090614887e-05,
+      "loss": 0.3436,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8803116147308782,
+      "eval_f1": 0.87963672068735,
+      "eval_loss": 0.30396342277526855,
+      "eval_precision": 0.8827457335389428,
+      "eval_recall": 0.8803116147308782,
+      "eval_runtime": 21.997,
+      "eval_samples_per_second": 256.762,
+      "eval_steps_per_second": 16.048,
+      "step": 1648
+    },
+    {
+      "epoch": 1.0315533980582525,
+      "grad_norm": 11.795103073120117,
+      "learning_rate": 1.3122977346278319e-05,
+      "loss": 0.2579,
+      "step": 1700
+    },
+    {
+      "epoch": 1.0922330097087378,
+      "grad_norm": 3.647850751876831,
+      "learning_rate": 1.2718446601941749e-05,
+      "loss": 0.2231,
+      "step": 1800
+    },
+    {
+      "epoch": 1.1529126213592233,
+      "grad_norm": 6.818081855773926,
+      "learning_rate": 1.2313915857605179e-05,
+      "loss": 0.2381,
+      "step": 1900
+    },
+    {
+      "epoch": 1.2135922330097086,
+      "grad_norm": 25.645702362060547,
+      "learning_rate": 1.190938511326861e-05,
+      "loss": 0.2778,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2742718446601942,
+      "grad_norm": 20.234249114990234,
+      "learning_rate": 1.1504854368932039e-05,
+      "loss": 0.2127,
+      "step": 2100
+    },
+    {
+      "epoch": 1.3349514563106797,
+      "grad_norm": 5.801571846008301,
+      "learning_rate": 1.110032362459547e-05,
+      "loss": 0.2406,
+      "step": 2200
+    },
+    {
+      "epoch": 1.395631067961165,
+      "grad_norm": 3.5099356174468994,
+      "learning_rate": 1.0695792880258899e-05,
+      "loss": 0.2109,
+      "step": 2300
+    },
+    {
+      "epoch": 1.4563106796116505,
+      "grad_norm": 13.334369659423828,
+      "learning_rate": 1.029126213592233e-05,
+      "loss": 0.2268,
+      "step": 2400
+    },
+    {
+      "epoch": 1.516990291262136,
+      "grad_norm": 1.7576143741607666,
+      "learning_rate": 9.88673139158576e-06,
+      "loss": 0.2549,
+      "step": 2500
+    },
+    {
+      "epoch": 1.5776699029126213,
+      "grad_norm": 6.285398006439209,
+      "learning_rate": 9.482200647249192e-06,
+      "loss": 0.2078,
+      "step": 2600
+    },
+    {
+      "epoch": 1.6383495145631068,
+      "grad_norm": 10.36067008972168,
+      "learning_rate": 9.077669902912622e-06,
+      "loss": 0.2391,
+      "step": 2700
+    },
+    {
+      "epoch": 1.6990291262135924,
+      "grad_norm": 7.475758075714111,
+      "learning_rate": 8.673139158576054e-06,
+      "loss": 0.2334,
+      "step": 2800
+    },
+    {
+      "epoch": 1.7597087378640777,
+      "grad_norm": 16.30022430419922,
+      "learning_rate": 8.268608414239483e-06,
+      "loss": 0.2316,
+      "step": 2900
+    },
+    {
+      "epoch": 1.820388349514563,
+      "grad_norm": 4.486410617828369,
+      "learning_rate": 7.864077669902913e-06,
+      "loss": 0.2239,
+      "step": 3000
+    },
+    {
+      "epoch": 1.8810679611650487,
+      "grad_norm": 24.858966827392578,
+      "learning_rate": 7.459546925566343e-06,
+      "loss": 0.2162,
+      "step": 3100
+    },
+    {
+      "epoch": 1.941747572815534,
+      "grad_norm": 18.323312759399414,
+      "learning_rate": 7.055016181229773e-06,
+      "loss": 0.2452,
+      "step": 3200
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9008498583569405,
+      "eval_f1": 0.8996119975479232,
+      "eval_loss": 0.2864818572998047,
+      "eval_precision": 0.9011561063745416,
+      "eval_recall": 0.9008498583569405,
+      "eval_runtime": 22.0341,
+      "eval_samples_per_second": 256.33,
+      "eval_steps_per_second": 16.021,
+      "step": 3296
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 4944,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 2,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1745811035916288.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3296/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:609d9e093ff10cb72ff2ceab0b60b959ffb5a8e69641d5f5afc035f09f45f51b
+size 5304

checkpoint-4944/config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "distilbert-base-multilingual-cased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 119547
+}

checkpoint-4944/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c5eb116208f77aeac8c1dd22271929d9fdbc47b8f91e8b401b6e8af3a4d4349
+size 541332756

checkpoint-4944/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8155baa2fbbdc05e6df9c79413c0b6e2ea203210035c4355d85c0b2bf4e99e22
+size 1082727610

checkpoint-4944/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07a5237c61fd9606d1962942985d8ec394dba78a7c8ffa761c2b48b2aa04b2f5
+size 14244

checkpoint-4944/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca493678282795f2e99cbd4840bd077221fa7cf771f4e64768bf266ad9336fef
+size 1064

checkpoint-4944/trainer_state.json ADDED Viewed

	@@ -0,0 +1,421 @@

+{
+  "best_metric": 0.905103252820856,
+  "best_model_checkpoint": "distilbert-type-classifier/checkpoint-4944",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 4944,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06067961165048544,
+      "grad_norm": 7.083491325378418,
+      "learning_rate": 1.959546925566343e-05,
+      "loss": 1.3291,
+      "step": 100
+    },
+    {
+      "epoch": 0.12135922330097088,
+      "grad_norm": 5.435772895812988,
+      "learning_rate": 1.919093851132686e-05,
+      "loss": 0.7948,
+      "step": 200
+    },
+    {
+      "epoch": 0.1820388349514563,
+      "grad_norm": 5.531534671783447,
+      "learning_rate": 1.8786407766990295e-05,
+      "loss": 0.59,
+      "step": 300
+    },
+    {
+      "epoch": 0.24271844660194175,
+      "grad_norm": 5.0550031661987305,
+      "learning_rate": 1.838187702265372e-05,
+      "loss": 0.5483,
+      "step": 400
+    },
+    {
+      "epoch": 0.30339805825242716,
+      "grad_norm": 9.326889038085938,
+      "learning_rate": 1.7977346278317155e-05,
+      "loss": 0.472,
+      "step": 500
+    },
+    {
+      "epoch": 0.3640776699029126,
+      "grad_norm": 11.223947525024414,
+      "learning_rate": 1.7572815533980585e-05,
+      "loss": 0.4334,
+      "step": 600
+    },
+    {
+      "epoch": 0.42475728155339804,
+      "grad_norm": 6.310647487640381,
+      "learning_rate": 1.7168284789644015e-05,
+      "loss": 0.3993,
+      "step": 700
+    },
+    {
+      "epoch": 0.4854368932038835,
+      "grad_norm": 13.495203018188477,
+      "learning_rate": 1.6763754045307445e-05,
+      "loss": 0.3976,
+      "step": 800
+    },
+    {
+      "epoch": 0.5461165048543689,
+      "grad_norm": 9.985793113708496,
+      "learning_rate": 1.6359223300970874e-05,
+      "loss": 0.4045,
+      "step": 900
+    },
+    {
+      "epoch": 0.6067961165048543,
+      "grad_norm": 5.920831203460693,
+      "learning_rate": 1.5954692556634304e-05,
+      "loss": 0.3661,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6674757281553398,
+      "grad_norm": 7.346311092376709,
+      "learning_rate": 1.5550161812297734e-05,
+      "loss": 0.3527,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7281553398058253,
+      "grad_norm": 7.223079681396484,
+      "learning_rate": 1.5145631067961166e-05,
+      "loss": 0.361,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7888349514563107,
+      "grad_norm": 5.1326799392700195,
+      "learning_rate": 1.4741100323624598e-05,
+      "loss": 0.3892,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8495145631067961,
+      "grad_norm": 7.782454967498779,
+      "learning_rate": 1.4336569579288026e-05,
+      "loss": 0.3502,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9101941747572816,
+      "grad_norm": 8.951793670654297,
+      "learning_rate": 1.3932038834951458e-05,
+      "loss": 0.2934,
+      "step": 1500
+    },
+    {
+      "epoch": 0.970873786407767,
+      "grad_norm": 9.385202407836914,
+      "learning_rate": 1.3527508090614887e-05,
+      "loss": 0.3436,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8803116147308782,
+      "eval_f1": 0.87963672068735,
+      "eval_loss": 0.30396342277526855,
+      "eval_precision": 0.8827457335389428,
+      "eval_recall": 0.8803116147308782,
+      "eval_runtime": 21.997,
+      "eval_samples_per_second": 256.762,
+      "eval_steps_per_second": 16.048,
+      "step": 1648
+    },
+    {
+      "epoch": 1.0315533980582525,
+      "grad_norm": 11.795103073120117,
+      "learning_rate": 1.3122977346278319e-05,
+      "loss": 0.2579,
+      "step": 1700
+    },
+    {
+      "epoch": 1.0922330097087378,
+      "grad_norm": 3.647850751876831,
+      "learning_rate": 1.2718446601941749e-05,
+      "loss": 0.2231,
+      "step": 1800
+    },
+    {
+      "epoch": 1.1529126213592233,
+      "grad_norm": 6.818081855773926,
+      "learning_rate": 1.2313915857605179e-05,
+      "loss": 0.2381,
+      "step": 1900
+    },
+    {
+      "epoch": 1.2135922330097086,
+      "grad_norm": 25.645702362060547,
+      "learning_rate": 1.190938511326861e-05,
+      "loss": 0.2778,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2742718446601942,
+      "grad_norm": 20.234249114990234,
+      "learning_rate": 1.1504854368932039e-05,
+      "loss": 0.2127,
+      "step": 2100
+    },
+    {
+      "epoch": 1.3349514563106797,
+      "grad_norm": 5.801571846008301,
+      "learning_rate": 1.110032362459547e-05,
+      "loss": 0.2406,
+      "step": 2200
+    },
+    {
+      "epoch": 1.395631067961165,
+      "grad_norm": 3.5099356174468994,
+      "learning_rate": 1.0695792880258899e-05,
+      "loss": 0.2109,
+      "step": 2300
+    },
+    {
+      "epoch": 1.4563106796116505,
+      "grad_norm": 13.334369659423828,
+      "learning_rate": 1.029126213592233e-05,
+      "loss": 0.2268,
+      "step": 2400
+    },
+    {
+      "epoch": 1.516990291262136,
+      "grad_norm": 1.7576143741607666,
+      "learning_rate": 9.88673139158576e-06,
+      "loss": 0.2549,
+      "step": 2500
+    },
+    {
+      "epoch": 1.5776699029126213,
+      "grad_norm": 6.285398006439209,
+      "learning_rate": 9.482200647249192e-06,
+      "loss": 0.2078,
+      "step": 2600
+    },
+    {
+      "epoch": 1.6383495145631068,
+      "grad_norm": 10.36067008972168,
+      "learning_rate": 9.077669902912622e-06,
+      "loss": 0.2391,
+      "step": 2700
+    },
+    {
+      "epoch": 1.6990291262135924,
+      "grad_norm": 7.475758075714111,
+      "learning_rate": 8.673139158576054e-06,
+      "loss": 0.2334,
+      "step": 2800
+    },
+    {
+      "epoch": 1.7597087378640777,
+      "grad_norm": 16.30022430419922,
+      "learning_rate": 8.268608414239483e-06,
+      "loss": 0.2316,
+      "step": 2900
+    },
+    {
+      "epoch": 1.820388349514563,
+      "grad_norm": 4.486410617828369,
+      "learning_rate": 7.864077669902913e-06,
+      "loss": 0.2239,
+      "step": 3000
+    },
+    {
+      "epoch": 1.8810679611650487,
+      "grad_norm": 24.858966827392578,
+      "learning_rate": 7.459546925566343e-06,
+      "loss": 0.2162,
+      "step": 3100
+    },
+    {
+      "epoch": 1.941747572815534,
+      "grad_norm": 18.323312759399414,
+      "learning_rate": 7.055016181229773e-06,
+      "loss": 0.2452,
+      "step": 3200
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9008498583569405,
+      "eval_f1": 0.8996119975479232,
+      "eval_loss": 0.2864818572998047,
+      "eval_precision": 0.9011561063745416,
+      "eval_recall": 0.9008498583569405,
+      "eval_runtime": 22.0341,
+      "eval_samples_per_second": 256.33,
+      "eval_steps_per_second": 16.021,
+      "step": 3296
+    },
+    {
+      "epoch": 2.0024271844660193,
+      "grad_norm": 22.042865753173828,
+      "learning_rate": 6.650485436893205e-06,
+      "loss": 0.1939,
+      "step": 3300
+    },
+    {
+      "epoch": 2.063106796116505,
+      "grad_norm": 3.2583961486816406,
+      "learning_rate": 6.245954692556635e-06,
+      "loss": 0.1526,
+      "step": 3400
+    },
+    {
+      "epoch": 2.1237864077669903,
+      "grad_norm": 16.607648849487305,
+      "learning_rate": 5.841423948220065e-06,
+      "loss": 0.1431,
+      "step": 3500
+    },
+    {
+      "epoch": 2.1844660194174756,
+      "grad_norm": 5.757761478424072,
+      "learning_rate": 5.436893203883496e-06,
+      "loss": 0.1566,
+      "step": 3600
+    },
+    {
+      "epoch": 2.2451456310679614,
+      "grad_norm": 10.476051330566406,
+      "learning_rate": 5.0323624595469255e-06,
+      "loss": 0.1645,
+      "step": 3700
+    },
+    {
+      "epoch": 2.3058252427184467,
+      "grad_norm": 3.0033633708953857,
+      "learning_rate": 4.627831715210356e-06,
+      "loss": 0.1461,
+      "step": 3800
+    },
+    {
+      "epoch": 2.366504854368932,
+      "grad_norm": 18.662181854248047,
+      "learning_rate": 4.223300970873786e-06,
+      "loss": 0.1552,
+      "step": 3900
+    },
+    {
+      "epoch": 2.4271844660194173,
+      "grad_norm": 3.020024538040161,
+      "learning_rate": 3.818770226537217e-06,
+      "loss": 0.1793,
+      "step": 4000
+    },
+    {
+      "epoch": 2.487864077669903,
+      "grad_norm": 4.562654495239258,
+      "learning_rate": 3.4142394822006474e-06,
+      "loss": 0.143,
+      "step": 4100
+    },
+    {
+      "epoch": 2.5485436893203883,
+      "grad_norm": 11.560980796813965,
+      "learning_rate": 3.0097087378640778e-06,
+      "loss": 0.1584,
+      "step": 4200
+    },
+    {
+      "epoch": 2.6092233009708736,
+      "grad_norm": 6.995797634124756,
+      "learning_rate": 2.6051779935275086e-06,
+      "loss": 0.1488,
+      "step": 4300
+    },
+    {
+      "epoch": 2.6699029126213594,
+      "grad_norm": 2.360529661178589,
+      "learning_rate": 2.2006472491909385e-06,
+      "loss": 0.1338,
+      "step": 4400
+    },
+    {
+      "epoch": 2.7305825242718447,
+      "grad_norm": 19.43924903869629,
+      "learning_rate": 1.796116504854369e-06,
+      "loss": 0.1531,
+      "step": 4500
+    },
+    {
+      "epoch": 2.79126213592233,
+      "grad_norm": 6.97533655166626,
+      "learning_rate": 1.3915857605177997e-06,
+      "loss": 0.1586,
+      "step": 4600
+    },
+    {
+      "epoch": 2.8519417475728153,
+      "grad_norm": 1.0474483966827393,
+      "learning_rate": 9.870550161812298e-07,
+      "loss": 0.1455,
+      "step": 4700
+    },
+    {
+      "epoch": 2.912621359223301,
+      "grad_norm": 25.780860900878906,
+      "learning_rate": 5.825242718446603e-07,
+      "loss": 0.1604,
+      "step": 4800
+    },
+    {
+      "epoch": 2.9733009708737863,
+      "grad_norm": 5.437297821044922,
+      "learning_rate": 1.7799352750809063e-07,
+      "loss": 0.1497,
+      "step": 4900
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9061614730878187,
+      "eval_f1": 0.905103252820856,
+      "eval_loss": 0.313060998916626,
+      "eval_precision": 0.9060485063772568,
+      "eval_recall": 0.9061614730878187,
+      "eval_runtime": 23.35,
+      "eval_samples_per_second": 241.884,
+      "eval_steps_per_second": 15.118,
+      "step": 4944
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 4944,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 2,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2618716553874432.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-4944/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:609d9e093ff10cb72ff2ceab0b60b959ffb5a8e69641d5f5afc035f09f45f51b
+size 5304

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "distilbert-base-multilingual-cased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 119547
+}

confusion_matrix.png ADDED Viewed

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c5eb116208f77aeac8c1dd22271929d9fdbc47b8f91e8b401b6e8af3a4d4349
+size 541332756

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:609d9e093ff10cb72ff2ceab0b60b959ffb5a8e69641d5f5afc035f09f45f51b
+size 5304

type_mapping.json ADDED Viewed

	@@ -0,0 +1 @@


+{"0": "coherent", "1": "grammatical_errors", "2": "random_bytes", "3": "random_tokens", "4": "random_words", "5": "run_on", "6": "word_soup"}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff