GY2233
/

R2R_router_qwenr1

Text Classification

Model card Files Files and versions

GY2233 committed on Jul 24

Commit

cf0bcee

·

verified ·

1 Parent(s): 706ce41

Upload R2R router config

Files changed (1) hide show

config.json +97 -0

config.json ADDED Viewed

	@@ -0,0 +1,97 @@

+{
+  "model": {
+    "model_type": "HiddenStatesTokenLMHeadLogitsClassifier",
+    "init_args": {
+      "hidden_states_size": 1536,
+      "logits_size": 100,
+      "hidden_dims": [
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024
+      ],
+      "expansion_factor": 4,
+      "dropout_rate": 0.1,
+      "use_position_embedding": false,
+      "freeze_lm_head": true,
+      "normalize_input": false,
+      "pretrained_model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+    },
+    "model_specific_args": {},
+    "input_type": [
+      "hidden_states",
+      "token",
+      "logits"
+    ],
+    "output_type": "binary"
+  },
+  "data": {
+    "train": {
+      "path": [
+        "local:output_qwen_r1/query_dataset_train/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset"
+      ],
+      "type": "divergent",
+      "input_prefix": "small_"
+    },
+    "test": {
+      "path": [
+        "local:output_qwen_r1/query_dataset_validation/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset"
+      ],
+      "type": "divergent",
+      "input_prefix": "small_"
+    }
+  },
+  "training": {
+    "optimizer": {
+      "lr": 5e-05,
+      "weight_decay": 0.0005
+    },
+    "params": {
+      "num_epochs": 50,
+      "batch_size": 1024,
+      "patience": 10,
+      "device": "cuda"
+    },
+    "loss": {
+      "type": "BCEWithLogitsLoss",
+      "recall_factor": 1.0
+    },
+    "validation": {
+      "valid_freq": 2
+    },
+    "dtype": "float32"
+  },
+  "optimizing": {
+    "type": "threshold",
+    "min_recall": 0.95
+  },
+  "output": {
+    "output_dir": "resource/default_router.pt",
+    "checkpoint_dir": "output/checkpoint",
+    "model_name": null
+  },
+  "result": {
+    "model_path": "resource/default_router.pt/classifier_20250725_071911.pt",
+    "results": {
+      "threshold": 0.41585858585858587,
+      "best_epoch": 4,
+      "best_val_loss": 0.7956125675237121,
+      "final_metrics": {
+        "accuracy": 0.7007202534916438,
+        "precision": 0.2664509538449507,
+        "recall": 0.9510688780850916,
+        "f1": 0.4162777526774294,
+        "positive_rate": 0.40050413829284653
+      },
+      "pre_opt_metrics": {
+        "accuracy": 0.6180255418711208,
+        "precision": 0.6029766343289514,
+        "recall": 0.9264055921282188,
+        "f1": 0.7304922422944558,
+        "positive_rate": 0.8585155818412245
+      }
+    }
+  }
+}