ThatsGroes
/

SmolLM2-360M-Instruct-summarizer

+# Run configuration, grouped into four sections. How each key is consumed
+# is not visible here -- confirm against the script that loads this file.
+#
+# Experiment-tracking names (project and run).
+wandb_config:
+  wandb_project: "llm_dialog_summarizer_faster"
+  run_name: "SmolLM2-360M-Instruct-large-R"
+# Base model and loading options.
+model_config:
+  # NOTE(review): looks like a Hugging Face Hub repo id -- confirm.
+  model_id: "HuggingFaceTB/SmolLM2-360M-Instruct"
+  # Canonical lowercase boolean; parses as a bool on every YAML loader.
+  load_in_4bit: false
+  max_seq_length: 8192
+# LoRA adapter settings; r and lora_alpha are set to the same value (64).
+lora_config:
+  r: 64
+  lora_alpha: 64
+  use_rslora: true
+# Fine-tuning hyperparameters.
+sft_config:
+  # Keep plain decimal notation: some YAML 1.1 loaders (e.g. PyYAML) read
+  # exponent forms without a dot, such as 3e-4, as strings.
+  learning_rate: 0.0003
+  epochs: 2
+  optimizer: "adamw_8bit"
+  warmup_steps: 100
+  weight_decay: 0.01
+  lr_scheduler_type: "linear"
+  seed: 90201
+  # NOTE(review): presumably the dataset column holding the training
+  # text -- verify against the dataset schema.
+  dataset_text_field: "text"