"""
tool_trainer_simple.py - Fine-tune SmolLM2-1.7B for dynamic function calling using LoRA
This script uses supervised fine-tuning (SFT) instead of DPO, which is simpler and more
compatible with current library versions while still teaching JSON-only responses.
Key hyperparameters:
- LoRA rank: 8 (small adapter for efficiency)
- Epochs: 3 (enough to learn pattern without overfitting)
- Learning rate: 5e-5 (conservative for stability)
"""
import json
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset
import os
def load_preference_pairs(file_path="tool_pairs.jsonl"):
    """Load and parse the JSONL preference pairs."""
    pairs = []
    with open(file_path, 'r') as f:
        for line in f:
            pairs.append(json.loads(line.strip()))
    return pairs
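# Each line of tool_pairs.jsonl is assumed to be a JSON object exposing at least the
# "prompt" and "chosen" fields used below (a DPO-style "rejected" field, if present,
# is simply ignored by the SFT path). Illustrative shape only:
#
#   {"prompt": "<chat-formatted prompt>", "chosen": "<JSON function call>", "rejected": "<prose answer>"}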
def format_for_sft(pairs, tokenizer):
    """Convert pairs to SFT format - use only the 'chosen' responses."""
    formatted = []
    for pair in pairs:
        # Combine prompt + chosen response as training example
        full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token
        formatted.append({"text": full_text})
    return formatted
def tokenize_function(examples, tokenizer, max_length=512):
    """Tokenize the training examples."""
    # Tokenize the text
    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        padding=False,
        max_length=max_length,
        return_tensors=None
    )
    # For causal LM, labels are the same as input_ids
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized
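# Optional variant (illustrative sketch, not used in this script): compute the loss only
# on the chosen response by masking the prompt tokens with -100, which PyTorch's
# cross-entropy ignores. The helper name and signature are hypothetical, and the
# prompt/response boundary can be off by one token due to BPE merges.
def tokenize_response_only(prompt, full_text, tokenizer, max_length=512):
    prompt_len = len(tokenizer(prompt, add_special_tokens=False)["input_ids"])
    tokenized = tokenizer(full_text, truncation=True, padding=False, max_length=max_length)
    labels = list(tokenized["input_ids"])
    # Mask the prompt portion so only response tokens contribute to the loss
    labels[:prompt_len] = [-100] * min(prompt_len, len(labels))
    tokenized["labels"] = labels
    return tokenized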
def main():
    print("Starting Dynamic Function-Calling Agent Training (SFT)")
    print("=" * 60)

    # 1. Load the base model and tokenizer
    print("Loading SmolLM2-1.7B model and tokenizer...")
    model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        trust_remote_code=True
    )
    print(f"Loaded model: {model_name}")
    print(f"Model dtype: {model.dtype}")
    print(f"Model size: ~{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")
    # 2. Set up LoRA configuration
    print("\nSetting up LoRA adapter (rank 8)...")
    lora_config = LoraConfig(
        r=8,                    # Low rank - small adapter
        lora_alpha=16,          # Scaling factor (typically 2x rank)
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_dropout=0.1,       # Prevent overfitting
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )
    model = get_peft_model(model, lora_config)

    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print("LoRA adapter attached")
    print(f"Trainable parameters: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)")
    # 3. Load and prepare training data
    print("\nLoading preference pairs...")
    pairs = load_preference_pairs()
    formatted_pairs = format_for_sft(pairs, tokenizer)
    print(f"Loaded {len(pairs)} preference pairs")
    print("Sample training text:")
    print(formatted_pairs[0]["text"][:200] + "...")

    # Create dataset and tokenize
    train_dataset = Dataset.from_list(formatted_pairs)
    tokenized_dataset = train_dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=train_dataset.column_names
    )
    print(f"Tokenized dataset size: {len(tokenized_dataset)} examples")
    # 4. Set up training arguments
    print("\nConfiguring training (3 epochs)...")
    training_args = TrainingArguments(
        output_dir="./smollm_tool_adapter",
        num_train_epochs=3,
        per_device_train_batch_size=1,    # Small batch for memory efficiency
        gradient_accumulation_steps=4,    # Effective batch size = 1 x 4 = 4
        learning_rate=5e-5,
        warmup_steps=10,
        logging_steps=1,
        save_steps=50,
        save_total_limit=2,
        remove_unused_columns=False,
        fp16=torch.cuda.is_available(),   # Use fp16 if GPU available
        dataloader_pin_memory=False,
        report_to="none",                 # Disable wandb and other logging integrations ("none", not None)
        logging_dir="./logs"
    )
    # 5. Set up data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,  # We're doing causal LM, not masked LM
    )
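    # With mlm=False this acts as a causal-LM collator: it pads each batch dynamically
    # and derives labels from input_ids, setting padded positions to -100 so padding is
    # ignored by the loss.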
    # 6. Initialize trainer
    print("Initializing trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )
    print("Trainer ready")
    # 7. Start training
    print("\nStarting training...")
    print("This should take ~8-15 minutes on M4 Max, longer on CPU")

    # Train the model directly
    print("Beginning training...")
    train_result = trainer.train()

    print("\nTraining completed!")
    print(f"Final training loss: {train_result.training_loss:.4f}")
    print(f"Training time: {train_result.metrics.get('train_runtime', 0):.1f} seconds")

    # 8. Save the fine-tuned adapter
    print("\nSaving model adapter...")
    model.save_pretrained("./smollm_tool_adapter")
    tokenizer.save_pretrained("./smollm_tool_adapter")
    print("Model saved to './smollm_tool_adapter'")
    print("Training complete! Ready for testing.")
    # 9. Quick test
    print("\nQuick functionality test...")
    test_prompt = """<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
<schema>
{
"name": "get_stock_price",
"description": "Return the latest price for a given ticker symbol.",
"parameters": {
"type": "object",
"properties": {
"ticker": {"type": "string"}
},
"required": ["ticker"]
}
}
</schema>
<|im_start|>user
What's Microsoft trading at?<|im_end|>
<|im_start|>assistant
"""
    # Keep the test inputs on the same device as the model (matters when device_map="auto" places it on GPU)
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
    print(f"Model response: {response.strip()}")

    return model, tokenizer
if __name__ == "__main__":
    model, tokenizer = main()
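# --- Illustrative follow-up (sketch, not executed by this script) ---
# Reloading the saved adapter for inference with peft should look roughly like this;
# the model name and output path simply mirror the ones used above:
#
#   from peft import PeftModel
#   base = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
#   tokenizer = AutoTokenizer.from_pretrained("./smollm_tool_adapter")
#   model = PeftModel.from_pretrained(base, "./smollm_tool_adapter")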