"""
tool_trainer_simple.py - Fine-tune SmolLM2-1.7B for dynamic function calling using LoRA
This script uses supervised fine-tuning (SFT) instead of DPO, which is simpler and more
compatible with current library versions while still teaching JSON-only responses.
Key hyperparameters:
- LoRA rank: 8 (small adapter for efficiency)
- Epochs: 3 (enough to learn pattern without overfitting)
- Learning rate: 5e-5 (conservative for stability)
"""
import json
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset
import os

def load_preference_pairs(file_path="tool_pairs.jsonl"):
    """Load and parse the JSONL preference pairs."""
    pairs = []
    with open(file_path, 'r') as f:
        for line in f:
            pairs.append(json.loads(line.strip()))
    return pairs
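
# Each line of tool_pairs.jsonl is assumed to be a JSON object with at least
# "prompt" and "chosen" fields (DPO-style pairs typically also carry a
# "rejected" field, which SFT simply ignores). A purely illustrative record:
#
#   {"prompt": "<|im_start|>system ...<|im_start|>assistant\n",
#    "chosen": "{\"name\": \"get_stock_price\", \"arguments\": {\"ticker\": \"MSFT\"}}",
#    "rejected": "Sure! Microsoft is trading at ..."}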

def format_for_sft(pairs, tokenizer):
    """Convert pairs to SFT format - use only the 'chosen' responses."""
    formatted = []
    for pair in pairs:
        # Combine prompt + chosen response as training example
        full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token
        formatted.append({"text": full_text})
    return formatted

def tokenize_function(examples, tokenizer, max_length=512):
    """Tokenize the training examples."""
    # Tokenize the text
    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        padding=False,
        max_length=max_length,
        return_tensors=None
    )
    # For causal LM, labels are the same as input_ids
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

def main():
    print("πŸš€ Starting Dynamic Function-Calling Agent Training (SFT)")
    print("=" * 60)

    # 1. Load the base model and tokenizer
    print("πŸ“₯ Loading SmolLM2-1.7B model and tokenizer...")
    model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        trust_remote_code=True
    )
    print(f"βœ… Loaded model: {model_name}")
    print(f"πŸ”§ Model dtype: {model.dtype}")
    print(f"πŸ’Ύ Model size: ~{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")

    # 2. Set up LoRA configuration
    print("\nπŸ”© Setting up LoRA adapter (rank 8)...")
    lora_config = LoraConfig(
        r=8,              # Low rank - small adapter
        lora_alpha=16,    # Scaling factor (typically 2x rank)
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_dropout=0.1, # Prevent overfitting
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )
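    # Note: the target_modules above name the attention projections (q/k/v/o)
    # and the MLP projections (gate/up/down) of SmolLM2's Llama-style decoder
    # blocks, so the adapter touches every linear layer inside each block.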
    model = get_peft_model(model, lora_config)
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print("βœ… LoRA adapter attached")
    print(f"🎯 Trainable parameters: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)")

    # 3. Load and prepare training data
    print("\nπŸ“Š Loading preference pairs...")
    pairs = load_preference_pairs()
    formatted_pairs = format_for_sft(pairs, tokenizer)
    print(f"βœ… Loaded {len(pairs)} preference pairs")
    print("πŸ“ Sample training text:")
    print(formatted_pairs[0]["text"][:200] + "...")

    # Create dataset and tokenize
    train_dataset = Dataset.from_list(formatted_pairs)
    tokenized_dataset = train_dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=train_dataset.column_names
    )
    print(f"πŸ“Š Tokenized dataset size: {len(tokenized_dataset)} examples")

    # 4. Set up training arguments
    print("\nβš™οΈ Configuring training (3 epochs)...")
    training_args = TrainingArguments(
        output_dir="./smollm_tool_adapter",
        num_train_epochs=3,
        per_device_train_batch_size=1,  # Small batch for memory efficiency
        gradient_accumulation_steps=4,  # Effective batch size = 4
        learning_rate=5e-5,
        warmup_steps=10,
        logging_steps=1,
        save_steps=50,
        save_total_limit=2,
        remove_unused_columns=False,
        fp16=torch.cuda.is_available(),  # Use fp16 if GPU available
        dataloader_pin_memory=False,
        report_to="none",  # Disable wandb/other logging ("none", not None, actually turns integrations off)
        logging_dir="./logs"
    )
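    # Rough step-count arithmetic for this config (illustrative, not measured):
    # with batch size 1 and gradient_accumulation_steps 4, each optimizer step
    # consumes 4 examples, so e.g. 40 pairs would give ~10 steps per epoch and
    # ~30 steps over 3 epochs, which is why warmup_steps is kept small at 10.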

    # 5. Set up data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,  # We're doing causal LM, not masked LM
    )

    # 6. Initialize trainer
    print("πŸ‹οΈ Initializing trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )
    print("βœ… Trainer ready")

    # 7. Start training
    print("\n🎯 Starting training...")
    print("⏱️ This should take ~8-15 minutes on M4 Max, longer on CPU")

    # Train the model directly
    print("πŸš€ Beginning training...")
    train_result = trainer.train()

    print("\nπŸŽ‰ Training completed!")
    print(f"πŸ“Š Final training loss: {train_result.training_loss:.4f}")
    print(f"⏱️ Training time: {train_result.metrics.get('train_runtime', 0):.1f} seconds")

    # 8. Save the fine-tuned adapter
    print("\nπŸ’Ύ Saving model adapter...")
    model.save_pretrained("./smollm_tool_adapter")
    tokenizer.save_pretrained("./smollm_tool_adapter")
    print("βœ… Model saved to './smollm_tool_adapter'")
    print("🏁 Training complete! Ready for testing.")

    # 9. Quick test
    print("\nπŸ§ͺ Quick functionality test...")
    test_prompt = """<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
<schema>
{
"name": "get_stock_price",
"description": "Return the latest price for a given ticker symbol.",
"parameters": {
"type": "object",
"properties": {
"ticker": {"type": "string"}
},
"required": ["ticker"]
}
}
</schema>
<|im_start|>user
What's Microsoft trading at?<|im_end|>
<|im_start|>assistant
"""

    # Move inputs to the model's device (matters when device_map="auto" put the model on GPU)
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
    print(f"πŸ€– Model response: {response.strip()}")

    return model, tokenizer

if __name__ == "__main__":
    model, tokenizer = main()