"""
tool_trainer_simple.py - Fine-tune SmolLM2-1.7B for dynamic function calling using LoRA

This script uses supervised fine-tuning (SFT) instead of DPO, which is simpler and more
compatible with current library versions while still teaching JSON-only responses.

Key hyperparameters:
- LoRA rank: 8 (small adapter for efficiency)
- Epochs: 3 (enough to learn the pattern without overfitting)
- Learning rate: 5e-5 (conservative for stability)
"""

import json
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset


def load_preference_pairs(file_path="tool_pairs.jsonl"):
    """Load and parse the JSONL preference pairs."""
    pairs = []
    with open(file_path, 'r') as f:
        for line in f:
            pairs.append(json.loads(line.strip()))
    return pairs
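
# Assumed input format (illustrative only; the real tool_pairs.jsonl may carry extra
# fields): each line is a JSON object with at least "prompt" and "chosen" keys, the only
# fields read below. Anything else (e.g. a DPO-style "rejected" response) is ignored.
#   {"prompt": "<|im_start|>user ... <|im_end|>\n<|im_start|>assistant\n",
#    "chosen": "{\"name\": \"get_stock_price\", \"arguments\": {\"ticker\": \"MSFT\"}}"}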


def format_for_sft(pairs, tokenizer):
    """Convert pairs to SFT format - use only the 'chosen' responses."""
    formatted = []
    for pair in pairs:
        # Concatenate prompt and chosen completion, then append EOS so the model
        # learns to stop after emitting the JSON function call.
        full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token
        formatted.append({"text": full_text})
    return formatted


def tokenize_function(examples, tokenizer, max_length=512):
    """Tokenize the training examples."""
    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        padding=False,  # padding is handled per batch by the data collator
        max_length=max_length,
        return_tensors=None
    )
    # For causal language modeling, the labels are simply the input ids.
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized
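
# Because labels mirror input_ids, the loss is computed over the full sequence, prompt
# tokens included. Masking the prompt with -100 so only the JSON response is scored
# would be a possible variation; it is not what this script does.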


def main():
    print("🚀 Starting Dynamic Function-Calling Agent Training (SFT)")
    print("=" * 60)

    print("📥 Loading SmolLM2-1.7B model and tokenizer...")
    model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        trust_remote_code=True
    )

    print(f"✅ Loaded model: {model_name}")
    print(f"🔧 Model dtype: {model.dtype}")
    print(f"💾 Model size: ~{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")

    print("\n🧩 Setting up LoRA adapter (rank 8)...")
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )
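    # With r=8 and lora_alpha=16 the LoRA updates are scaled by alpha/r = 2, and the
    # target modules cover every attention and MLP projection in the decoder layers,
    # so the adapter can shape both attention patterns and the feed-forward path.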

    model = get_peft_model(model, lora_config)
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())

    print("✅ LoRA adapter attached")
    print(f"🎯 Trainable parameters: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)")

    print("\n📂 Loading preference pairs...")
    pairs = load_preference_pairs()
    formatted_pairs = format_for_sft(pairs, tokenizer)

    print(f"✅ Loaded {len(pairs)} preference pairs")
    print("📝 Sample training text:")
    print(formatted_pairs[0]["text"][:200] + "...")

    train_dataset = Dataset.from_list(formatted_pairs)
    tokenized_dataset = train_dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=train_dataset.column_names  # drop the raw "text" column, keep only token fields
    )

    print(f"📊 Tokenized dataset size: {len(tokenized_dataset)} examples")

    print("\n⚙️ Configuring training (3 epochs)...")
    training_args = TrainingArguments(
        output_dir="./smollm_tool_adapter",
        num_train_epochs=3,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=5e-5,
        warmup_steps=10,
        logging_steps=1,
        save_steps=50,
        save_total_limit=2,
        remove_unused_columns=False,
        fp16=torch.cuda.is_available(),
        dataloader_pin_memory=False,
        report_to="none",  # the string "none" disables external loggers; None falls back to the default integrations
        logging_dir="./logs"
    )
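    # Effective batch size = per_device_train_batch_size (1) x gradient_accumulation_steps (4)
    # = 4 sequences per optimizer step, which keeps memory low while smoothing updates a bit.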

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    )
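    # mlm=False gives standard causal-LM (next-token) collation rather than masked-LM.
    # With per_device_train_batch_size=1 each batch holds a single example, so dynamic
    # padding is effectively a no-op here.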

    print("🏗️ Initializing trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )

    print("✅ Trainer ready")

    print("\n🎯 Starting training...")
    print("⏱️ This should take ~8-15 minutes on M4 Max, longer on CPU")

    print("🚀 Beginning training...")
    train_result = trainer.train()

    print("\n🎉 Training completed!")
    print(f"📉 Final training loss: {train_result.training_loss:.4f}")
    print(f"⏱️ Training time: {train_result.metrics.get('train_runtime', 0):.1f} seconds")

    print("\n💾 Saving model adapter...")
    model.save_pretrained("./smollm_tool_adapter")
    tokenizer.save_pretrained("./smollm_tool_adapter")
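    # Since model is PEFT-wrapped, save_pretrained() writes only the small LoRA adapter
    # weights and config (plus the tokenizer files), not the full 1.7B base model.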

    print("✅ Model saved to './smollm_tool_adapter'")
    print("🎉 Training complete! Ready for testing.")

    print("\n🧪 Quick functionality test...")
    test_prompt = """<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>

<schema>
{
  "name": "get_stock_price",
  "description": "Return the latest price for a given ticker symbol.",
  "parameters": {
    "type": "object",
    "properties": {
      "ticker": {"type": "string"}
    },
    "required": ["ticker"]
  }
}
</schema>

<|im_start|>user
What's Microsoft trading at?<|im_end|>
<|im_start|>assistant
"""

    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens, skipping the prompt portion of the output.
    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
    print(f"🤖 Model response: {response.strip()}")

    return model, tokenizer


if __name__ == "__main__":
    model, tokenizer = main()
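
# Usage sketch (paths as used above): run `python tool_trainer_simple.py` from a directory
# containing tool_pairs.jsonl; the LoRA adapter and tokenizer are written to
# ./smollm_tool_adapter, and the quick test prints the model's response to a sample
# stock-price query.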