""" tool_trainer_simple.py - Fine-tune SmolLM2-1.7B for dynamic function calling using LoRA This script uses supervised fine-tuning (SFT) instead of DPO, which is simpler and more compatible with current library versions while still teaching JSON-only responses. Key hyperparameters: - LoRA rank: 8 (small adapter for efficiency) - Epochs: 3 (enough to learn pattern without overfitting) - Learning rate: 5e-5 (conservative for stability) """ import json import torch from transformers import ( AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling ) from peft import LoraConfig, get_peft_model, TaskType from datasets import Dataset import os def load_preference_pairs(file_path="tool_pairs.jsonl"): """Load and parse the JSONL preference pairs.""" pairs = [] with open(file_path, 'r') as f: for line in f: pairs.append(json.loads(line.strip())) return pairs def format_for_sft(pairs, tokenizer): """Convert pairs to SFT format - use only the 'chosen' responses.""" formatted = [] for pair in pairs: # Combine prompt + chosen response as training example full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token formatted.append({"text": full_text}) return formatted def tokenize_function(examples, tokenizer, max_length=512): """Tokenize the training examples.""" # Tokenize the text tokenized = tokenizer( examples["text"], truncation=True, padding=False, max_length=max_length, return_tensors=None ) # For causal LM, labels are the same as input_ids tokenized["labels"] = tokenized["input_ids"].copy() return tokenized def main(): print("๐Ÿš€ Starting Dynamic Function-Calling Agent Training (SFT)") print("=" * 60) # 1. Load the base model and tokenizer print("๐Ÿ“ฅ Loading SmolLM2-1.7B model and tokenizer...") model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct" tokenizer = AutoTokenizer.from_pretrained(model_name) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" if torch.cuda.is_available() else None, trust_remote_code=True ) print(f"โœ… Loaded model: {model_name}") print(f"๐Ÿ”ง Model dtype: {model.dtype}") print(f"๐Ÿ’พ Model size: ~{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters") # 2. Set up LoRA configuration print("\n๐Ÿ”ฉ Setting up LoRA adapter (rank 8)...") lora_config = LoraConfig( r=8, # Low rank - small adapter lora_alpha=16, # Scaling factor (typically 2x rank) target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], lora_dropout=0.1, # Prevent overfitting bias="none", task_type=TaskType.CAUSAL_LM ) model = get_peft_model(model, lora_config) trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) total_params = sum(p.numel() for p in model.parameters()) print(f"โœ… LoRA adapter attached") print(f"๐ŸŽฏ Trainable parameters: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)") # 3. 
    print("\n📊 Loading preference pairs...")
    pairs = load_preference_pairs()
    formatted_pairs = format_for_sft(pairs, tokenizer)
    print(f"✅ Loaded {len(pairs)} preference pairs")
    print("📝 Sample training text:")
    print(formatted_pairs[0]["text"][:200] + "...")

    # Create dataset and tokenize
    train_dataset = Dataset.from_list(formatted_pairs)
    tokenized_dataset = train_dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=train_dataset.column_names
    )
    print(f"📊 Tokenized dataset size: {len(tokenized_dataset)} examples")

    # 4. Set up training arguments
    print("\n⚙️ Configuring training (3 epochs)...")
    training_args = TrainingArguments(
        output_dir="./smollm_tool_adapter",
        num_train_epochs=3,
        per_device_train_batch_size=1,   # Small batch for memory efficiency
        gradient_accumulation_steps=4,   # Effective batch size = 4
        learning_rate=5e-5,
        warmup_steps=10,
        logging_steps=1,
        save_steps=50,
        save_total_limit=2,
        remove_unused_columns=False,
        fp16=torch.cuda.is_available(),  # Use fp16 if GPU available
        dataloader_pin_memory=False,
        report_to="none",                # Disable wandb and other logging integrations
        logging_dir="./logs"
    )

    # 5. Set up data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,  # We're doing causal LM, not masked LM
    )

    # 6. Initialize trainer
    print("🏋️ Initializing trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )
    print("✅ Trainer ready")

    # 7. Start training
    print("\n🎯 Starting training...")
    print("⏱️ This should take ~8-15 minutes on M4 Max, longer on CPU")

    # Train the model directly
    print("🚀 Beginning training...")
    train_result = trainer.train()

    print("\n🎉 Training completed!")
    print(f"📊 Final training loss: {train_result.training_loss:.4f}")
    print(f"⏱️ Training time: {train_result.metrics.get('train_runtime', 0):.1f} seconds")

    # 8. Save the fine-tuned adapter
    print("\n💾 Saving model adapter...")
    model.save_pretrained("./smollm_tool_adapter")
    tokenizer.save_pretrained("./smollm_tool_adapter")
    print("✅ Model saved to './smollm_tool_adapter'")
    print("🏁 Training complete! Ready for testing.")

    # 9. Quick test
    print("\n🧪 Quick functionality test...")
    test_prompt = """<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
{
  "name": "get_stock_price",
  "description": "Return the latest price for a given ticker symbol.",
  "parameters": {
    "type": "object",
    "properties": {
      "ticker": {"type": "string"}
    },
    "required": ["ticker"]
  }
}
<|im_start|>user
What's Microsoft trading at?<|im_end|>
<|im_start|>assistant
"""

    # Keep inputs on the same device as the (possibly GPU-mapped) model
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
    print(f"🤖 Model response: {response.strip()}")

    return model, tokenizer


if __name__ == "__main__":
    model, tokenizer = main()
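
# Inference sketch (not executed here): the adapter saved above can be re-attached to
# the base checkpoint later. This assumes the "./smollm_tool_adapter" directory produced
# by this script and the same base model name.
#
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   from peft import PeftModel
#
#   base = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
#   tuned = PeftModel.from_pretrained(base, "./smollm_tool_adapter")
#   tok = AutoTokenizer.from_pretrained("./smollm_tool_adapter")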