"""
tool_trainer_simple.py - Fine-tune SmolLM2-1.7B for dynamic function calling using LoRA

This script uses supervised fine-tuning (SFT) instead of DPO, which is simpler and more
compatible with current library versions while still teaching JSON-only responses.

Key hyperparameters:
- LoRA rank: 8 (small adapter for efficiency)
- Epochs: 3 (enough to learn pattern without overfitting)
- Learning rate: 5e-5 (conservative for stability)
"""

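# Usage (assuming tool_pairs.jsonl is present in the working directory):
#   python tool_trainer_simple.py
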
import json
import torch
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset

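# Each line of tool_pairs.jsonl is a JSON object; the shape below is inferred from
# the keys accessed in this script (values are illustrative, not from the real data;
# a "rejected" field is typical of preference pairs but unused by this SFT script):
#   {"prompt": "<system + schema + user turns...>",
#    "chosen": "{\"name\": \"get_stock_price\", \"arguments\": {\"ticker\": \"MSFT\"}}",
#    "rejected": "Sure! Microsoft is currently trading at ..."}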
def load_preference_pairs(file_path="tool_pairs.jsonl"):
    """Load and parse the JSONL preference pairs."""
    pairs = []
    with open(file_path, 'r') as f:
        for line in f:
            pairs.append(json.loads(line.strip()))
    return pairs

def format_for_sft(pairs, tokenizer):
    """Convert pairs to SFT format - use only the 'chosen' responses."""
    formatted = []
    for pair in pairs:
        # Combine prompt + chosen response as training example
        full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token
        formatted.append({"text": full_text})
    return formatted

def tokenize_function(examples, tokenizer, max_length=512):
    """Tokenize the training examples."""
    # Tokenize the text
    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        padding=False,
        max_length=max_length,
        return_tensors=None
    )
    
    # For causal LM, labels mirror input_ids. Note this computes loss over the
    # entire prompt + response (no prompt masking), which keeps the script simple.
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

def main():
    print("πŸš€ Starting Dynamic Function-Calling Agent Training (SFT)")
    print("=" * 60)
    
    # 1. Load the base model and tokenizer
    print("πŸ“₯ Loading SmolLM2-1.7B model and tokenizer...")
    model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
    
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        trust_remote_code=True
    )
    
    print(f"βœ… Loaded model: {model_name}")
    print(f"πŸ”§ Model dtype: {model.dtype}")
    print(f"πŸ’Ύ Model size: ~{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")
    
    # 2. Set up LoRA configuration
    print("\nπŸ”© Setting up LoRA adapter (rank 8)...")
    lora_config = LoraConfig(
        r=8,                    # Low rank - small adapter
        lora_alpha=16,          # Scaling factor (typically 2x rank)
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_dropout=0.1,       # Prevent overfitting
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )
    
    model = get_peft_model(model, lora_config)
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    
    print("βœ… LoRA adapter attached")
    print(f"🎯 Trainable parameters: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)")
    
    # 3. Load and prepare training data
    print("\nπŸ“Š Loading preference pairs...")
    pairs = load_preference_pairs()
    formatted_pairs = format_for_sft(pairs, tokenizer)
    
    print(f"βœ… Loaded {len(pairs)} preference pairs")
    print("πŸ“ Sample training text:")
    print(formatted_pairs[0]["text"][:200] + "...")
    
    # Create dataset and tokenize
    train_dataset = Dataset.from_list(formatted_pairs)
    tokenized_dataset = train_dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=train_dataset.column_names
    )
    
    print(f"πŸ“Š Tokenized dataset size: {len(tokenized_dataset)} examples")
    
    # 4. Set up training arguments
    print("\nβš™οΈ Configuring training (3 epochs)...")
    training_args = TrainingArguments(
        output_dir="./smollm_tool_adapter",
        num_train_epochs=3,
        per_device_train_batch_size=1,      # Small batch for memory efficiency
        gradient_accumulation_steps=4,       # Effective batch size = 4
        learning_rate=5e-5,
        warmup_steps=10,
        logging_steps=1,
        save_steps=50,
        save_total_limit=2,
        remove_unused_columns=False,
        fp16=torch.cuda.is_available(),      # Use fp16 if GPU available
        dataloader_pin_memory=False,
        report_to="none",                    # Disable external loggers such as wandb
        logging_dir="./logs"
    )
    
    # 5. Set up data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,  # We're doing causal LM, not masked LM
    )
    
    # 6. Initialize trainer
    print("πŸ‹οΈ Initializing trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )
    
    print("βœ… Trainer ready")
    
    # 7. Start training
    print("\n🎯 Starting training...")
    print("⏱️  This should take ~8-15 minutes on M4 Max, longer on CPU")
    
    train_result = trainer.train()
    
    print("\nπŸŽ‰ Training completed!")
    print(f"πŸ“Š Final training loss: {train_result.training_loss:.4f}")
    print(f"⏱️ Training time: {train_result.metrics.get('train_runtime', 0):.1f} seconds")
    
    # 8. Save the fine-tuned adapter
    print("\nπŸ’Ύ Saving model adapter...")
    model.save_pretrained("./smollm_tool_adapter")
    tokenizer.save_pretrained("./smollm_tool_adapter")
    
    print("βœ… Model saved to './smollm_tool_adapter'")
    print("🏁 Training complete! Ready for testing.")
    
    # 9. Quick test
    print("\nπŸ§ͺ Quick functionality test...")
    test_prompt = """<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>

<schema>
{
  "name": "get_stock_price",
  "description": "Return the latest price for a given ticker symbol.",
  "parameters": {
    "type": "object",
    "properties": {
      "ticker": {"type": "string"}
    },
    "required": ["ticker"]
  }
}
</schema>

<|im_start|>user
What's Microsoft trading at?<|im_end|>
<|im_start|>assistant
"""
    
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)  # keep inputs on the model's device
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    
    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
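    # A well-trained adapter should reply with a JSON-only call, e.g. something like
    # {"name": "get_stock_price", "arguments": {"ticker": "MSFT"}} (illustrative only).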
    print(f"πŸ€– Model response: {response.strip()}")
    
    return model, tokenizer

if __name__ == "__main__":
    model, tokenizer = main()