""" tool_trainer_simple.py - Fine-tune SmolLM2-1.7B for dynamic function calling using LoRA This script uses supervised fine-tuning (SFT) instead of DPO, which is simpler and more compatible with current library versions while still teaching JSON-only responses. Key hyperparameters: - LoRA rank: 8 (small adapter for efficiency) - Epochs: 3 (enough to learn pattern without overfitting) - Learning rate: 5e-5 (conservative for stability) """ import json import torch from transformers import ( AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling ) from peft import LoraConfig, get_peft_model, TaskType from datasets import Dataset import os def load_preference_pairs(file_path="tool_pairs.jsonl"): """Load and parse the JSONL preference pairs.""" pairs = [] with open(file_path, 'r') as f: for line in f: pairs.append(json.loads(line.strip())) return pairs def format_for_sft(pairs, tokenizer): """Convert pairs to SFT format - use only the 'chosen' responses.""" formatted = [] for pair in pairs: # Combine prompt + chosen response as training example full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token formatted.append({"text": full_text}) return formatted def tokenize_function(examples, tokenizer, max_length=512): """Tokenize the training examples.""" # Tokenize the text tokenized = tokenizer( examples["text"], truncation=True, padding=False, max_length=max_length, return_tensors=None ) # For causal LM, labels are the same as input_ids tokenized["labels"] = tokenized["input_ids"].copy() return tokenized def main(): print("๐Ÿš€ Starting Dynamic Function-Calling Agent Training (SFT)") print("=" * 60) # 1. Load the base model and tokenizer print("๐Ÿ“ฅ Loading SmolLM2-1.7B model and tokenizer...") model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct" tokenizer = AutoTokenizer.from_pretrained(model_name) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" if torch.cuda.is_available() else None, trust_remote_code=True ) print(f"โœ… Loaded model: {model_name}") print(f"๐Ÿ”ง Model dtype: {model.dtype}") print(f"๐Ÿ’พ Model size: ~{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters") # 2. Set up LoRA configuration print("\n๐Ÿ”ฉ Setting up LoRA adapter (rank 8)...") lora_config = LoraConfig( r=8, # Low rank - small adapter lora_alpha=16, # Scaling factor (typically 2x rank) target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], lora_dropout=0.1, # Prevent overfitting bias="none", task_type=TaskType.CAUSAL_LM ) model = get_peft_model(model, lora_config) trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) total_params = sum(p.numel() for p in model.parameters()) print(f"โœ… LoRA adapter attached") print(f"๐ŸŽฏ Trainable parameters: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)") # 3. 
    print("\n📊 Loading preference pairs...")
    pairs = load_preference_pairs()
    formatted_pairs = format_for_sft(pairs, tokenizer)
    print(f"✅ Loaded {len(pairs)} preference pairs")
    print("📝 Sample training text:")
    print(formatted_pairs[0]["text"][:200] + "...")

    # Create dataset and tokenize
    train_dataset = Dataset.from_list(formatted_pairs)
    tokenized_dataset = train_dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=train_dataset.column_names
    )
    print(f"📊 Tokenized dataset size: {len(tokenized_dataset)} examples")

    # 4. Set up training arguments
    print("\n⚙️ Configuring training (3 epochs)...")
    training_args = TrainingArguments(
        output_dir="./smollm_tool_adapter",
        num_train_epochs=3,
        per_device_train_batch_size=1,   # Small batch for memory efficiency
        gradient_accumulation_steps=4,   # Effective batch size = 4
        learning_rate=5e-5,
        warmup_steps=10,
        logging_steps=1,
        save_steps=50,
        save_total_limit=2,
        remove_unused_columns=False,
        fp16=torch.cuda.is_available(),  # Use fp16 if GPU available
        dataloader_pin_memory=False,
        report_to="none",                # Disable wandb and other logging integrations
        logging_dir="./logs"
    )

    # 5. Set up data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,  # We're doing causal LM, not masked LM
    )

    # 6. Initialize trainer
    print("🏋️ Initializing trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )
    print("✅ Trainer ready")

    # 7. Start training
    print("\n🎯 Starting training...")
    print("⏱️ This should take ~8-15 minutes on M4 Max, longer on CPU")

    # Train the model directly
    print("🚀 Beginning training...")
    train_result = trainer.train()

    print("\n🎉 Training completed!")
    print(f"📊 Final training loss: {train_result.training_loss:.4f}")
    print(f"⏱️ Training time: {train_result.metrics.get('train_runtime', 0):.1f} seconds")

    # 8. Save the fine-tuned adapter
    print("\n💾 Saving model adapter...")
    model.save_pretrained("./smollm_tool_adapter")
    tokenizer.save_pretrained("./smollm_tool_adapter")
    print("✅ Model saved to './smollm_tool_adapter'")
    print("🏁 Training complete! Ready for testing.")

    # 9. Quick test
    print("\n🧪 Quick functionality test...")
    test_prompt = """<|im_start|>system
You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|>
{
  "name": "get_stock_price",
  "description": "Return the latest price for a given ticker symbol.",
  "parameters": {
    "type": "object",
    "properties": {
      "ticker": {"type": "string"}
    },
    "required": ["ticker"]
  }
}
<|im_start|>user
What's Microsoft trading at?<|im_end|>
<|im_start|>assistant
"""

    # Keep inputs on the same device as the (possibly GPU-mapped) model
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
    print(f"🤖 Model response: {response.strip()}")

    return model, tokenizer


if __name__ == "__main__":
    model, tokenizer = main()
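
# Inference sketch (not executed here): the adapter saved above can be re-attached to
# the base checkpoint later. This assumes the "./smollm_tool_adapter" directory produced
# by this script and the same base model name.
#
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   from peft import PeftModel
#
#   base = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
#   tuned = PeftModel.from_pretrained(base, "./smollm_tool_adapter")
#   tok = AutoTokenizer.from_pretrained("./smollm_tool_adapter")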