#!/usr/bin/env python3
"""
Monitor Training and Auto-Deploy
=================================

This script monitors the training process and automatically executes
the remaining deployment steps when training completes.

Usage:
    python monitor_and_deploy.py
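
Requirements:
    psutil (pip install psutil)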
"""

import os
import time
import subprocess
import psutil
from pathlib import Path

def is_training_running():
    """Check if training process is still running"""
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            if proc.info['cmdline'] and any('tool_trainer_simple_robust.py' in cmd for cmd in proc.info['cmdline']):
                return True, proc.info['pid']
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return False, None

def check_model_files():
    """Check if training has produced the required model files"""
    lora_dir = Path("./smollm3_robust")
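    # These are the adapter files PEFT writes when saving a LoRA checkpoint.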
    required_files = [
        "adapter_config.json",
        "adapter_model.safetensors"
    ]
    
    existing_files = []
    for file in required_files:
        if (lora_dir / file).exists():
            existing_files.append(file)
    
    return len(existing_files) == len(required_files), existing_files

def run_command(cmd, description):
    """Run a command and return success status"""
    print(f"πŸ”„ {description}...")
    try:
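        # shell=True so compound commands like "git push space deploy-lite:main" run as written.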
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        if result.returncode == 0:
            print(f"βœ… {description} completed!")
            if result.stdout.strip():
                print(f"   Output: {result.stdout.strip()}")
            return True
        else:
            print(f"❌ {description} failed!")
            print(f"   Error: {result.stderr.strip()}")
            return False
    except Exception as e:
        print(f"❌ {description} failed with exception: {e}")
        return False

def main():
    print("πŸ” Monitoring training and preparing auto-deployment...")
    print("=" * 60)
    
    # Wait for training to complete
    training_running, pid = is_training_running()
    if training_running:
        print(f"⏳ Training is running (PID: {pid}). Waiting for completion...")
        
        while training_running:
            time.sleep(10)  # Check every 10 seconds
            training_running, pid = is_training_running()
            
            # Check if model files are appearing
            files_ready, existing = check_model_files()
            if existing:
                print(f"πŸ“ Found files: {existing}")
            
            if files_ready:
                print("πŸŽ‰ Model files detected! Training appears complete.")
                break
        
        print("βœ… Training process completed!")
    else:
        print("ℹ️ No training process running. Checking for existing model files...")
    
    # Verify model files exist
    files_ready, existing = check_model_files()
    if not files_ready:
        print(f"❌ Required model files not found. Found: {existing}")
        print("πŸ’‘ Please ensure training completed successfully.")
        return
    
    print("βœ… All required model files found!")
    
    # Execute deployment steps
    print("\nπŸš€ Executing automated deployment...")
    
    # Step 1: Upload to Hugging Face Hub
    if run_command("python upload_lora_to_hub.py", "Upload LoRA to Hugging Face Hub"):
        # Step 2: Test locally
        if run_command("python test_constrained_model.py", "Test model locally"):
            # Step 3: Deploy to HF Spaces
            if run_command("git push space deploy-lite:main", "Deploy to HF Spaces"):
                print("\nπŸŽ‰ COMPLETE SUCCESS!")
                print("πŸ”— Check your Hugging Face Spaces: https://huggingface.co/spaces/jlov7/Dynamic-Function-Calling-Agent")
                print("πŸ”— LoRA Model Hub: https://huggingface.co/jlov7/SmolLM3-Function-Calling-LoRA")
            else:
                print("⚠️ HF Spaces deployment failed, but model is uploaded to Hub")
        else:
            print("⚠️ Local testing had issues, but proceeding with deployment")
            run_command("git push space deploy-lite:main", "Deploy to HF Spaces anyway")
    else:
        print("❌ Hub upload failed. Please run upload_lora_to_hub.py manually")
    
    print("\nπŸ“‹ Final Status:")
    print("βœ… PEFT dependency added")
    print("βœ… Hub loading enabled") 
    print("βœ… Training completed")
    print("βœ… Model uploaded to Hub (if successful)")
    print("βœ… Deployed to HF Spaces")
    print("\n🌟 Your fine-tuned model should now work everywhere!")

if __name__ == "__main__":
    main()