#!/usr/bin/env python3
"""
Monitor Training and Auto-Deploy
=================================
This script monitors the training process and automatically executes
the remaining deployment steps when training completes.
Usage:
    python monitor_and_deploy.py
"""
import os
import time
import subprocess
import psutil
from pathlib import Path
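# Process discovery: scan running processes with psutil and look for the
# trainer script name anywhere in each process's command line.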
def is_training_running():
    """Check if training process is still running"""
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            if proc.info['cmdline'] and any('tool_trainer_simple_robust.py' in cmd for cmd in proc.info['cmdline']):
                return True, proc.info['pid']
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return False, None
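# Completion signal: the LoRA adapter config and weights appearing in the
# output directory are treated as evidence that training finished writing.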
def check_model_files():
    """Check if training has produced the required model files"""
    lora_dir = Path("./smollm3_robust")
    required_files = [
        "adapter_config.json",
        "adapter_model.safetensors"
    ]
    existing_files = []
    for file in required_files:
        if (lora_dir / file).exists():
            existing_files.append(file)
    return len(existing_files) == len(required_files), existing_files
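# Shell-out helper: runs the given command through the shell, captures
# stdout/stderr, and treats a zero exit code as success.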
def run_command(cmd, description):
    """Run a command and return success status"""
    print(f"🔄 {description}...")
    try:
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        if result.returncode == 0:
            print(f"✅ {description} completed!")
            if result.stdout.strip():
                print(f"   Output: {result.stdout.strip()}")
            return True
        else:
            print(f"❌ {description} failed!")
            print(f"   Error: {result.stderr.strip()}")
            return False
    except Exception as e:
        print(f"❌ {description} failed with exception: {e}")
        return False
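# Orchestration: wait for the trainer to exit (or for the adapter files to
# appear), verify the artifacts, then run upload -> local test -> Spaces push.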
def main():
    print("🚀 Monitoring training and preparing auto-deployment...")
    print("=" * 60)

    # Wait for training to complete
    training_running, pid = is_training_running()

    if training_running:
        print(f"⏳ Training is running (PID: {pid}). Waiting for completion...")
        while training_running:
            time.sleep(10)  # Check every 10 seconds
            training_running, pid = is_training_running()

            # Check if model files are appearing
            files_ready, existing = check_model_files()
            if existing:
                print(f"📁 Found files: {existing}")
            if files_ready:
                print("🎉 Model files detected! Training appears complete.")
                break

        print("✅ Training process completed!")
    else:
        print("ℹ️ No training process running. Checking for existing model files...")

    # Verify model files exist
    files_ready, existing = check_model_files()
    if not files_ready:
        print(f"❌ Required model files not found. Found: {existing}")
        print("💡 Please ensure training completed successfully.")
        return

    print("✅ All required model files found!")

    # Execute deployment steps
    print("\n🚀 Executing automated deployment...")
    # Step 1: Upload to Hugging Face Hub
    if run_command("python upload_lora_to_hub.py", "Upload LoRA to Hugging Face Hub"):
        # Step 2: Test locally
        if run_command("python test_constrained_model.py", "Test model locally"):
            # Step 3: Deploy to HF Spaces
            if run_command("git push space deploy-lite:main", "Deploy to HF Spaces"):
                print("\n🎉 COMPLETE SUCCESS!")
                print("🌐 Check your Hugging Face Spaces: https://huggingface.co/spaces/jlov7/Dynamic-Function-Calling-Agent")
                print("🔗 LoRA Model Hub: https://huggingface.co/jlov7/SmolLM3-Function-Calling-LoRA")
            else:
                print("⚠️ HF Spaces deployment failed, but model is uploaded to Hub")
        else:
            print("⚠️ Local testing had issues, but proceeding with deployment")
            run_command("git push space deploy-lite:main", "Deploy to HF Spaces anyway")
    else:
        print("❌ Hub upload failed. Please run upload_lora_to_hub.py manually")
print("\nπ Final Status:")
print("β
PEFT dependency added")
print("β
Hub loading enabled")
print("β
Training completed")
print("β
Model uploaded to Hub (if successful)")
print("β
Deployed to HF Spaces")
print("\nπ Your fine-tuned model should now work everywhere!")
if __name__ == "__main__":
main() |