#!/usr/bin/env python3
"""
Monitor Training and Auto-Deploy
=================================

This script monitors the training process and automatically executes
the remaining deployment steps when training completes.

Usage: python monitor_and_deploy.py

The process exits with status 0 only when the model was uploaded to the Hub
and the Space deployment succeeded; any other outcome exits with status 1.
"""

import os
import subprocess
import sys
import time
from pathlib import Path
from typing import Iterable, List, Optional, Sequence, Tuple, Union

try:
    import psutil
except ImportError:
    # Keep the module importable (e.g. to reuse check_model_files/run_command)
    # when psutil is not installed; is_training_running() reports the problem.
    psutil = None

# File name looked for in the command line of running processes.
TRAINING_SCRIPT = "tool_trainer_simple_robust.py"
# Directory (relative to the current working directory) holding the adapter.
LORA_DIR = Path("./smollm3_robust")
# Files that must all exist before deployment is attempted.
REQUIRED_FILES = ("adapter_config.json", "adapter_model.safetensors")
# Seconds to sleep between two checks while training is still running.
POLL_INTERVAL_SECONDS = 10

# Deployment steps. The Python steps use the interpreter running this script
# instead of whatever "python" happens to resolve to on PATH.
UPLOAD_CMD = [sys.executable, "upload_lora_to_hub.py"]
TEST_CMD = [sys.executable, "test_constrained_model.py"]
DEPLOY_CMD = ["git", "push", "space", "deploy-lite:main"]


def is_training_running(
    script_name: str = TRAINING_SCRIPT,
) -> Tuple[bool, Optional[int]]:
    """Check if the training process is still running.

    Args:
        script_name: Substring searched for in every command-line argument of
            every visible process.

    Returns:
        ``(True, pid)`` for the first matching process, otherwise
        ``(False, None)``.

    Raises:
        ImportError: If psutil is not installed.
    """
    if psutil is None:
        raise ImportError(
            "psutil is required to detect the training process; "
            "install it with 'pip install psutil'"
        )

    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            cmdline = proc.info['cmdline']
            if cmdline and any(script_name in arg for arg in cmdline):
                return True, proc.info['pid']
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process vanished or is not inspectable; skip it.
            continue
    return False, None


def check_model_files(
    lora_dir: Union[str, Path] = LORA_DIR,
    required_files: Iterable[str] = REQUIRED_FILES,
) -> Tuple[bool, List[str]]:
    """Check if training has produced the required model files.

    Args:
        lora_dir: Directory expected to contain the adapter files.
        required_files: File names that must exist inside ``lora_dir``.

    Returns:
        ``(all_present, existing)`` where ``existing`` lists, in the order
        given, the required file names that exist.
    """
    base = Path(lora_dir)
    required = list(required_files)
    existing_files = [name for name in required if (base / name).exists()]
    return len(existing_files) == len(required), existing_files


def run_command(cmd: Union[str, Sequence[str]], description: str) -> bool:
    """Run a command and return its success status.

    Args:
        cmd: Either an argument list (run directly, without a shell) or a
            single string (run through the shell, as before).
        description: Human-readable label used in the progress messages.

    Returns:
        True if the command exited with status 0, False if it exited with any
        other status or could not be started.
    """
    print(f"🔄 {description}...")
    try:
        result = subprocess.run(
            cmd,
            shell=isinstance(cmd, str),  # only plain strings need a shell
            capture_output=True,
            text=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # E.g. the executable does not exist or the arguments are invalid.
        print(f"❌ {description} failed with exception: {e}")
        return False

    if result.returncode != 0:
        print(f"❌ {description} failed!")
        print(f" Error: {result.stderr.strip()}")
        return False

    print(f"✅ {description} completed!")
    output = result.stdout.strip()
    if output:
        print(f" Output: {output}")
    return True


def _wait_for_training(poll_interval: float = POLL_INTERVAL_SECONDS) -> None:
    """Block until the training process is gone or the model files exist."""
    training_running, pid = is_training_running()
    if not training_running:
        print("ℹ️ No training process running. Checking for existing model files...")
        return

    print(f"⏳ Training is running (PID: {pid}). Waiting for completion...")
    while training_running:
        time.sleep(poll_interval)
        training_running, pid = is_training_running()

        # Check if model files are appearing
        files_ready, existing = check_model_files()
        if existing:
            print(f"📁 Found files: {existing}")
        if files_ready:
            # NOTE(review): this stops waiting even if the trainer process is
            # still alive, and files left over from an earlier run trigger it
            # too - confirm that this is intended.
            print("🎉 Model files detected! Training appears complete.")
            break

    print("✅ Training process completed!")


def _deploy() -> Tuple[bool, bool]:
    """Run upload, local test and Space deployment; return (uploaded, deployed)."""
    # Step 1: Upload to Hugging Face Hub
    if not run_command(UPLOAD_CMD, "Upload LoRA to Hugging Face Hub"):
        print("❌ Hub upload failed. Please run upload_lora_to_hub.py manually")
        return False, False

    # Step 2: Test locally. A failing test is a warning only: the deployment
    # is still attempted.
    if not run_command(TEST_CMD, "Test model locally"):
        print("⚠️ Local testing had issues, but proceeding with deployment")
        return True, run_command(DEPLOY_CMD, "Deploy to HF Spaces anyway")

    # Step 3: Deploy to HF Spaces
    if not run_command(DEPLOY_CMD, "Deploy to HF Spaces"):
        print("⚠️ HF Spaces deployment failed, but model is uploaded to Hub")
        return True, False

    print("\n🎉 COMPLETE SUCCESS!")
    print("🔗 Check your Hugging Face Spaces: https://huggingface.co/spaces/jlov7/Dynamic-Function-Calling-Agent")
    print("🔗 LoRA Model Hub: https://huggingface.co/jlov7/SmolLM3-Function-Calling-LoRA")
    return True, True


def _print_final_status(uploaded: bool, deployed: bool) -> None:
    """Print the summary, marking upload and deployment with their real outcome."""
    print("\n📋 Final Status:")
    # The first two items are not verified by this script; they are reported
    # as before. Training counts as completed because the model files exist.
    print("✅ PEFT dependency added")
    print("✅ Hub loading enabled")
    print("✅ Training completed")
    print(f"{'✅' if uploaded else '❌'} Model uploaded to Hub")
    print(f"{'✅' if deployed else '❌'} Deployed to HF Spaces")
    if uploaded and deployed:
        print("\n🌟 Your fine-tuned model should now work everywhere!")


def main() -> int:
    """Wait for training, verify the model files, then upload, test and deploy.

    Returns:
        0 if the model was uploaded and the Space deployment succeeded,
        1 otherwise (including when the required model files are missing).
    """
    print("🔍 Monitoring training and preparing auto-deployment...")
    print("=" * 60)

    # Wait for training to complete
    _wait_for_training()

    # Verify model files exist
    files_ready, existing = check_model_files()
    if not files_ready:
        print(f"❌ Required model files not found. Found: {existing}")
        print("💡 Please ensure training completed successfully.")
        return 1
    print("✅ All required model files found!")

    # Execute deployment steps
    print("\n🚀 Executing automated deployment...")
    uploaded, deployed = _deploy()

    _print_final_status(uploaded, deployed)
    return 0 if uploaded and deployed else 1


if __name__ == "__main__":
    sys.exit(main())