from typing import Optional import spaces import gradio as gr import numpy as np import torch from PIL import Image import io import base64, os from huggingface_hub import snapshot_download import traceback import warnings import sys # Suppress warnings warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", message=".*_supports_sdpa.*") # Simple monkey patch for transformers - avoid recursion def simple_patch_transformers(): """Simple patch to fix _supports_sdpa issue""" try: import transformers.modeling_utils as modeling_utils # Store original method original_check = modeling_utils.PreTrainedModel._check_and_adjust_attn_implementation def patched_check(self, *args, **kwargs): # Simply set the attribute if it doesn't exist if not hasattr(self, '_supports_sdpa'): object.__setattr__(self, '_supports_sdpa', False) try: return original_check(self, *args, **kwargs) except AttributeError as e: if '_supports_sdpa' in str(e): # Return default attention implementation return "eager" raise modeling_utils.PreTrainedModel._check_and_adjust_attn_implementation = patched_check print("Applied simple transformers patch") except Exception as e: print(f"Warning: Could not patch transformers: {e}") # Apply the patch BEFORE importing utils simple_patch_transformers() # Now import the utils from util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img # Download repository repo_id = "microsoft/OmniParser-v2.0" local_dir = "weights" if not os.path.exists(local_dir): snapshot_download(repo_id=repo_id, local_dir=local_dir) print(f"Repository downloaded to: {local_dir}") else: print(f"Weights already exist at: {local_dir}") # Custom function to load caption model def load_caption_model_safe(model_name="florence2", model_name_or_path="weights/icon_caption"): """Safely load caption model""" device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Method 1: Try original function try: return get_caption_model_processor(model_name, model_name_or_path) except Exception as e: print(f"Original loading failed: {e}, trying alternative...") # Method 2: Load with specific configs try: from transformers import AutoProcessor, AutoModelForCausalLM print(f"Loading caption model from {model_name_or_path}...") processor = AutoProcessor.from_pretrained( model_name_or_path, trust_remote_code=True ) # Load model with safer config model = AutoModelForCausalLM.from_pretrained( model_name_or_path, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, trust_remote_code=True, attn_implementation="eager", # Use eager attention low_cpu_mem_usage=True ) # Ensure attribute exists (using object.__setattr__ to avoid recursion) if not hasattr(model, '_supports_sdpa'): object.__setattr__(model, '_supports_sdpa', False) if device.type == 'cuda': model = model.to(device) print("Model loaded successfully with alternative method") return {'model': model, 'processor': processor} except Exception as e: print(f"Alternative loading also failed: {e}") # Method 3: Manual loading as last resort try: print("Attempting manual model loading...") # Import required modules from transformers import AutoProcessor, AutoConfig import importlib.util # Load processor processor = AutoProcessor.from_pretrained( model_name_or_path, trust_remote_code=True ) # Load config config = AutoConfig.from_pretrained( model_name_or_path, trust_remote_code=True ) # Manually import and instantiate model model_file = os.path.join(model_name_or_path, "modeling_florence2.py") if os.path.exists(model_file): spec = importlib.util.spec_from_file_location("modeling_florence2_custom", model_file) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) # Get model class if hasattr(module, 'Florence2ForConditionalGeneration'): model_class = module.Florence2ForConditionalGeneration # Create model instance model = model_class(config) # Set the attribute before loading weights object.__setattr__(model, '_supports_sdpa', False) # Load weights weight_file = os.path.join(model_name_or_path, "model.safetensors") if os.path.exists(weight_file): from safetensors.torch import load_file state_dict = load_file(weight_file) model.load_state_dict(state_dict, strict=False) if device.type == 'cuda': model = model.to(device) model = model.half() # Use half precision print("Model loaded successfully with manual method") return {'model': model, 'processor': processor} except Exception as e: print(f"Manual loading failed: {e}") raise RuntimeError(f"Could not load model with any method: {e}") # Load models try: print("Loading YOLO model...") yolo_model = get_yolo_model(model_path='weights/icon_detect/model.pt') print("YOLO model loaded successfully") print("Loading caption model...") caption_model_processor = load_caption_model_safe() print("Caption model loaded successfully") except Exception as e: print(f"Critical error loading models: {e}") print(traceback.format_exc()) caption_model_processor = None yolo_model = None # UI Configuration MARKDOWN = """ # OmniParser V2 Pro🔥
🎯 AI-powered screen understanding tool that detects UI elements and extracts text with high accuracy.
📝 Supports both PaddleOCR and EasyOCR for flexible text extraction.