"""Land-use classification of building imagery with a vision-language model.

``Classifier`` wraps a chat-style multimodal model: it resizes input images,
builds a prompt asking for land-use categories constrained by the
``ImageData`` JSON Schema, then parses the model's ◁think▷-delimited
reasoning plus the trailing JSON payload and validates it with Pydantic.
"""

import json
import logging
import re
from typing import List, Optional, Tuple

from PIL import Image

from model import Model
from types_io import ImageData

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# JSON Schema of the expected structured output, embedded verbatim in the prompt.
SCHEMA_JSON = json.dumps(ImageData.model_json_schema(), ensure_ascii=False)

LAND_USE_PROMPT = f"""
You are a structured image analysis agent. Think **only** inside markers:
◁think▷ ... ◁/think▷
After ◁/think▷, output **ONLY** a JSON object that validates against this JSON Schema:
{SCHEMA_JSON}
Guidelines:
- Return 3–5 categories.
- Confidence is in [0,1] where 1.0 means highest confidence (be consistent).
Categories :
- Residenciales: Buildings intended for housing - Houses, PH Buildings, Condominiums.
- Comerciales1: Refers to the storage, distribution, or exchange of products, goods, or services with a commercial interest.
- Comerciales2: Buildings where activities aimed at providing services are carried out.
- Comerciales3: Buildings used for artisanal activities where raw materials are transformed on a local scale.
- Comerciales4: Hotels, Motels, and Restaurants.
- Comerciales5: Operational offices and warehouses.
- Centros_Comerciales: Commercial premises located on properties of one or several buildings.
- Bodegas: Buildings in warehouse-type constructions dedicated to commercial, industrial, or storage activities.
- Parqueaderos: Buildings designed for vehicle parking.
- Dotacionales1: Buildings where activities aimed at the welfare or service of a community are carried out.
- Dotacionales2: Buildings designed to carry out educational or training activities.
- Dotacionales3: Buildings with the necessary infrastructure to provide surgical and/or hospitalization services.
- Dotacionales4: Buildings for religious worship owned by communities or religious congregations.
- Dotacionales5: Theaters, cinemas, swimming pools, museums, sports, events, or shows.
- Especiales: Military administrative areas, cemeteries, airport runways.
- Moles: Large buildings in height (>4 floors) or area (>10,000 m²), usually under construction.
- Rurales: Sheds, kiosks, shelters, barns, stables, silos, etc.
- Mixto1: (Residencial + Comercial1) Housing and commercial premises.
- Mixto2: (Residencial + Comercial2) Housing and offices.
- Mixto3: (Comercial1 + Comercial2) Commercial premises and offices.
Return ONLY the JSON object (no prose, no backticks) after ◁/think▷.
"""


class Classifier:
    """Runs land-use classification over images using a chat-style VLM."""

    def __init__(self, MAX_NEW_TOKENS: int = 1024):
        """Load the model and processor.

        Args:
            MAX_NEW_TOKENS: Maximum number of tokens the model may generate
                per response.
        """
        self.max_new_tokens = MAX_NEW_TOKENS
        logger.info("Initializing Classifier")
        logger.info("Loading model...")
        self.model = Model.load_model()
        logger.info("Loading processor...")
        self.processor = Model.load_processor()
        logger.info("Classifier initialization complete")
        logger.info("Setting up image data generator...")

    def get_response(
        self,
        images: List[Image.Image],
        saved_image_paths: Optional[List[str]] = None,
    ) -> dict:
        """Classify a batch of images and return a schema-validated result.

        Args:
            images: PIL images to classify (must be non-empty).
            saved_image_paths: Image paths referenced in the chat message.
                May be ``None`` or empty, in which case the message carries
                only the text prompt.

        Returns:
            ``{"output": <ImageData as a plain dict>}``.

        Raises:
            ValueError: If ``images`` is empty.
            json.JSONDecodeError: If the model output contains no valid JSON.
            pydantic.ValidationError: If the JSON does not match ``ImageData``.
        """
        logger.info(f"Processing classification request for {len(images)} images")
        logger.info("Loading and preprocessing images...")
        images = self.get_input_tensor(images)
        logger.debug("Successfully preprocessed images")
        logger.info("Preparing input messages...")
        messages = self.prepare_messages(saved_image_paths)
        response = self.generate_model_response(images, messages)
        think, json_text = self._split_think_and_json(response)
        data = json.loads(json_text)
        # Keep the model's reasoning alongside the structured payload unless
        # the payload already carries a "think" field of its own.
        if isinstance(data, dict) and "think" not in data and think:
            data["think"] = think
        # Validate against Pydantic schema; raise on failure
        validated = ImageData.model_validate(data)
        return {"output": validated.model_dump()}

    def get_input_tensor(self, images: List[Image.Image]) -> List[Image.Image]:
        """
        Preprocess a list of PIL images.

        Args:
            images (List[Image.Image]): List of PIL images to be processed.

        Returns:
            List[Image.Image]: List of preprocessed (resized) images ready
            for classification.

        Raises:
            ValueError: If ``images`` is empty.
        """
        if not images:
            raise ValueError("No images provided for classification.")

        logger.info(f"Preprocessing {len(images)} images...")
        processed_images = []
        for idx, img in enumerate(images):
            logger.debug(f"Processing image at index: {idx}")
            try:
                img = self.resize_image(img)
                processed_images.append(img)
                logger.debug(f"Successfully processed image at index: {idx}")
            except Exception as e:
                logger.error(f"Error processing image at index {idx}: {str(e)}")
                raise
        return processed_images

    def generate_model_response(
        self, images: List[Image.Image], messages: List[dict]
    ) -> str:
        """
        Generate response from the model.

        Args:
            images (List[Image.Image]): List of preprocessed images.
            messages (List[dict]): Messages for the processor.

        Returns:
            str: Decoded response from the model (prompt tokens stripped).
        """
        logger.info("Applying chat template...")
        try:
            # Get the text as string first, then let outlines handle tokenization.
            # NOTE(review): return_tensors is ignored when tokenize is left at
            # its default (False) — the template comes back as a str; confirm
            # before removing the argument.
            text = self.processor.apply_chat_template(
                messages, add_generation_prompt=True, return_tensors="pt"
            )
            logger.info(f"Text length: {len(text)} characters")
            inputs = self.processor(
                images=images,
                text=text,
                return_tensors="pt",
                padding=True,
                truncation=True,
            ).to(self.model.device)
        except Exception as e:
            logger.error(f"Error applying chat template: {str(e)}")
            raise

        logger.info("Generating response...")
        # Fix: honor the budget configured in __init__ instead of a
        # hardcoded 1024 that silently ignored MAX_NEW_TOKENS.
        # NOTE(review): temperature without do_sample=True may be ignored by
        # transformers' greedy decoding — confirm the intended sampling mode.
        generated_ids = self.model.generate(
            **inputs, max_new_tokens=self.max_new_tokens, temperature=0.1
        )
        # Strip the echoed prompt tokens from each sequence before decoding.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        response = self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]
        logger.debug("Successfully generated response")
        return response

    @staticmethod
    def resize_image(image: Image.Image, max_size: int = 224) -> Image.Image:
        """
        Resize an image while maintaining aspect ratio.

        Args:
            image: PIL Image object to resize
            max_size: Maximum dimension (width or height) of the output image

        Returns:
            PIL Image: Resized image with maintained aspect ratio
        """
        # Get current dimensions
        width, height = image.size

        # Calculate scaling factor to fit within max_size
        scale = min(max_size / width, max_size / height)

        # Only resize if image is larger than max_size
        if scale < 1:
            new_width = int(width * scale)
            new_height = int(height * scale)
            image = image.resize(
                (new_width, new_height), Image.Resampling.LANCZOS
            )
        return image

    @staticmethod
    def prepare_messages(saved_image_paths: Optional[List[str]]) -> List[dict]:
        """
        Prepare messages for the processor.

        Args:
            saved_image_paths (Optional[List[str]]): List of paths to saved
                images; ``None`` is treated as an empty list.

        Returns:
            List[dict]: Messages for the processor.
        """
        # Fix: tolerate None (the default passed down from get_response),
        # which previously raised TypeError when iterated.
        paths = saved_image_paths or []
        return [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image_path}
                    for image_path in paths
                ] + [{"type": "text", "text": LAND_USE_PROMPT}],
            },
        ]

    @staticmethod
    def _split_think_and_json(text: str) -> Tuple[str, str]:
        """Split a raw model response into reasoning and JSON payload.

        Args:
            text: Raw decoded model output, possibly containing a
                ◁think▷ ... ◁/think▷ block followed by a JSON object.

        Returns:
            Tuple[str, str]: ``(think, json_text)`` where ``think`` is the
            stripped reasoning ("" when no marker pair is present) and
            ``json_text`` is the first ``{...}`` span found after it (or the
            whole remaining text if no braces match).
        """
        start, end = "◁think▷", "◁/think▷"
        think = ""
        after = text
        if start in text and end in text:
            s = text.find(start) + len(start)
            e = text.find(end, s)
            think = text[s:e].strip()
            after = text[e + len(end):].strip()
        # Greedy brace match grabs the outermost JSON object in the tail.
        m = re.search(r"\{.*\}", after, flags=re.S)
        json_text = m.group(0) if m else after
        return think, json_text