from typing import Dict, List, Any
from llama_cpp import Llama

# Token-count constant. It has the same value as the n_ctx argument passed to
# Llama.from_pretrained in EndpointHandler.__init__, but nothing in the visible
# code reads it.
# NOTE(review): presumably intended as the context-window / generation cap --
# confirm, then either wire it in or remove it.
MAX_TOKENS = 8192


class EndpointHandler():
    """Callable request handler that runs prompts through the ComicBot model.

    The model is loaded once in ``__init__`` and reused for every request.
    Each call returns a one-element list holding a status dict; request
    problems are reported through that dict rather than by raising.
    """

    # Chat template wrapped around the system and user text before generation.
    _PROMPT_TEMPLATE = (
        "<startofturn>system\n{system_prompt} <endofturn>\n"
        "<startofturn>user\n{prompt} <endofturn>\n"
        "<startofturn>model"
    )

    # (payload key, coercion, default) for each sampling option read from args.
    _SAMPLING_OPTIONS = (
        ("temperature", float, 1.0),
        ("top_p", float, 0.9),
        ("top_k", int, 40),
    )

    def __init__(self):
        # Initialize the model with your ComicBot configuration
        print("Initializing Llama model with ComicBot settings...")
        self.model = Llama.from_pretrained(
            "njwright92/ComicBot_v.2-gguf",
            filename="comic_mistral-v5.2.q5_0.gguf",
            n_ctx=8192,
        )
        print("Model initialization complete.")

    @staticmethod
    def _error(reason: str, detail: str) -> Dict[str, Any]:
        """Build the error payload shared by all validation/inference failures."""
        return {
            "status": "error",
            "reason": reason,
            "detail": detail
        }

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate a completion for one request payload.

        Args:
            data: Request payload. Keys read here:
                ``"args"`` -- dict with optional ``"system_prompt"``,
                ``"inputs"``, ``"temperature"`` (default 1.0), ``"top_p"``
                (default 0.9) and ``"top_k"`` (default 40);
                ``"max_length"`` -- top-level generation limit (default 512),
                passed to the model as ``max_tokens``.

        Returns:
            A single-element list. On success the dict is
            ``{"status": "success", "response": <model result>}`` where the
            response is whatever the model call returned, unmodified. On
            failure the dict has ``"status": "error"`` plus either a
            ``"message"`` (no args supplied) or ``"reason"``/``"detail"``.
        """
        # Extract arguments from the data payload
        print("Extracting arguments from the data payload...")
        args = data.get("args", {})
        print(f"Arguments extracted: {args}")

        # Missing, null and empty args are all treated as "nothing to do".
        if not args:
            print("No arguments found in the data payload.")
            return [{
                "status": "error",
                "message": "No arguments found in the data payload."
            }]

        # A truthy non-dict (string, list, number) has no usable fields;
        # report it explicitly instead of surfacing an AttributeError text.
        if not isinstance(args, dict):
            detail = f"'args' must be an object/dict, got {type(args).__name__}"
            print(f"Error in formatting the prompt: {detail}")
            return [self._error("Invalid format", detail)]

        prompt = self._PROMPT_TEMPLATE.format(
            system_prompt=args.get("system_prompt", ""),
            prompt=args.get("inputs", ""),
        )
        print(f"Formatted prompt: {prompt}")

        # max_length lives at the top level of the payload, not inside args.
        try:
            max_length = int(data.get("max_length", 512))
        except (TypeError, ValueError, OverflowError) as e:
            print(f"Error converting max_length to int: {str(e)}")
            return [self._error(
                "max_length was passed as something that was not a plain old int",
                str(e),
            )]
        print(f"Max length set to: {max_length}")

        # Coerce sampling options the same way max_length is coerced, so a
        # JSON string such as "0.7" works and garbage is rejected up front
        # rather than failing inside the model call.
        sampling: Dict[str, Any] = {}
        for name, cast, default in self._SAMPLING_OPTIONS:
            try:
                sampling[name] = cast(args.get(name, default))
            except (TypeError, ValueError, OverflowError) as e:
                print(f"Error converting {name} to {cast.__name__}: {str(e)}")
                return [self._error(
                    f"{name} could not be converted to {cast.__name__}",
                    str(e),
                )]

        print("Generating response from the model...")
        try:
            res = self.model(prompt, max_tokens=max_length, **sampling)
        except Exception as e:
            # Request boundary: report inference failures in the same
            # structured form as the validation errors above.
            print(f"Error during model inference: {str(e)}")
            return [self._error("Model inference failed", str(e))]
        print(f"Model response: {res}")

        return [{
            "status": "success",
            "response": res
        }]