from typing import Dict, List, Any

from llama_cpp import Llama

# Context window size (in tokens) passed to the model as ``n_ctx``.
MAX_TOKENS = 8192

# Completion budget used when the request does not supply "max_length".
DEFAULT_MAX_LENGTH = 512

# Prompt layout; the two placeholders are filled from the request's "args".
PROMPT_TEMPLATE = "system\n{system_prompt} \nuser\n{prompt} \nmodel"


class EndpointHandler():
    """Callable request handler wrapping a ComicBot GGUF model.

    The model is loaded once in ``__init__``; each call builds a prompt from
    the request payload, runs the model, and returns a one-element list
    holding either a success or an error dictionary.
    """

    def __init__(self):
        """Load the ComicBot GGUF model with a ``MAX_TOKENS`` context window."""
        print("Initializing Llama model with ComicBot settings...")
        self.model = Llama.from_pretrained(
            "njwright92/ComicBot_v.2-gguf",
            filename="comic_mistral-v5.2.q5_0.gguf",
            n_ctx=MAX_TOKENS,
        )
        print("Model initialization complete.")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate a completion for one request payload.

        Args:
            data: Request payload. ``data["args"]`` must be a non-empty dict
                and may contain ``system_prompt``, ``inputs``,
                ``temperature``, ``top_p`` and ``top_k``. ``max_length`` is
                read from the top level of ``data`` first, then from
                ``args``, and defaults to ``DEFAULT_MAX_LENGTH``.

        Returns:
            A single-element list. On success the element is
            ``{"status": "success", "response": <model output>}``; on a
            validation failure it is a dict with ``"status": "error"`` and
            either a ``message`` or a ``reason``/``detail`` pair.

        Raises:
            Whatever the underlying model call raises; generation errors are
            not caught here.
        """
        print("Extracting arguments from the data payload...")
        args = data.get("args", {})
        print(f"Arguments extracted: {args}")

        if not args:
            print("No arguments found in the data payload.")
            return [{
                "status": "error",
                "message": "No arguments found in the data payload."
            }]

        # Everything below calls ``args.get``; reject non-mapping payloads
        # explicitly instead of relying on an AttributeError further down.
        if not isinstance(args, dict):
            detail = (
                f"'args' must be a JSON object, got {type(args).__name__}"
            )
            print(f"Error in formatting the prompt: {detail}")
            return [{
                "status": "error",
                "reason": "Invalid format",
                "detail": detail
            }]

        prompt = PROMPT_TEMPLATE.format(
            system_prompt=args.get("system_prompt", ""),
            prompt=args.get("inputs", ""),
        )
        print(f"Formatted prompt: {prompt}")

        # Top-level "max_length" keeps precedence for existing callers;
        # "args" is consulted as a fallback so it can sit next to the other
        # generation parameters.
        raw_max_length = data.get(
            "max_length", args.get("max_length", DEFAULT_MAX_LENGTH)
        )
        try:
            max_length = int(raw_max_length)
        except (TypeError, ValueError, OverflowError) as e:
            print(f"Error converting max_length to int: {str(e)}")
            return [{
                "status": "error",
                "reason": "max_length was passed as something that was not a plain old int",
                "detail": str(e)
            }]
        print(f"Max length set to: {max_length}")

        print("Generating response from the model...")
        res = self.model(
            prompt,
            temperature=args.get("temperature", 1.0),
            top_p=args.get("top_p", 0.9),
            top_k=args.get("top_k", 40),
            max_tokens=max_length,
        )
        print(f"Model response: {res}")

        return [{
            "status": "success",
            "response": res
        }]