Update handler.py
Browse files — handler.py (+47 lines added, −25 lines removed)
handler.py
CHANGED
@@ -1,43 +1,65 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
from typing import Dict, List, Any
|
4 |
from llama_cpp import Llama
|
5 |
-
import gemma_tools as gem
|
6 |
|
7 |
MAX_TOKENS = 8192
|
8 |
|
|
|
9 |
class EndpointHandler():
|
10 |
-
def __init__(self
|
11 |
-
#
|
12 |
-
|
|
|
|
|
|
|
13 |
|
14 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
16 |
fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{prompt} <endofturn>\n<startofturn>model"
|
17 |
-
|
18 |
-
if
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
23 |
try:
|
24 |
-
fmat = fmat.format(system_prompt=args
|
|
|
|
|
25 |
except Exception as e:
|
26 |
-
|
|
|
27 |
"status": "error",
|
28 |
-
"reason": "
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
32 |
try:
|
33 |
max_length = int(max_length)
|
|
|
34 |
except Exception as e:
|
35 |
-
|
|
|
36 |
"status": "error",
|
37 |
-
"reason": "max_length was passed as something that was
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
return
|
|
|
|
|
|
|
43 |
|
|
|
|
|
|
|
1 |
from typing import Dict, List, Any
|
2 |
from llama_cpp import Llama
|
|
|
3 |
|
4 |
# Hard ceiling on generated tokens; also used as the model context window.
MAX_TOKENS = 8192


class EndpointHandler():
    """Inference-endpoint handler for the ComicBot GGUF model.

    The model is loaded once in ``__init__`` and reused for every request;
    ``__call__`` implements the request/response contract expected by the
    hosting runtime (a ``data`` dict in, a list with one result dict out).
    """

    def __init__(self) -> None:
        # Initialize the model with the ComicBot configuration.
        print("Initializing Llama model with ComicBot settings...")
        self.model = Llama.from_pretrained(
            "njwright92/ComicBot_v.2-gguf",
            filename="comic_mistral-v5.2.q5_0.gguf",
            # Context size kept in sync with the MAX_TOKENS generation cap.
            n_ctx=MAX_TOKENS,
        )
        print("Model initialization complete.")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run one generation request.

        Args:
            data: Payload with an ``args`` dict (``system_prompt``,
                ``inputs``, optional ``temperature``/``top_p``/``top_k``)
                and an optional top-level ``max_length``.

        Returns:
            A single-element list: ``{"status": "success", "response": ...}``
            on success, or an error dict (``status``/``message`` or
            ``status``/``reason``/``detail`` — two shapes kept for
            backward compatibility with existing callers) on bad input.
        """
        # Extract arguments from the data payload.
        print("Extracting arguments from the data payload...")
        args = data.get("args", {})
        print(f"Arguments extracted: {args}")

        # Prompt template the fine-tuned model was trained on.
        fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{prompt} <endofturn>\n<startofturn>model"

        # Reject requests that carry no arguments at all.
        if not args:
            print("No arguments found in the data payload.")
            return [{
                "status": "error",
                "message": "No arguments found in the data payload."
            }]

        try:
            fmat = fmat.format(system_prompt=args.get(
                "system_prompt", ""), prompt=args.get("inputs", ""))
            print(f"Formatted prompt: {fmat}")
        except Exception as e:
            print(f"Error in formatting the prompt: {str(e)}")
            return [{
                "status": "error",
                "reason": "Invalid format",
                "detail": str(e)
            }]

        max_length = data.get("max_length", 512)
        try:
            # int() raises ValueError/TypeError on non-numeric input.
            max_length = int(max_length)
        except (ValueError, TypeError) as e:
            print(f"Error converting max_length to int: {str(e)}")
            return [{
                "status": "error",
                "reason": "max_length was passed as something that was not a plain old int",
                "detail": str(e)
            }]
        # Clamp to a sane range: at least 1 token, never beyond the
        # model's context size (previously MAX_TOKENS was never enforced).
        max_length = max(1, min(max_length, MAX_TOKENS))
        print(f"Max length set to: {max_length}")

        print("Generating response from the model...")
        res = self.model(fmat, temperature=args.get("temperature", 1.0), top_p=args.get(
            "top_p", 0.9), top_k=args.get("top_k", 40), max_tokens=max_length)
        print(f"Model response: {res}")

        return [{
            "status": "success",
            "response": res
        }]