Spaces:

FatimaZh
/

iris

Sleeping

App Files Files Community

FatimaZh committed 11 days ago

Commit

33a1d4c

verified ·

1 Parent(s): 4900935

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -15

app.py CHANGED Viewed

@@ -2,22 +2,21 @@ import os
 import gradio as gr
 from llama_cpp import Llama
-# 🔐 HF token from Space secrets
 HF_TOKEN = os.environ.get("scalable")
-# 🧠 Your GGUF repo on Hugging Face
 MODEL_ID = "FatimaZh/llama-3.2-1b-merged-code-gguf-v2"
-# ❗ Replace this with the EXACT gguf filename in that repo
-GGUF_FILENAME = "merged_fp16_code_v2.gguf"  # <-- CHANGE ME
-# 🧠 Load the GGUF model via llama-cpp
 llm = Llama.from_pretrained(
     repo_id=MODEL_ID,
     filename=GGUF_FILENAME,
     hf_token=HF_TOKEN,
     n_ctx=4096,      # context length
-    n_gpu_layers=-1, # -1 = all layers on GPU if available, 0 = CPU only
 )
 def respond(
@@ -28,21 +27,21 @@ def respond(
     temperature,
     top_p,
 ):
-    # 👉 1. Safety: history can be None
     if history is None:
         history = []
-    # 👉 2. Truncate history so it doesn't grow forever
     #    0  = stateless (no previous turns)
     #    2  = last 2 exchanges, etc.
-    MAX_HISTORY_TURNS = 0  # for eval, I'd keep this at 0 or very small (e.g. 2)
     if MAX_HISTORY_TURNS <= 0:
         trimmed_history = []
     else:
         trimmed_history = history[-MAX_HISTORY_TURNS:]
-    # 👉 3. Build chat messages for llama-cpp
     messages = []
     if system_message:
         messages.append({"role": "system", "content": system_message})
@@ -51,7 +50,7 @@ def respond(
     messages.extend(trimmed_history)
     messages.append({"role": "user", "content": message})
-    # 👉 4. Generate with llama-cpp
     out = llm.create_chat_completion(
         messages=messages,
         max_tokens=int(max_tokens),
@@ -61,7 +60,7 @@ def respond(
     return out["choices"][0]["message"]["content"]
-# 🎨 UI
 with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
     gr.Markdown(
         """
@@ -73,7 +72,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
     )
     with gr.Row():
-        # 🗨️ Chat area
         with gr.Column(scale=3):
             gr.ChatInterface(
                 fn=respond,
@@ -108,7 +107,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
                 ],
             )
-        # ℹ️ Side info
         with gr.Column(scale=1):
             gr.Markdown(
                 f"""
@@ -125,4 +124,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
             )
 if __name__ == "__main__":
-    demo.launch(share=True, show_error=True)

 import gradio as gr
 from llama_cpp import Llama
 HF_TOKEN = os.environ.get("scalable")
 MODEL_ID = "FatimaZh/llama-3.2-1b-merged-code-gguf-v2"
+GGUF_FILENAME = "merged_fp16_code_v2.gguf"
+# Load the GGUF model via llama-cpp
 llm = Llama.from_pretrained(
     repo_id=MODEL_ID,
     filename=GGUF_FILENAME,
     hf_token=HF_TOKEN,
     n_ctx=4096,      # context length
+    n_gpu_layers=-1, # -1 = all layers on GPU if available
 )
 def respond(
     temperature,
     top_p,
 ):
+    # 1. Safety: history can be None
     if history is None:
         history = []
+    # 2. Truncate history so it doesn't grow forever
     #    0  = stateless (no previous turns)
     #    2  = last 2 exchanges, etc.
+    MAX_HISTORY_TURNS = 0
     if MAX_HISTORY_TURNS <= 0:
         trimmed_history = []
     else:
         trimmed_history = history[-MAX_HISTORY_TURNS:]
+    # 3. Build chat messages for llama-cpp
     messages = []
     if system_message:
         messages.append({"role": "system", "content": system_message})
     messages.extend(trimmed_history)
     messages.append({"role": "user", "content": message})
+    # 4. Generate with llama-cpp
     out = llm.create_chat_completion(
         messages=messages,
         max_tokens=int(max_tokens),
     return out["choices"][0]["message"]["content"]
+# UI
 with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
     gr.Markdown(
         """
     )
     with gr.Row():
+        # Chat area
         with gr.Column(scale=3):
             gr.ChatInterface(
                 fn=respond,
                 ],
             )
+        # Side info
         with gr.Column(scale=1):
             gr.Markdown(
                 f"""
             )
 if __name__ == "__main__":
+    demo.launch(share=True)