FatimaZh committed on
Commit 33a1d4c · verified · 1 Parent(s): 4900935

Update app.py

Files changed (1): app.py (+14 -15)
app.py CHANGED
@@ -2,22 +2,21 @@ import os
 import gradio as gr
 from llama_cpp import Llama
 
-# 🔐 HF token from Space secrets
+
 HF_TOKEN = os.environ.get("scalable")
 
-# 🧠 Your GGUF repo on Hugging Face
+
 MODEL_ID = "FatimaZh/llama-3.2-1b-merged-code-gguf-v2"
 
-# ❗ Replace this with the EXACT gguf filename in that repo
-GGUF_FILENAME = "merged_fp16_code_v2.gguf"  # <-- CHANGE ME
+GGUF_FILENAME = "merged_fp16_code_v2.gguf"
 
-# 🧠 Load the GGUF model via llama-cpp
+# Load the GGUF model via llama-cpp
 llm = Llama.from_pretrained(
     repo_id=MODEL_ID,
     filename=GGUF_FILENAME,
     hf_token=HF_TOKEN,
     n_ctx=4096,  # context length
-    n_gpu_layers=-1,  # -1 = all layers on GPU if available, 0 = CPU only
+    n_gpu_layers=-1,  # -1 = all layers on GPU if available
 )
 
 def respond(
@@ -28,21 +27,21 @@ def respond(
     temperature,
     top_p,
 ):
-    # 👉 1. Safety: history can be None
+    # 1. Safety: history can be None
     if history is None:
         history = []
 
-    # 👉 2. Truncate history so it doesn't grow forever
+    # 2. Truncate history so it doesn't grow forever
     # 0 = stateless (no previous turns)
     # 2 = last 2 exchanges, etc.
-    MAX_HISTORY_TURNS = 0  # for eval, I'd keep this at 0 or very small (e.g. 2)
+    MAX_HISTORY_TURNS = 0
 
     if MAX_HISTORY_TURNS <= 0:
         trimmed_history = []
     else:
         trimmed_history = history[-MAX_HISTORY_TURNS:]
 
-    # 👉 3. Build chat messages for llama-cpp
+    # 3. Build chat messages for llama-cpp
     messages = []
     if system_message:
         messages.append({"role": "system", "content": system_message})
@@ -51,7 +50,7 @@ def respond(
     messages.extend(trimmed_history)
     messages.append({"role": "user", "content": message})
 
-    # 👉 4. Generate with llama-cpp
+    # 4. Generate with llama-cpp
     out = llm.create_chat_completion(
         messages=messages,
         max_tokens=int(max_tokens),
@@ -61,7 +60,7 @@
 
     return out["choices"][0]["message"]["content"]
 
-# 🎨 UI
+# UI
 with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
     gr.Markdown(
         """
@@ -73,7 +72,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
     )
 
     with gr.Row():
-        # 🗨️ Chat area
+        # Chat area
         with gr.Column(scale=3):
             gr.ChatInterface(
                 fn=respond,
@@ -108,7 +107,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
             ],
         )
 
-        # ℹ️ Side info
+        # Side info
        with gr.Column(scale=1):
            gr.Markdown(
                f"""
@@ -125,4 +124,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Khadija Chatbot") as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch(share=True, show_error=True)
+    demo.launch(share=True)
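
For quick verification, here is a minimal smoke test (a sketch, not part of this commit) of the load-and-generate path the updated app.py relies on. It reuses the repo id, GGUF filename, secret name, and sampling values shown in the diff, and assumes llama-cpp-python is installed and that an HF read token is exported in the "scalable" environment variable:

# Minimal smoke test (sketch) for the model path used by app.py.
# Assumptions: llama-cpp-python installed, an HF read token in the
# "scalable" env var (the Space secret name from the diff), and the
# repo/filename below accessible with that token.
import os
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="FatimaZh/llama-3.2-1b-merged-code-gguf-v2",
    filename="merged_fp16_code_v2.gguf",
    hf_token=os.environ.get("scalable"),
    n_ctx=4096,
    n_gpu_layers=-1,  # -1 = offload all layers to GPU if available; 0 = CPU only
)

out = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a Python function that reverses a string."},
    ],
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
)
print(out["choices"][0]["message"]["content"])

With MAX_HISTORY_TURNS = 0 this is exactly what respond() sends each turn: an optional system message plus the current user message, with no prior history.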