Update app.py
app.py CHANGED
@@ -28,16 +28,30 @@ def respond(
     temperature,
     top_p,
 ):
-    #
+    # 1. Safety: history can be None
+    if history is None:
+        history = []
+
+    # 2. Truncate history so it doesn't grow forever
+    # 0 = stateless (no previous turns)
+    # 2 = last 2 exchanges, etc.
+    MAX_HISTORY_TURNS = 0  # for eval, I'd keep this at 0 or very small (e.g. 2)
+
+    if MAX_HISTORY_TURNS <= 0:
+        trimmed_history = []
+    else:
+        trimmed_history = history[-MAX_HISTORY_TURNS:]
+
+    # 3. Build chat messages for llama-cpp
     messages = []
     if system_message:
         messages.append({"role": "system", "content": system_message})

-    #
-    messages.extend(
+    # Use ONLY trimmed history (or none)
+    messages.extend(trimmed_history)
     messages.append({"role": "user", "content": message})

-    # Generate with llama-cpp
+    # 4. Generate with llama-cpp
     out = llm.create_chat_completion(
         messages=messages,
         max_tokens=int(max_tokens),
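For reference, a minimal sketch of how the patched respond function could look end to end. It assumes a Gradio ChatInterface configured with type="messages" (so history arrives as OpenAI-style role/content dicts that can be extended into messages directly), a llama-cpp-python Llama instance bound to llm, and a hypothetical model path; nothing beyond the hunk above is taken from the actual Space.

from llama_cpp import Llama

# Hypothetical model path and settings; the actual Space loads its own GGUF model.
llm = Llama(model_path="model.gguf", n_ctx=4096)

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # 1. Safety: history can be None on the first turn
    if history is None:
        history = []

    # 2. Truncate history so the prompt doesn't grow forever
    MAX_HISTORY_TURNS = 0  # 0 = stateless; a small value keeps only recent context
    trimmed_history = history[-MAX_HISTORY_TURNS:] if MAX_HISTORY_TURNS > 0 else []

    # 3. Build chat messages for llama-cpp
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.extend(trimmed_history)  # assumes entries are {"role": ..., "content": ...} dicts
    messages.append({"role": "user", "content": message})

    # 4. Generate with llama-cpp (non-streaming for simplicity)
    out = llm.create_chat_completion(
        messages=messages,
        max_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
    )
    return out["choices"][0]["message"]["content"]

One caveat under that assumption: history is then a flat list of messages, so MAX_HISTORY_TURNS counts individual messages rather than user/assistant exchanges; keeping "the last 2 exchanges" would mean slicing the last 4 entries.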