FatimaZh committed (verified)
Commit 4900935 · 1 Parent(s): 29f672c

Update app.py

Files changed (1)
  1. app.py +18 -4
app.py CHANGED
@@ -28,16 +28,30 @@ def respond(
     temperature,
     top_p,
 ):
-    # Build chat messages (OpenAI-style) for llama-cpp
+    # 👉 1. Safety: history can be None
+    if history is None:
+        history = []
+
+    # 👉 2. Truncate history so it doesn't grow forever
+    #    0 = stateless (no previous turns)
+    #    2 = last 2 exchanges, etc.
+    MAX_HISTORY_TURNS = 0  # for eval, I'd keep this at 0 or very small (e.g. 2)
+
+    if MAX_HISTORY_TURNS <= 0:
+        trimmed_history = []
+    else:
+        trimmed_history = history[-MAX_HISTORY_TURNS:]
+
+    # 👉 3. Build chat messages for llama-cpp
     messages = []
     if system_message:
         messages.append({"role": "system", "content": system_message})
 
-    # history already looks like [{"role": "...", "content": "..."}, ...]
-    messages.extend(history)
+    # Use ONLY trimmed history (or none)
+    messages.extend(trimmed_history)
     messages.append({"role": "user", "content": message})
 
-    # Generate with llama-cpp chat API
+    # 👉 4. Generate with llama-cpp
     out = llm.create_chat_completion(
         messages=messages,
         max_tokens=int(max_tokens),
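
The truncation slices the message list from the end. A minimal sketch of what it keeps, assuming `history` arrives in Gradio's messages format (the sample conversation below is made up for illustration and is not part of app.py):

    # Illustration only; not part of app.py.
    history = [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
        {"role": "user", "content": "How are you?"},
        {"role": "assistant", "content": "Fine, thanks."},
    ]

    MAX_HISTORY_TURNS = 2
    trimmed_history = history[-MAX_HISTORY_TURNS:] if MAX_HISTORY_TURNS > 0 else []
    # trimmed_history now holds the last 2 messages (i.e. one user/assistant exchange):
    # [{"role": "user", "content": "How are you?"},
    #  {"role": "assistant", "content": "Fine, thanks."}]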
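
The hunk cuts off before the rest of the `create_chat_completion` call and the Gradio wiring, so the following is only a sketch of how such a `respond` function is typically finished and hooked up; the model path, slider defaults, and the `demo` object are assumptions, not taken from app.py:

    # Sketch only; assumes a llama-cpp-python model and a Gradio ChatInterface.
    import gradio as gr
    from llama_cpp import Llama

    llm = Llama(model_path="model.gguf")  # placeholder path, an assumption

    def respond(message, history, system_message, max_tokens, temperature, top_p):
        # history is ignored here, matching MAX_HISTORY_TURNS = 0 in the diff
        messages = []
        if system_message:
            messages.append({"role": "system", "content": system_message})
        messages.append({"role": "user", "content": message})
        out = llm.create_chat_completion(
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
        )
        # llama-cpp-python returns an OpenAI-style dict; the reply text lives here:
        return out["choices"][0]["message"]["content"]

    # Hypothetical wiring; the actual app.py may configure this differently.
    demo = gr.ChatInterface(
        respond,
        type="messages",  # history arrives as [{"role": ..., "content": ...}, ...]
        additional_inputs=[
            gr.Textbox(value="You are a helpful assistant.", label="System message"),
            gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
        ],
    )

    if __name__ == "__main__":
        demo.launch()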