diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 79eac184a..868e236e6 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -200,6 +200,15 @@ class OpenAIServingChat(OpenAIServing):
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(f"{e} {e.__cause__}")
 
+        logger.info("DEBUG "*100)
+
+        logger.info("REQUEST PROMPTS %s", request_prompts)
+        logger.info("CONVERSATION %s", conversation)
+
+        # Not sure I fully understand what the engine prompt is: from what I can tell, this is the
+        # raw/tokenized input fed to the LLM (i.e. exactly what we are looking for here)
+        logger.info("ENGINE PROMPTS %s", engine_prompts)
+
         request_id = "chatcmpl-" \
                      f"{self._base_request_id(raw_request, request.request_id)}"
 
diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
index c7030d34d..7a2765838 100644
--- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
@@ -44,7 +44,7 @@ class Hermes2ProToolParser(ToolParser):
         self.tool_call_end_token: str = "</tool_call>"
 
         self.tool_call_regex = re.compile(
-            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL)
+            r"<tool_call>\s*(.*?)\s*(?:<tool_call>\s*|</tool_call>\s*|$)", re.DOTALL)
         self.scratch_pad_regex = re.compile(
             r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL)
 
@@ -80,15 +80,17 @@ class Hermes2ProToolParser(ToolParser):
                 # tag and end-of-string so the result of
                 # findall is an array of tuples where one is a function call and
                 # the other is None
-                function_call_tuples = (
-                    self.tool_call_regex.findall(model_output))
-
-                # load the JSON, and then use it to build the Function and
-                # Tool Call
-                raw_function_calls = [
-                    json.loads(match[0] if match[0] else match[1])
-                    for match in function_call_tuples
-                ]
+                matches = self.tool_call_regex.findall(model_output)
+                raw_function_calls = []
+                for match in matches:
+                    if not match:
+                        continue
+                    try:
+                        parsed = json.loads(match.strip())
+                        raw_function_calls.append(parsed)
+                    except json.JSONDecodeError as e:
+                        logger.warning("Skipping malformed tool_call block: %s", e)
+
                 tool_calls = [
                     ToolCall(
                         type="function",
@@ -99,9 +101,8 @@ class Hermes2ProToolParser(ToolParser):
                                                  ensure_ascii=False)))
                     for function_call in raw_function_calls
                 ]
-
-                content = model_output[:model_output.
-                                       find(self.tool_call_start_token)]
+                tool_call_start = model_output.find(self.tool_call_start_token)
+                content = model_output[:tool_call_start] if tool_call_start >= 0 else None
                 return ExtractedToolCallInformation(
                     tools_called=True,
                     tool_calls=tool_calls,
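
A quick sketch (not part of the patch) of how the updated regex and the try/except parsing loop behave on partially malformed model output. The regex is copied from the diff above; the variable names and the sample model output are illustrative assumptions only.

import json
import re

# Regex from the updated Hermes2ProToolParser; it has a single capture group,
# so findall() now returns a list of strings rather than tuples.
tool_call_regex = re.compile(
    r"<tool_call>\s*(.*?)\s*(?:<tool_call>\s*|</tool_call>\s*|$)", re.DOTALL)

# Hypothetical model output: one well-formed block, then one truncated block.
model_output = (
    "Sure, calling the tool now.\n"
    '<tool_call>\n{"name": "get_weather", "arguments": {"city": "Paris"}}\n</tool_call>\n'
    '<tool_call>\n{"name": "get_time", "arguments":'  # truncated, invalid JSON
)

raw_function_calls = []
for match in tool_call_regex.findall(model_output):
    if not match:
        continue
    try:
        raw_function_calls.append(json.loads(match.strip()))
    except json.JSONDecodeError:
        # Mirrors the patch: a malformed block is skipped instead of raising.
        pass

print(raw_function_calls)
# -> [{'name': 'get_weather', 'arguments': {'city': 'Paris'}}]

The well-formed block is parsed as before, while the truncated trailing block is dropped with a warning in the real parser, so a single bad tool call no longer aborts extraction of the others.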