diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 79eac184a..868e236e6 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -200,6 +200,15 @@ class OpenAIServingChat(OpenAIServing): logger.exception("Error in preprocessing prompt inputs") return self.create_error_response(f"{e} {e.__cause__}") + logger.info("DEBUG "*100) + + logger.info("REQUEST PROMPTS %s", request_prompts) + logger.info("CONVERSATION %s", conversation) + + # Not sure I understand what the engine prompt is: from what I understand, this is the raw/tokenized input + # fed to the LLM (which is what we are looking for) + logger.info("ENGINE PROMPTS %s", engine_prompts) + request_id = "chatcmpl-" \ f"{self._base_request_id(raw_request, request.request_id)}" diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py index c7030d34d..7a2765838 100644 --- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py @@ -44,7 +44,7 @@ class Hermes2ProToolParser(ToolParser): self.tool_call_end_token: str = "" self.tool_call_regex = re.compile( - r"(.*?)|(.*)", re.DOTALL) + r"\s*(.*?)\s*(?:\s*|\s*|$)", re.DOTALL) self.scratch_pad_regex = re.compile( r"(.*?)", re.DOTALL) @@ -80,15 +80,17 @@ class Hermes2ProToolParser(ToolParser): # tag and end-of-string so the result of # findall is an array of tuples where one is a function call and # the other is None - function_call_tuples = ( - self.tool_call_regex.findall(model_output)) - - # load the JSON, and then use it to build the Function and - # Tool Call - raw_function_calls = [ - json.loads(match[0] if match[0] else match[1]) - for match in function_call_tuples - ] + matches = self.tool_call_regex.findall(model_output) + raw_function_calls = [] + for match in matches: + if not match: + continue + try: + parsed = 
json.loads(match.strip()) + raw_function_calls.append(parsed) + except json.JSONDecodeError as e: + logger.warning("Skipping malformed tool_call block: %s", e) + tool_calls = [ ToolCall( type="function", @@ -99,9 +101,8 @@ class Hermes2ProToolParser(ToolParser): ensure_ascii=False))) for function_call in raw_function_calls ] - - content = model_output[:model_output. - find(self.tool_call_start_token)] + tool_call_start = model_output.find(self.tool_call_start_token) + content = model_output[:tool_call_start] if tool_call_start >= 0 else None return ExtractedToolCallInformation( tools_called=True, tool_calls=tool_calls,