Spaces:

XciD
/

vllm-debug

Runtime error

App Files Files Community

vllm-debug / patch1

XciD HF Staff

Upload 2 files

5f20b5a verified about 1 month ago

raw

history blame contribute delete

3.55 kB

	diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
	index 79eac184a..868e236e6 100644
	--- a/vllm/entrypoints/openai/serving_chat.py
	+++ b/vllm/entrypoints/openai/serving_chat.py
	@@ -200,6 +200,15 @@ class OpenAIServingChat(OpenAIServing):
	logger.exception("Error in preprocessing prompt inputs")
	return self.create_error_response(f"{e} {e.__cause__}")

	+ logger.info("DEBUG "*100)
	+
	+ logger.info("REQUEST PROMPTS %s", request_prompts)
	+ logger.info("CONVERSATION %s", conversation)
	+
	+ # It is unclear exactly what the engine prompt is; as far as I can tell, it is the raw/tokenized input
	+ # fed to the LLM (i.e. what we are looking for).
	+ logger.info("ENGINE PROMPTS %s", engine_prompts)
	+
	request_id = "chatcmpl-" \
	f"{self._base_request_id(raw_request, request.request_id)}"

	diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
	index c7030d34d..7a2765838 100644
	--- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
	+++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
	@@ -44,7 +44,7 @@ class Hermes2ProToolParser(ToolParser):
	self.tool_call_end_token: str = "</tool_call>"

	self.tool_call_regex = re.compile(
	- r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL)
	+ r"<tool_call>\s*(.*?)\s*(?:<tool_call>\s*|</tool_call>\s*|$)", re.DOTALL)
	self.scratch_pad_regex = re.compile(
	r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL)

	@@ -80,15 +80,17 @@ class Hermes2ProToolParser(ToolParser):
	# tag and end-of-string so the result of
	# findall is an array of tuples where one is a function call and
	# the other is None
	- function_call_tuples = (
	- self.tool_call_regex.findall(model_output))
	-
	- # load the JSON, and then use it to build the Function and
	- # Tool Call
	- raw_function_calls = [
	- json.loads(match[0] if match[0] else match[1])
	- for match in function_call_tuples
	- ]
	+ matches = self.tool_call_regex.findall(model_output)
	+ raw_function_calls = []
	+ for match in matches:
	+ if not match:
	+ continue
	+ try:
	+ parsed = json.loads(match.strip())
	+ raw_function_calls.append(parsed)
	+ except json.JSONDecodeError as e:
	+ logger.warning("Skipping malformed tool_call block: %s", e)
	+
	tool_calls = [
	ToolCall(
	type="function",
	@@ -99,9 +101,8 @@ class Hermes2ProToolParser(ToolParser):
	ensure_ascii=False)))
	for function_call in raw_function_calls
	]
	-
	- content = model_output[:model_output.
	- find(self.tool_call_start_token)]
	+ tool_call_start = model_output.find(self.tool_call_start_token)
	+ content = model_output[:tool_call_start] if tool_call_start >= 0 else None
	return ExtractedToolCallInformation(
	tools_called=True,
	tool_calls=tool_calls,