Cleaning up imports

- agent_server/agent_streaming.py +5 -5
- agent_server/chat_completions.py +7 -8
- agent_server/helpers.py +3 -5
- agent_server/models.py +2 -1
- agents/code_writing_agents.py +1 -0
- agents/generator_and_critic.py +3 -3
- agents/json_tool_calling_agents.py +1 -0
- proxy.py +26 -27
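
The changes below follow one pattern: helpers that are imported across module boundaries lose their leading underscore, so the names no longer read as reaching into another module's internals, and import blocks are regrouped into the conventional stdlib / third-party / local order. A minimal sketch of the rename's effect on call sites, using the `sse_headers` helper from this commit as the example:

    # Before this commit: the single leading underscore marks the helper as
    # module-private by convention, so importing it elsewhere reads as a smell.
    # from agent_server.helpers import _sse_headers

    # After: the helper is part of agent_server.helpers' public surface.
    from agent_server.helpers import sse_headers

    headers = sse_headers()  # e.g. passed to a StreamingResponse, as in proxy.py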

agent_server/agent_streaming.py

@@ -1,6 +1,6 @@
-import os
 import asyncio
 import contextlib
+import os
 import threading
 import time
 import typing
@@ -8,7 +8,7 @@ import typing
 import fastapi
 import httpx
 
-from agent_server.helpers import _sse_headers
+from agent_server.helpers import sse_headers
 from agent_server.sanitizing_think_tags import scrub_think_tags
 from agent_server.std_tee import QueueWriter, _serialize_step
 
@@ -191,10 +191,10 @@ def _recursively_scrub(obj):
     return obj
 
 
-async def _proxy_upstream_chat_completions(
+async def proxy_upstream_chat_completions(
     body: dict, stream: bool, scrub_think: bool = False
 ):
-    HF_TOKEN=os.getenv("OPENAI_API_KEY")
+    HF_TOKEN = os.getenv("OPENAI_API_KEY")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
         "Content-Type": "application/json",
@@ -223,7 +223,7 @@ async def _proxy_upstream_chat_completions(
             yield chunk
 
         return fastapi.responses.StreamingResponse(
-            proxy_stream(), media_type="text/event-stream", headers=_sse_headers()
+            proxy_stream(), media_type="text/event-stream", headers=sse_headers()
         )
     else:
         async with httpx.AsyncClient(timeout=None) as client:

agent_server/chat_completions.py

@@ -21,11 +21,10 @@ from agents.json_tool_calling_agents import (
     generate_tool_calling_agent_with_search_and_code,
 )
 
-
 AGENT_MODEL = os.getenv("AGENT_MODEL", "Qwen/Qwen3-1.7B")
 
 
-def _normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
+def normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
     """
     Accepts either a bare model string or {"id": "..."} form; default to the
     local code-writing agent if unspecified.
@@ -37,15 +36,15 @@ def _normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
     return "code-writing-agent-without-tools"
 
 
-def _is_upstream_passthrough(model_name: str) -> bool:
+def is_upstream_passthrough(model_name: str) -> bool:
     return model_name == AGENT_MODEL
 
 
-def _is_upstream_passthrough_nothink(model_name: str) -> bool:
+def is_upstream_passthrough_nothink(model_name: str) -> bool:
     return model_name == f"{AGENT_MODEL}-nothink"
 
 
-def _apply_nothink_to_body(
+def apply_nothink_to_body(
     body: ChatCompletionRequest, messages: typing.List[ChatMessage]
 ) -> ChatCompletionRequest:
     """
@@ -67,7 +66,7 @@ def _apply_nothink_to_body(
     return new_body
 
 
-def _agent_for_model(model_name: str):
+def agent_for_model(model_name: str):
     """
     Returns an instantiated agent for the given local model id.
     Raises ValueError on unknown local ids.
@@ -118,7 +117,7 @@ def _truncate_reasoning_blob(reasoning: str, limit: int = 24000) -> str:
     return reasoning
 
 
-def _make_sse_generator(
+def make_sse_generator(
     task: str,
     agent_for_request: typing.Any,
     model_name: str,
@@ -225,7 +224,7 @@ def _make_sse_generator(
     return _gen
 
 
-async def _run_non_streaming(task: str, agent_for_request: typing.Any) -> str:
+async def run_non_streaming(task: str, agent_for_request: typing.Any) -> str:
     """
     Runs the agent and returns a single OpenAI-style text (with optional <think> block).
     """
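
The renamed predicates above encode a small routing convention: a request whose model id equals AGENT_MODEL verbatim is passed through to the upstream provider untouched, while the same id with a `-nothink` suffix is rewritten by `apply_nothink_to_body` and has its `<think>` blocks scrubbed; any other id is treated as a local agent. A quick sketch of the behavior, assuming `AGENT_MODEL` is left at the default shown in this file:

    from agent_server.chat_completions import (
        is_upstream_passthrough,
        is_upstream_passthrough_nothink,
    )

    # Default AGENT_MODEL per this file: "Qwen/Qwen3-1.7B"
    assert is_upstream_passthrough("Qwen/Qwen3-1.7B")
    assert is_upstream_passthrough_nothink("Qwen/Qwen3-1.7B-nothink")
    # Local ids fall through to agent_for_model() instead:
    assert not is_upstream_passthrough("code-writing-agent-without-tools")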

agent_server/helpers.py

@@ -36,7 +36,7 @@ def normalize_content_to_text(content: typing.Any) -> str:
     return str(content)
 
 
-def _messages_to_task(messages: typing.List[ChatMessage]) -> str:
+def messages_to_task(messages: typing.List[ChatMessage]) -> str:
     system_parts = [
         normalize_content_to_text(m.get("content", ""))
         for m in messages
@@ -65,9 +65,7 @@ def _messages_to_task(messages: typing.List[ChatMessage]) -> str:
     return f"{sys_txt}\nTask:\n{last_user}\n{history}".strip()
 
 
-def _openai_response(
-    message_text: str, model_name: str
-) -> typing.Dict[str, typing.Any]:
+def openai_response(message_text: str, model_name: str) -> typing.Dict[str, typing.Any]:
     now = int(time.time())
     return {
         "id": f"chatcmpl-smol-{now}",
@@ -85,7 +83,7 @@ def _openai_response(
     }
 
 
-def _sse_headers() -> dict:
+def sse_headers() -> dict:
     return {
         "Cache-Control": "no-cache, no-transform",
         "Connection": "keep-alive",

agent_server/models.py

@@ -1,8 +1,9 @@
 import os
+
 import agent_server.helpers
 
 
-def _models_payload() -> dict:
+def models_payload() -> dict:
     """
     Returns the /v1/models response payload.
     """

agents/code_writing_agents.py

@@ -1,4 +1,5 @@
 import os
+
 import smolagents
 import smolagents.models
 

agents/generator_and_critic.py

@@ -1,9 +1,9 @@
 from __future__ import annotations
+
 import os
-from typing import List, Optional
-from smolagents import CodeAgent, ToolCallingAgent
-import smolagents.models
 
+import smolagents.models
+from smolagents import CodeAgent, ToolCallingAgent
 
 # ---------------- Agent Prompts ----------------
 GENERATOR_INSTRUCTIONS = """

agents/json_tool_calling_agents.py

@@ -1,4 +1,5 @@
 import os
+
 import smolagents
 import smolagents.models
 

proxy.py

@@ -3,33 +3,32 @@ OpenAI-compatible FastAPI proxy that wraps a smolagents CodeAgent
 Refactored for readability and modularity (single-file).
 """
 
+import logging  # For logging
 import os  # For dealing with env vars
 import typing  # For type annotations
-import logging  # For logging
-
 
 import fastapi
 import fastapi.responses
 
 # Upstream pass-through + local helpers
 from agent_server.agent_streaming import (
-    _proxy_upstream_chat_completions,
+    proxy_upstream_chat_completions,
 )
 from agent_server.chat_completions import (
-    _normalize_model_name,
-    _is_upstream_passthrough,
-    _is_upstream_passthrough_nothink,
-    _apply_nothink_to_body,
-    _agent_for_model,
-    _make_sse_generator,
-    _run_non_streaming,
+    normalize_model_name,
+    is_upstream_passthrough,
+    is_upstream_passthrough_nothink,
+    apply_nothink_to_body,
+    agent_for_model,
+    make_sse_generator,
+    run_non_streaming,
 )
 from agent_server.helpers import (
-    _messages_to_task,
-    _openai_response,
-    _sse_headers,
+    messages_to_task,
+    openai_response,
+    sse_headers,
 )
-from agent_server.models import _models_payload
+from agent_server.models import models_payload
 from agent_server.openai_schemas import ChatMessage, ChatCompletionRequest
 
 # Local agent factories
@@ -56,7 +55,7 @@ async def healthz():
 
 @app.get("/v1/models")
 async def list_models():
-    return _models_payload()
+    return models_payload()
 
 
 @app.post("/v1/chat/completions")
@@ -75,38 +74,38 @@ async def chat_completions(req: fastapi.Request):
         typing.List[ChatMessage], body.get("messages") or []
     )
     stream: bool = bool(body.get("stream", False))
-    model_name: str = _normalize_model_name(body.get("model"))
+    model_name: str = normalize_model_name(body.get("model"))
 
     try:
        # ---------------- Upstream pass-through modes ----------------
-        if _is_upstream_passthrough(model_name):
+        if is_upstream_passthrough(model_name):
            # Raw pass-through to upstream
-            return await _proxy_upstream_chat_completions(dict(body), stream)
+            return await proxy_upstream_chat_completions(dict(body), stream)
 
-        if _is_upstream_passthrough_nothink(model_name):
+        if is_upstream_passthrough_nothink(model_name):
            # Modify body for /nothink and forward to upstream
-            return await _proxy_upstream_chat_completions(
-                _apply_nothink_to_body(body, messages), stream, scrub_think=True
+            return await proxy_upstream_chat_completions(
+                apply_nothink_to_body(body, messages), stream, scrub_think=True
            )
 
        # ---------------- Local agent execution ----------------
        # Convert OpenAI messages -> internal "task"
-        task: str = _messages_to_task(messages)
+        task: str = messages_to_task(messages)
 
        # Create agent impl for the requested local model
-        agent_for_request = _agent_for_model(model_name)
+        agent_for_request = agent_for_model(model_name)
 
        if stream:
            # Streaming: return SSE response
-            gen = _make_sse_generator(task, agent_for_request, model_name)
+            gen = make_sse_generator(task, agent_for_request, model_name)
            return fastapi.responses.StreamingResponse(
-                gen(), media_type="text/event-stream", headers=_sse_headers()
+                gen(), media_type="text/event-stream", headers=sse_headers()
            )
        else:
            # Non-streaming: materialize final text and wrap in OpenAI shape
-            result_text = await _run_non_streaming(task, agent_for_request)
+            result_text = await run_non_streaming(task, agent_for_request)
            return fastapi.responses.JSONResponse(
-                _openai_response(result_text, model_name)
+                openai_response(result_text, model_name)
            )
 
     except ValueError as ve:
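
For orientation, a minimal client sketch against the routes visible in this diff (`/v1/models` and `/v1/chat/completions`). The base URL and port are assumptions, since the diff does not show how the app is served; the response is indexed assuming the standard OpenAI chat-completion shape that `openai_response` wraps results in:

    import httpx

    BASE = "http://localhost:8000"  # assumed; serve command not shown in this diff

    # List the model ids the proxy advertises (served by models_payload()).
    print(httpx.get(f"{BASE}/v1/models").json())

    # Non-streaming completion against the local default agent.
    resp = httpx.post(
        f"{BASE}/v1/chat/completions",
        json={
            # Default local model id per normalize_model_name()
            "model": "code-writing-agent-without-tools",
            "messages": [{"role": "user", "content": "Reverse a string in Python."}],
            "stream": False,
        },
        timeout=None,  # agent runs can take a while
    )
    print(resp.json()["choices"][0]["message"]["content"])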