Cleaning up imports

- agent_server/agent_streaming.py +5 -5
- agent_server/chat_completions.py +7 -8
- agent_server/helpers.py +3 -5
- agent_server/models.py +2 -1
- agents/code_writing_agents.py +1 -0
- agents/generator_and_critic.py +3 -3
- agents/json_tool_calling_agents.py +1 -0
- proxy.py +26 -27
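
The changes below follow one pattern: helpers that are imported across module boundaries lose their leading underscore, so the names no longer read as reaching into another module's internals, and import blocks are regrouped into the conventional stdlib / third-party / local order. A minimal sketch of the rename's effect on call sites, using the `sse_headers` helper from this commit as the example:

    # Before this commit: the single leading underscore marks the helper as
    # module-private by convention, so importing it elsewhere reads as a smell.
    # from agent_server.helpers import _sse_headers

    # After: the helper is part of agent_server.helpers' public surface.
    from agent_server.helpers import sse_headers

    headers = sse_headers()  # e.g. passed to a StreamingResponse, as in proxy.py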

agent_server/agent_streaming.py

@@ -1,6 +1,6 @@
-import os
 import asyncio
 import contextlib
+import os
 import threading
 import time
 import typing
@@ -8,7 +8,7 @@ import typing
 import fastapi
 import httpx
 
-from agent_server.helpers import _sse_headers
+from agent_server.helpers import sse_headers
 from agent_server.sanitizing_think_tags import scrub_think_tags
 from agent_server.std_tee import QueueWriter, _serialize_step
 
@@ -191,10 +191,10 @@ def _recursively_scrub(obj):
     return obj
 
 
-async def _proxy_upstream_chat_completions(
+async def proxy_upstream_chat_completions(
     body: dict, stream: bool, scrub_think: bool = False
 ):
-    HF_TOKEN=os.getenv("OPENAI_API_KEY")
+    HF_TOKEN = os.getenv("OPENAI_API_KEY")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
         "Content-Type": "application/json",
@@ -223,7 +223,7 @@ async def _proxy_upstream_chat_completions(
             yield chunk
 
         return fastapi.responses.StreamingResponse(
-            proxy_stream(), media_type="text/event-stream", headers=_sse_headers()
+            proxy_stream(), media_type="text/event-stream", headers=sse_headers()
         )
     else:
         async with httpx.AsyncClient(timeout=None) as client:

agent_server/chat_completions.py

@@ -21,11 +21,10 @@ from agents.json_tool_calling_agents import (
     generate_tool_calling_agent_with_search_and_code,
 )
 
-
 AGENT_MODEL = os.getenv("AGENT_MODEL", "Qwen/Qwen3-1.7B")
 
 
-def _normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
+def normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
     """
     Accepts either a bare model string or {"id": "..."} form; default to the
     local code-writing agent if unspecified.
@@ -37,15 +36,15 @@ def _normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
     return "code-writing-agent-without-tools"
 
 
-def _is_upstream_passthrough(model_name: str) -> bool:
+def is_upstream_passthrough(model_name: str) -> bool:
     return model_name == AGENT_MODEL
 
 
-def _is_upstream_passthrough_nothink(model_name: str) -> bool:
+def is_upstream_passthrough_nothink(model_name: str) -> bool:
     return model_name == f"{AGENT_MODEL}-nothink"
 
 
-def _apply_nothink_to_body(
+def apply_nothink_to_body(
     body: ChatCompletionRequest, messages: typing.List[ChatMessage]
 ) -> ChatCompletionRequest:
     """
@@ -67,7 +66,7 @@ def _apply_nothink_to_body(
     return new_body
 
 
-def _agent_for_model(model_name: str):
+def agent_for_model(model_name: str):
     """
     Returns an instantiated agent for the given local model id.
     Raises ValueError on unknown local ids.
@@ -118,7 +117,7 @@ def _truncate_reasoning_blob(reasoning: str, limit: int = 24000) -> str:
     return reasoning
 
 
-def _make_sse_generator(
+def make_sse_generator(
     task: str,
     agent_for_request: typing.Any,
     model_name: str,
@@ -225,7 +224,7 @@ def _make_sse_generator(
     return _gen
 
 
-async def _run_non_streaming(task: str, agent_for_request: typing.Any) -> str:
+async def run_non_streaming(task: str, agent_for_request: typing.Any) -> str:
     """
     Runs the agent and returns a single OpenAI-style text (with optional <think> block).
     """
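
The renamed predicates above encode a small routing convention: a request whose model id equals AGENT_MODEL verbatim is passed through to the upstream provider untouched, while the same id with a `-nothink` suffix is rewritten by `apply_nothink_to_body` and has its `<think>` blocks scrubbed; any other id is treated as a local agent. A quick sketch of the behavior, assuming `AGENT_MODEL` is left at the default shown in this file:

    from agent_server.chat_completions import (
        is_upstream_passthrough,
        is_upstream_passthrough_nothink,
    )

    # Default AGENT_MODEL per this file: "Qwen/Qwen3-1.7B"
    assert is_upstream_passthrough("Qwen/Qwen3-1.7B")
    assert is_upstream_passthrough_nothink("Qwen/Qwen3-1.7B-nothink")
    # Local ids fall through to agent_for_model() instead:
    assert not is_upstream_passthrough("code-writing-agent-without-tools")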

agent_server/helpers.py

@@ -36,7 +36,7 @@ def normalize_content_to_text(content: typing.Any) -> str:
     return str(content)
 
 
-def _messages_to_task(messages: typing.List[ChatMessage]) -> str:
+def messages_to_task(messages: typing.List[ChatMessage]) -> str:
     system_parts = [
         normalize_content_to_text(m.get("content", ""))
         for m in messages
@@ -65,9 +65,7 @@ def _messages_to_task(messages: typing.List[ChatMessage]) -> str:
     return f"{sys_txt}\nTask:\n{last_user}\n{history}".strip()
 
 
-def _openai_response(
-    message_text: str, model_name: str
-) -> typing.Dict[str, typing.Any]:
+def openai_response(message_text: str, model_name: str) -> typing.Dict[str, typing.Any]:
     now = int(time.time())
     return {
         "id": f"chatcmpl-smol-{now}",
@@ -85,7 +83,7 @@ def _openai_response(
     }
 
 
-def _sse_headers() -> dict:
+def sse_headers() -> dict:
     return {
         "Cache-Control": "no-cache, no-transform",
         "Connection": "keep-alive",

agent_server/models.py

@@ -1,8 +1,9 @@
 import os
+
 import agent_server.helpers
 
 
-def _models_payload() -> dict:
+def models_payload() -> dict:
     """
     Returns the /v1/models response payload.
     """

agents/code_writing_agents.py

@@ -1,4 +1,5 @@
 import os
+
 import smolagents
 import smolagents.models
 

agents/generator_and_critic.py

@@ -1,9 +1,9 @@
 from __future__ import annotations
+
 import os
-from typing import List, Optional
-from smolagents import CodeAgent, ToolCallingAgent
-import smolagents.models
 
+import smolagents.models
+from smolagents import CodeAgent, ToolCallingAgent
 
 # ---------------- Agent Prompts ----------------
 GENERATOR_INSTRUCTIONS = """

agents/json_tool_calling_agents.py

@@ -1,4 +1,5 @@
 import os
+
 import smolagents
 import smolagents.models
 

proxy.py

@@ -3,33 +3,32 @@ OpenAI-compatible FastAPI proxy that wraps a smolagents CodeAgent
 Refactored for readability and modularity (single-file).
 """
 
+import logging  # For logging
 import os  # For dealing with env vars
 import typing  # For type annotations
-import logging  # For logging
-
 
 import fastapi
 import fastapi.responses
 
 # Upstream pass-through + local helpers
 from agent_server.agent_streaming import (
-    _proxy_upstream_chat_completions,
+    proxy_upstream_chat_completions,
 )
 from agent_server.chat_completions import (
-    _normalize_model_name,
-    _is_upstream_passthrough,
-    _is_upstream_passthrough_nothink,
-    _apply_nothink_to_body,
-    _agent_for_model,
-    _make_sse_generator,
-    _run_non_streaming,
+    normalize_model_name,
+    is_upstream_passthrough,
+    is_upstream_passthrough_nothink,
+    apply_nothink_to_body,
+    agent_for_model,
+    make_sse_generator,
+    run_non_streaming,
 )
 from agent_server.helpers import (
-    _messages_to_task,
-    _openai_response,
-    _sse_headers,
+    messages_to_task,
+    openai_response,
+    sse_headers,
 )
-from agent_server.models import _models_payload
+from agent_server.models import models_payload
 from agent_server.openai_schemas import ChatMessage, ChatCompletionRequest
 
 # Local agent factories
@@ -56,7 +55,7 @@ async def healthz():
 
 @app.get("/v1/models")
 async def list_models():
-    return _models_payload()
+    return models_payload()
 
 
 @app.post("/v1/chat/completions")
@@ -75,38 +74,38 @@ async def chat_completions(req: fastapi.Request):
         typing.List[ChatMessage], body.get("messages") or []
     )
     stream: bool = bool(body.get("stream", False))
-    model_name: str = _normalize_model_name(body.get("model"))
+    model_name: str = normalize_model_name(body.get("model"))
 
     try:
        # ---------------- Upstream pass-through modes ----------------
-        if _is_upstream_passthrough(model_name):
+        if is_upstream_passthrough(model_name):
            # Raw pass-through to upstream
-            return await _proxy_upstream_chat_completions(dict(body), stream)
+            return await proxy_upstream_chat_completions(dict(body), stream)
 
-        if _is_upstream_passthrough_nothink(model_name):
+        if is_upstream_passthrough_nothink(model_name):
            # Modify body for /nothink and forward to upstream
-            return await _proxy_upstream_chat_completions(
-                _apply_nothink_to_body(body, messages), stream, scrub_think=True
+            return await proxy_upstream_chat_completions(
+                apply_nothink_to_body(body, messages), stream, scrub_think=True
            )
 
        # ---------------- Local agent execution ----------------
        # Convert OpenAI messages -> internal "task"
-        task: str = _messages_to_task(messages)
+        task: str = messages_to_task(messages)
 
        # Create agent impl for the requested local model
-        agent_for_request = _agent_for_model(model_name)
+        agent_for_request = agent_for_model(model_name)
 
        if stream:
            # Streaming: return SSE response
-            gen = _make_sse_generator(task, agent_for_request, model_name)
+            gen = make_sse_generator(task, agent_for_request, model_name)
            return fastapi.responses.StreamingResponse(
-                gen(), media_type="text/event-stream", headers=_sse_headers()
+                gen(), media_type="text/event-stream", headers=sse_headers()
            )
        else:
            # Non-streaming: materialize final text and wrap in OpenAI shape
-            result_text = await _run_non_streaming(task, agent_for_request)
+            result_text = await run_non_streaming(task, agent_for_request)
            return fastapi.responses.JSONResponse(
-                _openai_response(result_text, model_name)
+                openai_response(result_text, model_name)
            )
 
     except ValueError as ve:
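
For orientation, a minimal client sketch against the routes visible in this diff (`/v1/models` and `/v1/chat/completions`). The base URL and port are assumptions, since the diff does not show how the app is served; the response is indexed assuming the standard OpenAI chat-completion shape that `openai_response` wraps results in:

    import httpx

    BASE = "http://localhost:8000"  # assumed; serve command not shown in this diff

    # List the model ids the proxy advertises (served by models_payload()).
    print(httpx.get(f"{BASE}/v1/models").json())

    # Non-streaming completion against the local default agent.
    resp = httpx.post(
        f"{BASE}/v1/chat/completions",
        json={
            # Default local model id per normalize_model_name()
            "model": "code-writing-agent-without-tools",
            "messages": [{"role": "user", "content": "Reverse a string in Python."}],
            "stream": False,
        },
        timeout=None,  # agent runs can take a while
    )
    print(resp.json()["choices"][0]["message"]["content"])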