ccm committed b0394f8 (1 parent: bbf3a47)

Cleaning up imports

agent_server/agent_streaming.py CHANGED
@@ -1,6 +1,6 @@
-import os
 import asyncio
 import contextlib
+import os
 import threading
 import time
 import typing
@@ -8,7 +8,7 @@ import typing
 import fastapi
 import httpx
 
-from agent_server.helpers import _sse_headers
+from agent_server.helpers import sse_headers
 from agent_server.sanitizing_think_tags import scrub_think_tags
 from agent_server.std_tee import QueueWriter, _serialize_step
 
@@ -191,10 +191,10 @@ def _recursively_scrub(obj):
     return obj
 
 
-async def _proxy_upstream_chat_completions(
+async def proxy_upstream_chat_completions(
     body: dict, stream: bool, scrub_think: bool = False
 ):
-    HF_TOKEN=os.getenv("OPENAI_API_KEY")
+    HF_TOKEN = os.getenv("OPENAI_API_KEY")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
         "Content-Type": "application/json",
@@ -223,7 +223,7 @@ async def _proxy_upstream_chat_completions(
             yield chunk
 
         return fastapi.responses.StreamingResponse(
-            proxy_stream(), media_type="text/event-stream", headers=_sse_headers()
+            proxy_stream(), media_type="text/event-stream", headers=sse_headers()
         )
     else:
         async with httpx.AsyncClient(timeout=None) as client:

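
For orientation: the function renamed above implements a standard httpx-to-SSE pass-through. Below is a minimal, self-contained sketch of that pattern, not the repository's actual code; UPSTREAM_URL and the bare-bones app are hypothetical stand-ins.

import os

import fastapi
import fastapi.responses
import httpx

UPSTREAM_URL = "https://example.com/v1/chat/completions"  # hypothetical upstream

app = fastapi.FastAPI()


@app.post("/v1/chat/completions")
async def passthrough(req: fastapi.Request):
    body = await req.json()
    token = os.getenv("OPENAI_API_KEY")
    headers = {
        "Authorization": f"Bearer {token}" if token else "",
        "Content-Type": "application/json",
    }

    async def proxy_stream():
        # Relay upstream SSE bytes to the client unchanged, chunk by chunk.
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream(
                "POST", UPSTREAM_URL, json=body, headers=headers
            ) as upstream:
                async for chunk in upstream.aiter_bytes():
                    yield chunk

    return fastapi.responses.StreamingResponse(
        proxy_stream(), media_type="text/event-stream"
    )
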
agent_server/chat_completions.py CHANGED
@@ -21,11 +21,10 @@ from agents.json_tool_calling_agents import (
     generate_tool_calling_agent_with_search_and_code,
 )
 
-
 AGENT_MODEL = os.getenv("AGENT_MODEL", "Qwen/Qwen3-1.7B")
 
 
-def _normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
+def normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
     """
     Accepts either a bare model string or {"id": "..."} form; default to the
     local code-writing agent if unspecified.
@@ -37,15 +36,15 @@ def _normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
     return "code-writing-agent-without-tools"
 
 
-def _is_upstream_passthrough(model_name: str) -> bool:
+def is_upstream_passthrough(model_name: str) -> bool:
     return model_name == AGENT_MODEL
 
 
-def _is_upstream_passthrough_nothink(model_name: str) -> bool:
+def is_upstream_passthrough_nothink(model_name: str) -> bool:
     return model_name == f"{AGENT_MODEL}-nothink"
 
 
-def _apply_nothink_to_body(
+def apply_nothink_to_body(
     body: ChatCompletionRequest, messages: typing.List[ChatMessage]
 ) -> ChatCompletionRequest:
     """
@@ -67,7 +66,7 @@ def _apply_nothink_to_body(
     return new_body
 
 
-def _agent_for_model(model_name: str):
+def agent_for_model(model_name: str):
     """
     Returns an instantiated agent for the given local model id.
     Raises ValueError on unknown local ids.
@@ -118,7 +117,7 @@ def _truncate_reasoning_blob(reasoning: str, limit: int = 24000) -> str:
     return reasoning
 
 
-def _make_sse_generator(
+def make_sse_generator(
     task: str,
     agent_for_request: typing.Any,
     model_name: str,
@@ -225,7 +224,7 @@ def _make_sse_generator(
     return _gen
 
 
-async def _run_non_streaming(task: str, agent_for_request: typing.Any) -> str:
+async def run_non_streaming(task: str, agent_for_request: typing.Any) -> str:
     """
     Runs the agent and returns a single OpenAI-style text (with optional <think> block).
     """

agent_server/helpers.py CHANGED
@@ -36,7 +36,7 @@ def normalize_content_to_text(content: typing.Any) -> str:
     return str(content)
 
 
-def _messages_to_task(messages: typing.List[ChatMessage]) -> str:
+def messages_to_task(messages: typing.List[ChatMessage]) -> str:
     system_parts = [
         normalize_content_to_text(m.get("content", ""))
         for m in messages
@@ -65,9 +65,7 @@ def _messages_to_task(messages: typing.List[ChatMessage]) -> str:
     return f"{sys_txt}\nTask:\n{last_user}\n{history}".strip()
 
 
-def _openai_response(
-    message_text: str, model_name: str
-) -> typing.Dict[str, typing.Any]:
+def openai_response(message_text: str, model_name: str) -> typing.Dict[str, typing.Any]:
     now = int(time.time())
     return {
         "id": f"chatcmpl-smol-{now}",
@@ -85,7 +83,7 @@ def _openai_response(
     }
 
 
-def _sse_headers() -> dict:
+def sse_headers() -> dict:
     return {
         "Cache-Control": "no-cache, no-transform",
         "Connection": "keep-alive",

agent_server/models.py CHANGED
@@ -1,8 +1,9 @@
 import os
+
 import agent_server.helpers
 
 
-def _models_payload() -> dict:
+def models_payload() -> dict:
     """
     Returns the /v1/models response payload.
     """

agents/code_writing_agents.py CHANGED
@@ -1,4 +1,5 @@
 import os
+
 import smolagents
 import smolagents.models
 

agents/generator_and_critic.py CHANGED
@@ -1,9 +1,9 @@
 from __future__ import annotations
+
 import os
-from typing import List, Optional
-from smolagents import CodeAgent, ToolCallingAgent
-import smolagents.models
 
-
+import smolagents.models
+from smolagents import CodeAgent, ToolCallingAgent
 
 # ---------------- Agent Prompts ----------------
 GENERATOR_INSTRUCTIONS = """

agents/json_tool_calling_agents.py CHANGED
@@ -1,4 +1,5 @@
 import os
+
 import smolagents
 import smolagents.models
 

proxy.py CHANGED
@@ -3,33 +3,32 @@ OpenAI-compatible FastAPI proxy that wraps a smolagents CodeAgent
 Refactored for readability and modularity (single-file).
 """
 
+import logging  # For logging
 import os  # For dealing with env vars
 import typing  # For type annotations
-import logging  # For logging
-
 
 import fastapi
 import fastapi.responses
 
 # Upstream pass-through + local helpers
 from agent_server.agent_streaming import (
-    _proxy_upstream_chat_completions,
+    proxy_upstream_chat_completions,
 )
 from agent_server.chat_completions import (
-    _normalize_model_name,
-    _is_upstream_passthrough,
-    _is_upstream_passthrough_nothink,
-    _apply_nothink_to_body,
-    _agent_for_model,
-    _make_sse_generator,
-    _run_non_streaming,
+    normalize_model_name,
+    is_upstream_passthrough,
+    is_upstream_passthrough_nothink,
+    apply_nothink_to_body,
+    agent_for_model,
+    make_sse_generator,
+    run_non_streaming,
 )
 from agent_server.helpers import (
-    _messages_to_task,
-    _openai_response,
-    _sse_headers,
+    messages_to_task,
+    openai_response,
+    sse_headers,
 )
-from agent_server.models import _models_payload
+from agent_server.models import models_payload
 from agent_server.openai_schemas import ChatMessage, ChatCompletionRequest
 
 # Local agent factories
@@ -56,7 +55,7 @@ async def healthz():
 
 @app.get("/v1/models")
 async def list_models():
-    return _models_payload()
+    return models_payload()
 
 
 @app.post("/v1/chat/completions")
@@ -75,38 +74,38 @@ async def chat_completions(req: fastapi.Request):
         typing.List[ChatMessage], body.get("messages") or []
     )
     stream: bool = bool(body.get("stream", False))
-    model_name: str = _normalize_model_name(body.get("model"))
+    model_name: str = normalize_model_name(body.get("model"))
 
     try:
         # ---------------- Upstream pass-through modes ----------------
-        if _is_upstream_passthrough(model_name):
+        if is_upstream_passthrough(model_name):
             # Raw pass-through to upstream
-            return await _proxy_upstream_chat_completions(dict(body), stream)
+            return await proxy_upstream_chat_completions(dict(body), stream)
 
-        if _is_upstream_passthrough_nothink(model_name):
+        if is_upstream_passthrough_nothink(model_name):
             # Modify body for /nothink and forward to upstream
-            return await _proxy_upstream_chat_completions(
-                _apply_nothink_to_body(body, messages), stream, scrub_think=True
+            return await proxy_upstream_chat_completions(
+                apply_nothink_to_body(body, messages), stream, scrub_think=True
             )
 
         # ---------------- Local agent execution ----------------
         # Convert OpenAI messages -> internal "task"
-        task: str = _messages_to_task(messages)
+        task: str = messages_to_task(messages)
 
         # Create agent impl for the requested local model
-        agent_for_request = _agent_for_model(model_name)
+        agent_for_request = agent_for_model(model_name)
 
         if stream:
             # Streaming: return SSE response
-            gen = _make_sse_generator(task, agent_for_request, model_name)
+            gen = make_sse_generator(task, agent_for_request, model_name)
             return fastapi.responses.StreamingResponse(
-                gen(), media_type="text/event-stream", headers=_sse_headers()
+                gen(), media_type="text/event-stream", headers=sse_headers()
             )
         else:
             # Non-streaming: materialize final text and wrap in OpenAI shape
-            result_text = await _run_non_streaming(task, agent_for_request)
+            result_text = await run_non_streaming(task, agent_for_request)
             return fastapi.responses.JSONResponse(
-                _openai_response(result_text, model_name)
+                openai_response(result_text, model_name)
             )
 
     except ValueError as ve:
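
Since the commit only renames helpers and reorders imports, the HTTP surface is unchanged. A quick smoke test against a locally running instance (the base URL and port are assumptions) could be:

import httpx

BASE = "http://localhost:8000"  # assumed local address

# List the models the proxy advertises.
print(httpx.get(f"{BASE}/v1/models").json())

# Non-streaming chat completion against a local agent model.
resp = httpx.post(
    f"{BASE}/v1/chat/completions",
    json={
        "model": "code-writing-agent-without-tools",
        "messages": [{"role": "user", "content": "Write hello world in Python."}],
        "stream": False,
    },
    timeout=None,
)
print(resp.json()["choices"][0]["message"]["content"])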