Spaces:

ccm
/

chat-ui-with-agent-examples

Sleeping

App Files Community

ccm committed 14 days ago

Commit

cdca445

1 Parent(s): a6d1b6d

Reverting logging changes, updating agent.

Browse files

Files changed (3) hide show

agent_server/agent_streaming.py +87 -384
agent_server/std_tee.py +62 -265
agents/generator_and_critic.py +9 -53

agent_server/agent_streaming.py CHANGED Viewed

@@ -1,91 +1,19 @@
-# agent_server/agent_streaming.py
-from __future__ import annotations
 import asyncio
 import contextlib
 import os
 import threading
 import time
-import typing as t
 import fastapi
 import httpx
 from agent_server.helpers import sse_headers
 from agent_server.sanitizing_think_tags import scrub_think_tags
-from agent_server.std_tee import (
-    QueueWriter,
-    _serialize_step,
-    _format_reasoning_chunk,
-    _maybe_parse_final_from_stdout,
-)
-# ---------------------------------------------------------------------------
-# Memory poller: normalize all agent types to uniform step blocks.
-# ---------------------------------------------------------------------------
-def start_memory_poller(
-    agent: t.Any,
-    q: "asyncio.Queue[dict]",
-    stop_evt: "threading.Event",
-    interval: float = 0.10,
-) -> threading.Thread:
-    """
-    Starts a background thread that polls agent memory and enqueues formatted step blocks.
-    Tries several attribute paths to support different agent implementations.
-    """
-    last_len = 0
-    def _get_steps_safe() -> list:
-        # Try canonical memory APIs first
-        try:
-            mem = getattr(agent, "memory", None)
-            if mem is not None:
-                for attr in ("get_full_steps", "get_steps", "get_all_steps"):
-                    fn = getattr(mem, attr, None)
-                    if callable(fn):
-                        steps = fn()
-                        return list(steps or [])
-        except Exception:
-            pass
-        # Fallback: common direct list field
-        try:
-            raw = getattr(agent, "steps", None)
-            if raw:
-                return list(raw)
-        except Exception:
-            pass
-        return []
-    def _run():
-        nonlocal last_len
-        while not stop_evt.is_set():
-            try:
-                steps = _get_steps_safe()
-                if steps and len(steps) > last_len:
-                    new = steps[last_len:]
-                    last_len = len(steps)
-                    for s in new:
-                        try:
-                            s_text = _serialize_step(s)
-                            if s_text:
-                                q.put_nowait({"__step__": s_text})
-                        except Exception:
-                            # Never let formatting kill polling
-                            pass
-            except Exception:
-                pass
-            time.sleep(interval)
-    th = threading.Thread(target=_run, name="memory-poller", daemon=True)
-    th.start()
-    return th
-# ---------------------------------------------------------------------------
-# Unified agent streaming: stdout/stderr, memory steps, iterator yields.
-# Adds normalized reasoning via __reasoning__ while preserving legacy keys.
-# ---------------------------------------------------------------------------
-async def run_agent_stream(task: str, agent_obj: t.Optional[t.Any] = None):
     """
     Start the agent in a worker thread.
     Stream THREE sources of incremental data into the async generator:
@@ -93,62 +21,97 @@ async def run_agent_stream(task: str, agent_obj: t.Optional[t.Any] = None):
       (2) newly appended memory steps (polled),
       (3) any iterable the agent may yield (if supported).
     Finally emit a __final__ item with the last answer.
-    Emits dict items. For compatibility, raw shapes are preserved:
-      - {"__stdout__": "<line>"}  (raw line)
-      - {"__step__": "<block>"}   (uniform Step/Thought/Tool/Args/Obs/Error block)
-      - {"__reasoning__": "<chunk>"} (normalized reasoning derived from stdout)
-      - {"__error__": "<error>"}  (if run errors)
-      - {"__final__": any}        (final result)
     """
     loop = asyncio.get_running_loop()
     q: asyncio.Queue = asyncio.Queue()
     agent_to_use = agent_obj
     stop_evt = threading.Event()
-    # 1) stdout/stderr live tee (lines go in as {"__stdout__": ...})
     qwriter = QueueWriter(q)
     # 2) memory poller
-    mem_thread = start_memory_poller(agent_to_use, q, stop_evt)
     # 3) agent runner (may or may not yield)
     def run_agent():
         final_result = None
         try:
-            with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(qwriter):
                 used_iterable = False
-                # Preferred streaming signature
-                if hasattr(agent_to_use, "run") and callable(getattr(agent_to_use, "run")):
                     try:
                         res = agent_to_use.run(task, stream=True)
-                        if hasattr(res, "__iter__") and not isinstance(res, (str, bytes)):
                             used_iterable = True
                             for it in res:
                                 try:
-                                    q.put_nowait(it if isinstance(it, dict) else {"__stdout__": str(it)})
                                 except Exception:
                                     pass
-                            final_result = None  # may be contained in the iterable
                         else:
                             final_result = res
                     except TypeError:
-                        # run(stream=True) not supported -> fall through to other signatures
                         pass
                 if final_result is None and not used_iterable:
-                    # Try other common streaming variants
-                    for name in ("run_stream", "stream", "stream_run", "run_with_callback"):
                         fn = getattr(agent_to_use, name, None)
                         if callable(fn):
                             try:
                                 res = fn(task)
-                                if hasattr(res, "__iter__") and not isinstance(res, (str, bytes)):
                                     for it in res:
-                                        try:
-                                            q.put_nowait(it if isinstance(it, dict) else {"__stdout__": str(it)})
-                                        except Exception:
-                                            pass
                                     final_result = None
                                 else:
                                     final_result = res
@@ -157,7 +120,7 @@ async def run_agent_stream(task: str, agent_obj: t.Optional[t.Any] = None):
                                 # maybe callback signature
                                 def cb(item):
                                     try:
-                                        q.put_nowait(item if isinstance(item, dict) else {"__stdout__": str(item)})
                                     except Exception:
                                         pass
@@ -169,10 +132,17 @@ async def run_agent_stream(task: str, agent_obj: t.Optional[t.Any] = None):
                                     continue
                 if final_result is None and not used_iterable:
-                    # Last resort: synchronous APIs
-                    if hasattr(agent_to_use, "run") and callable(getattr(agent_to_use, "run")):
                         final_result = agent_to_use.run(task)
-                    elif hasattr(agent_to_use, "generate") and callable(getattr(agent_to_use, "generate")):
                         final_result = agent_to_use.generate(task)
                     elif callable(agent_to_use):
                         final_result = agent_to_use(task)
@@ -197,45 +167,20 @@ async def run_agent_stream(task: str, agent_obj: t.Optional[t.Any] = None):
                 pass
             stop_evt.set()
-    run_thread = threading.Thread(target=run_agent, name="agent-runner", daemon=True)
     run_thread.start()
-    # Async consumer: normalize stdout -> reasoning chunk; forward steps & others
     while True:
         item = await q.get()
-        # Normalize stdout lines into compact reasoning chunks, and also
-        # opportunistically extract a "Final answer:" if the agent prints one.
-        if isinstance(item, dict) and "__stdout__" in item:
-            line = item["__stdout__"]
-            # Add compact, filtered reasoning chunk (drop banners, system prompts)
-            chunk = _format_reasoning_chunk(line, tag="stdout", idx=0)
-            if chunk:
-                yield {"__reasoning__": chunk}
-            # Keep legacy raw stdout for existing consumers
-            yield item
-            # Opportunistic final answer capture from stdout
-            maybe_final = _maybe_parse_final_from_stdout(line)
-            if maybe_final:
-                # Don't end the stream here; consumer can decide how to use it
-                yield {"__maybe_final__": maybe_final}
-            continue
-        # Steps already serialized uniformly in the poller
-        if isinstance(item, dict) and "__step__" in item:
-            yield item
-            continue
-        # Pass-through for other shapes (__error__, iterable events, etc.)
         yield item
         if isinstance(item, dict) and "__final__" in item:
             break
-# ---------------------------------------------------------------------------
-# Utilities: scrub nested structures of <think> tags when proxying upstream
-# ---------------------------------------------------------------------------
 def _recursively_scrub(obj):
     if isinstance(obj, str):
         return scrub_think_tags(obj)
@@ -246,10 +191,9 @@ def _recursively_scrub(obj):
     return obj
-# ---------------------------------------------------------------------------
-# Upstream proxy (OpenAI-compatible) with optional think-tag scrubbing
-# ---------------------------------------------------------------------------
-async def proxy_upstream_chat_completions(body: dict, stream: bool, scrub_think: bool = False):
     HF_TOKEN = os.getenv("OPENAI_API_KEY")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
@@ -262,9 +206,12 @@ async def proxy_upstream_chat_completions(body: dict, stream: bool, scrub_think:
         async def proxy_stream():
             async with httpx.AsyncClient(timeout=None) as client:
-                async with client.stream("POST", url, headers=headers, json=body) as resp:
                     resp.raise_for_status()
                     if scrub_think:
                         async for txt in resp.aiter_text():
                             try:
                                 cleaned = scrub_think_tags(txt)
@@ -292,250 +239,6 @@ async def proxy_upstream_chat_completions(body: dict, stream: bool, scrub_think:
                 except Exception:
                     pass
-            return fastapi.responses.JSONResponse(status_code=r.status_code, content=payload)
-# import asyncio
-# import contextlib
-# import os
-# import threading
-# import time
-# import typing
-#
-# import fastapi
-# import httpx
-#
-# from agent_server.helpers import sse_headers
-# from agent_server.sanitizing_think_tags import scrub_think_tags
-# from agent_server.std_tee import QueueWriter, _serialize_step
-#
-#
-# async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = None):
-#     """
-#     Start the agent in a worker thread.
-#     Stream THREE sources of incremental data into the async generator:
-#       (1) live stdout/stderr lines,
-#       (2) newly appended memory steps (polled),
-#       (3) any iterable the agent may yield (if supported).
-#     Finally emit a __final__ item with the last answer.
-#     """
-#     loop = asyncio.get_running_loop()
-#     q: asyncio.Queue = asyncio.Queue()
-#     agent_to_use = agent_obj
-#
-#     stop_evt = threading.Event()
-#
-#     # 1) stdout/stderr live tee
-#     qwriter = QueueWriter(q)
-#
-#     # 2) memory poller
-#     def poll_memory():
-#         last_len = 0
-#         while not stop_evt.is_set():
-#             try:
-#                 steps = []
-#                 try:
-#                     # Common API: agent.memory.get_full_steps()
-#                     steps = agent_to_use.memory.get_full_steps()  # type: ignore[attr-defined]
-#                 except Exception:
-#                     # Fallbacks: different names across versions
-#                     steps = (
-#                         getattr(agent_to_use, "steps", [])
-#                         or getattr(agent_to_use, "memory", [])
-#                         or []
-#                     )
-#                 if steps is None:
-#                     steps = []
-#                 curr_len = len(steps)
-#                 if curr_len > last_len:
-#                     new = steps[last_len:curr_len]
-#                     last_len = curr_len
-#                     for s in new:
-#                         s_text = _serialize_step(s)
-#                         if s_text:
-#                             try:
-#                                 q.put_nowait({"__step__": s_text})
-#                             except Exception:
-#                                 pass
-#             except Exception:
-#                 pass
-#             time.sleep(0.10)  # 100 ms cadence
-#
-#     # 3) agent runner (may or may not yield)
-#     def run_agent():
-#         final_result = None
-#         try:
-#             with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(
-#                 qwriter
-#             ):
-#                 used_iterable = False
-#                 if hasattr(agent_to_use, "run") and callable(
-#                     getattr(agent_to_use, "run")
-#                 ):
-#                     try:
-#                         res = agent_to_use.run(task, stream=True)
-#                         if hasattr(res, "__iter__") and not isinstance(
-#                             res, (str, bytes)
-#                         ):
-#                             used_iterable = True
-#                             for it in res:
-#                                 try:
-#                                     q.put_nowait(it)
-#                                 except Exception:
-#                                     pass
-#                             final_result = (
-#                                 None  # iterable may already contain the answer
-#                             )
-#                         else:
-#                             final_result = res
-#                     except TypeError:
-#                         # run(stream=True) not supported -> fall back
-#                         pass
-#
-#                 if final_result is None and not used_iterable:
-#                     # Try other common streaming signatures
-#                     for name in (
-#                         "run_stream",
-#                         "stream",
-#                         "stream_run",
-#                         "run_with_callback",
-#                     ):
-#                         fn = getattr(agent_to_use, name, None)
-#                         if callable(fn):
-#                             try:
-#                                 res = fn(task)
-#                                 if hasattr(res, "__iter__") and not isinstance(
-#                                     res, (str, bytes)
-#                                 ):
-#                                     for it in res:
-#                                         q.put_nowait(it)
-#                                     final_result = None
-#                                 else:
-#                                     final_result = res
-#                                 break
-#                             except TypeError:
-#                                 # maybe callback signature
-#                                 def cb(item):
-#                                     try:
-#                                         q.put_nowait(item)
-#                                     except Exception:
-#                                         pass
-#
-#                                 try:
-#                                     fn(task, cb)
-#                                     final_result = None
-#                                     break
-#                                 except Exception:
-#                                     continue
-#
-#                 if final_result is None and not used_iterable:
-#                     pass  # (typo guard removed below)
-#
-#                 if final_result is None and not used_iterable:
-#                     # Last resort: synchronous run()/generate()/callable
-#                     if hasattr(agent_to_use, "run") and callable(
-#                         getattr(agent_to_use, "run")
-#                     ):
-#                         final_result = agent_to_use.run(task)
-#                     elif hasattr(agent_to_use, "generate") and callable(
-#                         getattr(agent_to_use, "generate")
-#                     ):
-#                         final_result = agent_to_use.generate(task)
-#                     elif callable(agent_to_use):
-#                         final_result = agent_to_use(task)
-#
-#         except Exception as e:
-#             try:
-#                 qwriter.flush()
-#             except Exception:
-#                 pass
-#             try:
-#                 q.put_nowait({"__error__": str(e)})
-#             except Exception:
-#                 pass
-#         finally:
-#             try:
-#                 qwriter.flush()
-#             except Exception:
-#                 pass
-#             try:
-#                 q.put_nowait({"__final__": final_result})
-#             except Exception:
-#                 pass
-#             stop_evt.set()
-#
-#     # Kick off threads
-#     mem_thread = threading.Thread(target=poll_memory, daemon=True)
-#     run_thread = threading.Thread(target=run_agent, daemon=True)
-#     mem_thread.start()
-#     run_thread.start()
-#
-#     # Async consumer
-#     while True:
-#         item = await q.get()
-#         yield item
-#         if isinstance(item, dict) and "__final__" in item:
-#             break
-#
-#
-# def _recursively_scrub(obj):
-#     if isinstance(obj, str):
-#         return scrub_think_tags(obj)
-#     if isinstance(obj, dict):
-#         return {k: _recursively_scrub(v) for k, v in obj.items()}
-#     if isinstance(obj, list):
-#         return [_recursively_scrub(v) for v in obj]
-#     return obj
-#
-#
-# async def proxy_upstream_chat_completions(
-#     body: dict, stream: bool, scrub_think: bool = False
-# ):
-#     HF_TOKEN = os.getenv("OPENAI_API_KEY")
-#     headers = {
-#         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
-#         "Content-Type": "application/json",
-#     }
-#     UPSTREAM_BASE = os.getenv("UPSTREAM_OPENAI_BASE", "").rstrip("/")
-#     url = f"{UPSTREAM_BASE}/chat/completions"
-#
-#     if stream:
-#
-#         async def proxy_stream():
-#             async with httpx.AsyncClient(timeout=None) as client:
-#                 async with client.stream(
-#                     "POST", url, headers=headers, json=body
-#                 ) as resp:
-#                     resp.raise_for_status()
-#                     if scrub_think:
-#                         # Pull text segments, scrub tags, and yield bytes
-#                         async for txt in resp.aiter_text():
-#                             try:
-#                                 cleaned = scrub_think_tags(txt)
-#                                 yield cleaned.encode("utf-8")
-#                             except Exception:
-#                                 yield txt.encode("utf-8")
-#                     else:
-#                         async for chunk in resp.aiter_bytes():
-#                             yield chunk
-#
-#         return fastapi.responses.StreamingResponse(
-#             proxy_stream(), media_type="text/event-stream", headers=sse_headers()
-#         )
-#     else:
-#         async with httpx.AsyncClient(timeout=None) as client:
-#             r = await client.post(url, headers=headers, json=body)
-#             try:
-#                 payload = r.json()
-#             except Exception:
-#                 payload = {"status_code": r.status_code, "text": r.text}
-#
-#             if scrub_think:
-#                 try:
-#                     payload = _recursively_scrub(payload)
-#                 except Exception:
-#                     pass
-#
-#             return fastapi.responses.JSONResponse(
-#                 status_code=r.status_code, content=payload
-#             )

 import asyncio
 import contextlib
 import os
 import threading
 import time
+import typing
 import fastapi
 import httpx
 from agent_server.helpers import sse_headers
 from agent_server.sanitizing_think_tags import scrub_think_tags
+from agent_server.std_tee import QueueWriter, _serialize_step
+async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = None):
     """
     Start the agent in a worker thread.
     Stream THREE sources of incremental data into the async generator:
       (2) newly appended memory steps (polled),
       (3) any iterable the agent may yield (if supported).
     Finally emit a __final__ item with the last answer.
     """
     loop = asyncio.get_running_loop()
     q: asyncio.Queue = asyncio.Queue()
     agent_to_use = agent_obj
     stop_evt = threading.Event()
+    # 1) stdout/stderr live tee
     qwriter = QueueWriter(q)
     # 2) memory poller
+    def poll_memory():
+        last_len = 0
+        while not stop_evt.is_set():
+            try:
+                steps = []
+                try:
+                    # Common API: agent.memory.get_full_steps()
+                    steps = agent_to_use.memory.get_full_steps()  # type: ignore[attr-defined]
+                except Exception:
+                    # Fallbacks: different names across versions
+                    steps = (
+                        getattr(agent_to_use, "steps", [])
+                        or getattr(agent_to_use, "memory", [])
+                        or []
+                    )
+                if steps is None:
+                    steps = []
+                curr_len = len(steps)
+                if curr_len > last_len:
+                    new = steps[last_len:curr_len]
+                    last_len = curr_len
+                    for s in new:
+                        s_text = _serialize_step(s)
+                        if s_text:
+                            try:
+                                q.put_nowait({"__step__": s_text})
+                            except Exception:
+                                pass
+            except Exception:
+                pass
+            time.sleep(0.10)  # 100 ms cadence
     # 3) agent runner (may or may not yield)
     def run_agent():
         final_result = None
         try:
+            with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(
+                qwriter
+            ):
                 used_iterable = False
+                if hasattr(agent_to_use, "run") and callable(
+                    getattr(agent_to_use, "run")
+                ):
                     try:
                         res = agent_to_use.run(task, stream=True)
+                        if hasattr(res, "__iter__") and not isinstance(
+                            res, (str, bytes)
+                        ):
                             used_iterable = True
                             for it in res:
                                 try:
+                                    q.put_nowait(it)
                                 except Exception:
                                     pass
+                            final_result = (
+                                None  # iterable may already contain the answer
+                            )
                         else:
                             final_result = res
                     except TypeError:
+                        # run(stream=True) not supported -> fall back
                         pass
                 if final_result is None and not used_iterable:
+                    # Try other common streaming signatures
+                    for name in (
+                        "run_stream",
+                        "stream",
+                        "stream_run",
+                        "run_with_callback",
+                    ):
                         fn = getattr(agent_to_use, name, None)
                         if callable(fn):
                             try:
                                 res = fn(task)
+                                if hasattr(res, "__iter__") and not isinstance(
+                                    res, (str, bytes)
+                                ):
                                     for it in res:
+                                        q.put_nowait(it)
                                     final_result = None
                                 else:
                                     final_result = res
                                 # maybe callback signature
                                 def cb(item):
                                     try:
+                                        q.put_nowait(item)
                                     except Exception:
                                         pass
                                     continue
                 if final_result is None and not used_iterable:
+                    pass  # (typo guard removed below)
+                if final_result is None and not used_iterable:
+                    # Last resort: synchronous run()/generate()/callable
+                    if hasattr(agent_to_use, "run") and callable(
+                        getattr(agent_to_use, "run")
+                    ):
                         final_result = agent_to_use.run(task)
+                    elif hasattr(agent_to_use, "generate") and callable(
+                        getattr(agent_to_use, "generate")
+                    ):
                         final_result = agent_to_use.generate(task)
                     elif callable(agent_to_use):
                         final_result = agent_to_use(task)
                 pass
             stop_evt.set()
+    # Kick off threads
+    mem_thread = threading.Thread(target=poll_memory, daemon=True)
+    run_thread = threading.Thread(target=run_agent, daemon=True)
+    mem_thread.start()
     run_thread.start()
+    # Async consumer
     while True:
         item = await q.get()
         yield item
         if isinstance(item, dict) and "__final__" in item:
             break
 def _recursively_scrub(obj):
     if isinstance(obj, str):
         return scrub_think_tags(obj)
     return obj
+async def proxy_upstream_chat_completions(
+    body: dict, stream: bool, scrub_think: bool = False
+):
     HF_TOKEN = os.getenv("OPENAI_API_KEY")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
         async def proxy_stream():
             async with httpx.AsyncClient(timeout=None) as client:
+                async with client.stream(
+                    "POST", url, headers=headers, json=body
+                ) as resp:
                     resp.raise_for_status()
                     if scrub_think:
+                        # Pull text segments, scrub tags, and yield bytes
                         async for txt in resp.aiter_text():
                             try:
                                 cleaned = scrub_think_tags(txt)
                 except Exception:
                     pass
+            return fastapi.responses.JSONResponse(
+                status_code=r.status_code, content=payload
+            )

agent_server/std_tee.py CHANGED Viewed

@@ -1,305 +1,102 @@
-# agent_server/std_tee.py
-from __future__ import annotations
 import asyncio
 import io
 import json
 import re
 import threading
-import typing as t
-# ---- Think-tag scrubber (import with safe fallback) -------------------------
-try:
-    # Same-package relative import is preferred
-    from .sanitizing_think_tags import scrub_think_tags  # type: ignore
-except Exception:  # pragma: no cover
-    # No-op fallback if the project layout differs
-    def scrub_think_tags(s: str) -> str:
-        return s
-# ---- Formatting helpers (ANSI, noise, truncation) --------------------------
-_ANSI_RE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
-# Lines that should never be surfaced (system prompt and obvious boilerplate)
-_NOISY_PREFIXES = (
-    "OpenAIServerModel",
-    "Output message of the LLM",
-    "New run",
-    "─ Executing parsed code",
-    "╭", "╰", "│", "━", "─",
-    "System prompt", "SYSTEM PROMPT", "System Prompt",
-)
-# Very long single lines without enough alphanumerics are dropped
-_MIN_SIG_CHARS = re.compile(r"[A-Za-z0-9]{3,}")
-def _strip_ansi_and_think(s: str) -> str:
-    s = scrub_think_tags(s)
-    s = _ANSI_RE.sub("", s)
-    return s.strip()
-def _truncate(s: str, n: int) -> str:
-    s = s.strip()
-    if len(s) <= n:
-        return s
-    return s[:n] + "\n… [truncated]"
-def _clean_line(s: str) -> str:
-    return _strip_ansi_and_think(s).rstrip("\n")
-# ---- Public-ish helpers used by agent_streaming ----------------------------
-_FINAL_RE = re.compile(r"(?:^|\\b)Final\\s+answer:\\s*(.+)$", flags=re.IGNORECASE)
-def _maybe_parse_final_from_stdout(line: str) -> t.Optional[str]:
-    if not isinstance(line, str):
-        return None
-    line = _clean_line(line)
-    m = _FINAL_RE.search(line)
-    if not m:
-        return None
-    return _clean_line(m.group(1)) or None
-def _format_reasoning_chunk(text: str, tag: str, idx: int) -> str:
-    """
-    Lightweight formatter for reasoning stream from stdout.
-    - scrubs <think>…</think>
-    - strips ANSI
-    - drops banners/box drawing and 'System prompt …'
-    - drops very-long low-signal lines
-    """
-    stripped = _clean_line(text)
-    if not stripped:
-        return ""
-    if any(stripped.startswith(p) for p in _NOISY_PREFIXES):
-        return ""
-    if all(ch in " ─━╭╮╰╯│═·—-_=+•" for ch in stripped):
-        return ""
-    if len(stripped) > 240 and not _MIN_SIG_CHARS.search(stripped):
-        return ""
-    return f"{stripped}\n\n"
-def _serialize_step(step: t.Any) -> str:
     """
-    Compact, uniform serializer for 'step' objects from different agent libs.
-    Produces:
-      Step N
-      🧠 Thought: …
-      🛠️ Tool: …
-      📥 Args: …
-      📤 Observation: …
-      💥 Error: …
-      (plus code fences when code is present)
-    With truncation to keep the reveal parsimonious.
     """
-    parts: list[str] = []
-    # Step number (best-effort)
     sn = getattr(step, "step_number", None)
     if sn is not None:
         parts.append(f"Step {sn}")
-    # Thought
     thought_val = getattr(step, "thought", None)
     if thought_val:
-        parts.append(f"🧠 Thought: {_truncate(_strip_ansi_and_think(str(thought_val)), 600)}")
-    # Tool
     tool_val = getattr(step, "tool", None)
     if tool_val:
-        parts.append(f"🛠️ Tool: {_truncate(_strip_ansi_and_think(str(tool_val)), 240)}")
-    # Code (if any)
     code_val = getattr(step, "code", None)
     if code_val:
-        code_str = _truncate(_strip_ansi_and_think(str(code_val)), 1600)
-        if code_str:
-            parts.append("```python\n" + code_str + "\n```")
-    # Args
     args = getattr(step, "args", None)
     if args:
         try:
-            arg_s = _truncate(_strip_ansi_and_think(json.dumps(args, ensure_ascii=False)), 800)
         except Exception:
-            arg_s = _truncate(_strip_ansi_and_think(str(args)), 800)
-        parts.append("📥 Args: " + arg_s)
-    # Error
     error = getattr(step, "error", None)
     if error:
-        parts.append(f"💥 Error: {_truncate(_strip_ansi_and_think(str(error)), 600)}")
-    # Observations
     obs = getattr(step, "observations", None)
     if obs is not None:
         if isinstance(obs, (list, tuple)):
             obs_str = "\n".join(map(str, obs))
         else:
             obs_str = str(obs)
-        parts.append("📤 Observation:\n" + _truncate(_strip_ansi_and_think(obs_str), 1600))
-    # Final answer via explicit action type patterns (best-effort)
-    tname = getattr(step, "type_name", "") or getattr(step, "type", "") or ""
-    if isinstance(tname, str) and tname.lower().startswith("finalanswer"):
         out = getattr(step, "output", None)
         if out is not None:
-            return f"Final answer: {_strip_ansi_and_think(str(out))}"
-    # Fallback: parse repr
-    s = _strip_ansi_and_think(str(step))
-    m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
-    if m:
-        return f"Final answer: {m.group(1).strip()}"
     joined = "\n".join(parts).strip()
     if re.match(r"^FinalAnswer[^\n]+\)$", joined):
         return ""
-    return joined or s
-# ---- Tee for redirecting stdout/stderr into an asyncio.Queue ----------------
-class QueueWriter(io.TextIOBase):
-    """
-    Minimal text writer that sends lines into an asyncio.Queue.
-    Each non-empty line is enqueued as {"__stdout__": "<line>"}.
-    """
-    def __init__(self, q: "asyncio.Queue[dict]"):
-        self._q = q
-        self._buf = []
-    def write(self, s: str) -> int:
-        if not isinstance(s, str):
-            s = str(s)
-        # Buffer until newline; then emit line events
-        self._buf.append(s)
-        text = "".join(self._buf)
-        if "\n" in text:
-            lines = text.splitlines(keepends=True)
-            # keep last partial (no newline) in buffer
-            tail = "" if text.endswith("\n") else lines.pop()
-            for ln in lines:
-                clean = _clean_line(ln)
-                if clean:
-                    try:
-                        # downstream streamer will call _format_reasoning_chunk & co.
-                        self._q.put_nowait({"__stdout__": clean})
-                    except Exception:
-                        pass
-            self._buf = [tail]
-        return len(s)
-    def flush(self) -> None:
-        if self._buf:
-            text = "".join(self._buf)
-            self._buf.clear()
-            clean = _clean_line(text)
-            if clean:
-                try:
-                    self._q.put_nowait({"__stdout__": clean})
-                except Exception:
-                    pass
-    def isatty(self) -> bool:  # for libraries that test it
-        return False
-# import asyncio
-# import io
-# import json
-# import re
-# import threading
-#
-# from agent_server.sanitizing_think_tags import scrub_think_tags
-#
-#
-# class QueueWriter(io.TextIOBase):
-#     """
-#     File-like object that pushes each write to an asyncio.Queue immediately.
-#     """
-#
-#     def __init__(self, q: "asyncio.Queue"):
-#         self.q = q
-#         self._lock = threading.Lock()
-#         self._buf = []  # accumulate until newline to reduce spam
-#
-#     def write(self, s: str):
-#         if not s:
-#             return 0
-#         with self._lock:
-#             self._buf.append(s)
-#             # flush on newline to keep granularity reasonable
-#             if "\n" in s:
-#                 chunk = "".join(self._buf)
-#                 self._buf.clear()
-#                 try:
-#                     self.q.put_nowait({"__stdout__": chunk})
-#                 except Exception:
-#                     pass
-#         return len(s)
-#
-#     def flush(self):
-#         with self._lock:
-#             if self._buf:
-#                 chunk = "".join(self._buf)
-#                 self._buf.clear()
-#                 try:
-#                     self.q.put_nowait({"__stdout__": chunk})
-#                 except Exception:
-#                     pass
-#
-#
-# def _serialize_step(step) -> str:
-#     """
-#     Best-effort pretty string for a smolagents MemoryStep / ActionStep.
-#     Works even if attributes are missing on some versions.
-#     """
-#     parts = []
-#     sn = getattr(step, "step_number", None)
-#     if sn is not None:
-#         parts.append(f"Step {sn}")
-#     thought_val = getattr(step, "thought", None)
-#     if thought_val:
-#         parts.append(f"Thought: {scrub_think_tags(str(thought_val))}")
-#     tool_val = getattr(step, "tool", None)
-#     if tool_val:
-#         parts.append(f"Tool: {scrub_think_tags(str(tool_val))}")
-#     code_val = getattr(step, "code", None)
-#     if code_val:
-#         code_str = scrub_think_tags(str(code_val)).strip()
-#         parts.append("```python\n" + code_str + "\n```")
-#     args = getattr(step, "args", None)
-#     if args:
-#         try:
-#             parts.append(
-#                 "Args: " + scrub_think_tags(json.dumps(args, ensure_ascii=False))
-#             )
-#         except Exception:
-#             parts.append("Args: " + scrub_think_tags(str(args)))
-#     error = getattr(step, "error", None)
-#     if error:
-#         parts.append(f"Error: {scrub_think_tags(str(error))}")
-#     obs = getattr(step, "observations", None)
-#     if obs is not None:
-#         if isinstance(obs, (list, tuple)):
-#             obs_str = "\n".join(map(str, obs))
-#         else:
-#             obs_str = str(obs)
-#         parts.append("Observation:\n" + scrub_think_tags(obs_str).strip())
-#     # If this looks like a FinalAnswer step object, surface a clean final answer
-#     try:
-#         tname = type(step).__name__
-#     except Exception:
-#         tname = ""
-#     if tname.lower().startswith("finalanswer"):
-#         out = getattr(step, "output", None)
-#         if out is not None:
-#             return f"Final answer: {scrub_think_tags(str(out)).strip()}"
-#         # Fallback: try to parse from string repr "FinalAnswerStep(output=...)"
-#         s = scrub_think_tags(str(step))
-#         m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
-#         if m:
-#             return f"Final answer: {m.group(1).strip()}"
-#     # If the only content would be an object repr like FinalAnswerStep(...), drop it;
-#     # a cleaner "Final answer: ..." will come from the rule above or stdout.
-#     joined = "\n".join(parts).strip()
-#     if re.match(r"^FinalAnswer[^\n]+\)$", joined):
-#         return ""
-#     return joined or scrub_think_tags(str(step))

 import asyncio
 import io
 import json
 import re
 import threading
+from agent_server.sanitizing_think_tags import scrub_think_tags
+class QueueWriter(io.TextIOBase):
+    """
+    File-like object that buffers writes and pushes them to an asyncio.Queue when a write contains a newline or on flush().
+    """
+    def __init__(self, q: "asyncio.Queue"):
+        self.q = q
+        self._lock = threading.Lock()
+        self._buf = []  # accumulate until newline to reduce spam
+    def write(self, s: str):
+        if not s:
+            return 0
+        with self._lock:
+            self._buf.append(s)
+            # flush on newline to keep granularity reasonable
+            if "\n" in s:
+                chunk = "".join(self._buf)
+                self._buf.clear()
+                try:
+                    self.q.put_nowait({"__stdout__": chunk})
+                except Exception:
+                    pass
+        return len(s)
+    def flush(self):
+        with self._lock:
+            if self._buf:
+                chunk = "".join(self._buf)
+                self._buf.clear()
+                try:
+                    self.q.put_nowait({"__stdout__": chunk})
+                except Exception:
+                    pass
+def _serialize_step(step) -> str:
     """
+    Best-effort pretty string for a smolagents MemoryStep / ActionStep.
+    Works even if attributes are missing on some versions.
     """
+    parts = []
     sn = getattr(step, "step_number", None)
     if sn is not None:
         parts.append(f"Step {sn}")
     thought_val = getattr(step, "thought", None)
     if thought_val:
+        parts.append(f"Thought: {scrub_think_tags(str(thought_val))}")
     tool_val = getattr(step, "tool", None)
     if tool_val:
+        parts.append(f"Tool: {scrub_think_tags(str(tool_val))}")
     code_val = getattr(step, "code", None)
     if code_val:
+        code_str = scrub_think_tags(str(code_val)).strip()
+        parts.append("```python\n" + code_str + "\n```")
     args = getattr(step, "args", None)
     if args:
         try:
+            parts.append(
+                "Args: " + scrub_think_tags(json.dumps(args, ensure_ascii=False))
+            )
         except Exception:
+            parts.append("Args: " + scrub_think_tags(str(args)))
     error = getattr(step, "error", None)
     if error:
+        parts.append(f"Error: {scrub_think_tags(str(error))}")
     obs = getattr(step, "observations", None)
     if obs is not None:
         if isinstance(obs, (list, tuple)):
             obs_str = "\n".join(map(str, obs))
         else:
             obs_str = str(obs)
+        parts.append("Observation:\n" + scrub_think_tags(obs_str).strip())
+    # If this looks like a FinalAnswer step object, surface a clean final answer
+    try:
+        tname = type(step).__name__
+    except Exception:
+        tname = ""
+    if tname.lower().startswith("finalanswer"):
         out = getattr(step, "output", None)
         if out is not None:
+            return f"Final answer: {scrub_think_tags(str(out)).strip()}"
+        # Fallback: try to parse from string repr "FinalAnswerStep(output=...)"
+        s = scrub_think_tags(str(step))
+        m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
+        if m:
+            return f"Final answer: {m.group(1).strip()}"
+    # If the only content would be an object repr like FinalAnswerStep(...), drop it;
+    # a cleaner "Final answer: ..." will come from the rule above or stdout.
     joined = "\n".join(parts).strip()
     if re.match(r"^FinalAnswer[^\n]+\)$", joined):
         return ""
+    return joined or scrub_think_tags(str(step))

agents/generator_and_critic.py CHANGED Viewed

@@ -7,66 +7,22 @@ from smolagents import CodeAgent, ToolCallingAgent
 # ---------------- Agent Prompts ----------------
 GENERATOR_INSTRUCTIONS = """
-You are the Generator/Refiner.
-Goal
-- Produce a concise draft that strictly satisfies the caller's constraints.
-- Use the managed agent named "critic_agent" to validate your draft.
-- Repeat: draft → call critic_agent → if ok, return draft; else revise and re-check.
-- Output ONLY the final draft text (no JSON, no commentary).
-Constraints to enforce in every revision:
-- Respect the maximum word count.
-- Use bullet points where appropriate (lines starting with '-' or '•').
-- Include all required phrases verbatim.
-- End with a line starting with 'Next steps:'.
-Implementation guidance (you write/run the code):
-- Use a small loop with a bounded number of rounds provided by the caller.
-- Call critic_agent with a single string payload that contains:
-  - the DRAFT
-  - the list of required phrases
-  - the word limit
-  - any other constraints you need
-- critic_agent returns a JSON object with keys: ok (bool), violations (list[str]), suggestions (str).
-- If ok == true, immediately return the current draft (and stop).
-- Otherwise, revise the draft to fix ALL violations, then re-check.
-Important:
-- Do NOT print anything except the final draft at the end.
-- Avoid verbose interleaved logs; keep code minimal.
 """
 CRITIC_INSTRUCTIONS = """
-You are the Critic.
-Input
-- A single string payload that includes:
-  - DRAFT text
-  - explicit constraints (e.g., max_words, must_include list, bullets rule, ending 'Next steps:')
-Task
-- Evaluate the DRAFT against the constraints and general quality (clarity, correctness, structure, tone).
-- Return ONLY compact JSON (no text outside JSON) with shape:
-{
-  "ok": true|false,
-  "violations": ["short, concrete issues..."],
-  "suggestions": "one short paragraph of actionable guidance"
-}
-Rules
-- ok=true ONLY if all constraints are satisfied AND the draft reads clearly.
-- Keep violations terse and actionable.
-- Keep suggestions short and prescriptive.
 """
 # ---------------- Factory ----------------
 def generate_generator_with_managed_critic(
     *,
     gen_max_steps: int = 12,
-    crt_max_steps: int = 2,
-) -> CodeAgent:
     """
     Returns a CodeAgent (generator) that manages a critic sub-agent.
     The critic is exposed to the generator as a managed agent (callable like a tool).
@@ -82,13 +38,13 @@ def generate_generator_with_managed_critic(
         tools=[],  # critic needs no tools; it just returns JSON text
         model=model,
         name="critic_agent",
-        description="Evaluates drafts against constraints and returns a compact JSON report.",
         instructions=CRITIC_INSTRUCTIONS,
         add_base_tools=False,
         max_steps=crt_max_steps,
     )
-    generator = CodeAgent(
         tools=[],  # keep toolbox minimal
         model=model,
         name="generator_with_managed_critic",

 # ---------------- Agent Prompts ----------------
 GENERATOR_INSTRUCTIONS = """
+You are the Generator. Your goal is to produce a concise draft that strictly satisfies the caller's constraints.
+Use the managed agent named "critic_agent" to iteratively improve your draft.
 """
 CRITIC_INSTRUCTIONS = """
+You are the Critic. Your job is to provide constructive, actionable feedback on drafts produced by the Generator.
+You should not iterate or make multiple tool calls.
+Instead, simply call the final answer tool with your evaluation and feedback.
 """
 # ---------------- Factory ----------------
 def generate_generator_with_managed_critic(
     *,
     gen_max_steps: int = 12,
+    crt_max_steps: int = 1,
+) -> ToolCallingAgent:
     """
     Returns a ToolCallingAgent (generator) that manages a critic sub-agent.
     The critic is exposed to the generator as a managed agent (callable like a tool).
         tools=[],  # critic needs no tools; it just returns its evaluation and feedback
         model=model,
         name="critic_agent",
+        description="Evaluates drafts against constraints and returns a compact set of recommendations report.",
         instructions=CRITIC_INSTRUCTIONS,
         add_base_tools=False,
         max_steps=crt_max_steps,
     )
+    generator = ToolCallingAgent(
         tools=[],  # keep toolbox minimal
         model=model,
         name="generator_with_managed_critic",