Chris4K commited on
Commit
7cadb5b
·
verified ·
1 Parent(s): eac7e08

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +7 -0
  2. README.md +84 -7
  3. main.py +586 -0
  4. requirements.txt +2 -0
Dockerfile ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+ WORKDIR /app
3
+ COPY requirements.txt .
4
+ RUN pip install --no-cache-dir -r requirements.txt
5
+ COPY main.py .
6
+ EXPOSE 7860
7
+ CMD ["python", "main.py"]
README.md CHANGED
@@ -1,11 +1,88 @@
1
  ---
2
- title: Agent Trace
3
- emoji: 🏢
4
- colorFrom: yellow
5
- colorTo: yellow
6
  sdk: docker
7
- pinned: false
8
- short_description: Logging and tracing
 
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: agent-trace — FORGE Telemetry
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: docker
7
+ pinned: true
8
+ license: mit
9
+ short_description: Telemetry backbone for the FORGE AI agent ecosystem
10
  ---
11
 
12
+ # 📊 agent-trace
13
+ ### FORGE Telemetry Backbone
14
+
15
+ Every agent in the FORGE ecosystem sends events here.
16
+ Owns: ingest, retention, aggregation, real-time dashboard.
17
+
18
+ ## What it stores
19
+
20
+ Every agent action produces a **trace event**:
21
+
22
+ ```json
23
+ {
24
+ "agent": "nexus",
25
+ "event_type": "llm_call",
26
+ "session_id": "abc123",
27
+ "task_id": "task-xyz",
28
+ "status": "ok",
29
+ "latency_ms": 1240,
30
+ "tokens_in": 512,
31
+ "tokens_out": 128,
32
+ "model": "qwen/qwen3.5-35b-a3b",
33
+ "reward": null,
34
+ "payload": {}
35
+ }
36
+ ```
37
+
38
+ ## Event types
39
+
40
+ `llm_call` · `tool_use` · `react_step` · `skill_load` · `kanban_move` · `slot_event` · `self_reflect` · `reward_signal` · `error` · `custom`
41
+
42
+ ## REST API
43
+
44
+ ```
45
+ POST /api/trace Ingest single event
46
+ POST /api/traces/batch Ingest array of events
47
+ GET /api/traces Query with filters
48
+ GET /api/stats Aggregated stats (window_hours param)
49
+ GET /api/agents List active agents
50
+ GET /api/session/{id} Full session timeline
51
+ PATCH /api/trace/{id}/reward Assign reward (called by agent-learn)
52
+ DELETE /api/purge Purge old traces
53
+ ```
54
+
55
+ ## MCP
56
+
57
+ ```
58
+ GET /mcp/sse SSE transport
59
+ POST /mcp JSON-RPC 2.0
60
+
61
+ Tools: trace_ingest, trace_query, trace_stats, trace_agents, trace_session, trace_reward
62
+ ```
63
+
64
+ ## Secrets
65
+
66
+ | Key | Description |
67
+ |-----|-------------|
68
+ | `TRACE_KEY` | Optional write auth key (X-Trace-Key header) |
69
+ | `RETAIN_DAYS` | Retention period in days (default: 7) |
70
+
71
+ ## Integration
72
+
73
+ Every FORGE space sends traces here. Add to any agent:
74
+
75
+ ```python
76
+ import requests
77
+
78
+ TRACE_URL = "https://chris4k-agent-trace.hf.space"
79
+
80
+ def trace(agent, event_type, **kwargs):
81
+ requests.post(f"{TRACE_URL}/api/trace", json={
82
+ "agent": agent,
83
+ "event_type": event_type,
84
+ **kwargs
85
+ }, timeout=3)
86
+ ```
87
+
88
+ Built by [Chris4K](https://huggingface.co/Chris4K) — ki-fusion-labs.de
main.py ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agent-trace v2 — FORGE Telemetry Backbone
3
+ Every agent sends events here.
4
+ Owns: ingest, retention, aggregation, real-time dashboard.
5
+ agent-learn reads rewards from here.
6
+ """
7
+
8
+ import asyncio, json, os, sqlite3, time, uuid
9
+ from contextlib import asynccontextmanager
10
+ from pathlib import Path
11
+ from typing import Optional
12
+
13
+ import uvicorn
14
+ from fastapi import FastAPI, HTTPException, Query, Request
15
+ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Config
19
+ # ---------------------------------------------------------------------------
20
+ DB_PATH = Path(os.getenv("TRACE_DB", "/tmp/trace.db"))
21
+ PORT = int(os.getenv("PORT", "7860"))
22
+ INGEST_KEY = os.getenv("TRACE_KEY", "")
23
+ RETAIN_DAYS = int(os.getenv("RETAIN_DAYS", "7"))
24
+
25
+ VALID_TYPES = {"llm_call","tool_use","react_step","skill_load","kanban_move",
26
+ "slot_event","self_reflect","reward_signal","error","custom"}
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Database
30
+ # ---------------------------------------------------------------------------
31
+ def get_db():
32
+ conn = sqlite3.connect(str(DB_PATH), check_same_thread=False)
33
+ conn.row_factory = sqlite3.Row
34
+ conn.execute("PRAGMA journal_mode=WAL")
35
+ conn.execute("PRAGMA synchronous=NORMAL")
36
+ return conn
37
+
38
+ def init_db():
39
+ conn = get_db()
40
+ conn.executescript("""
41
+ CREATE TABLE IF NOT EXISTS traces (
42
+ id TEXT PRIMARY KEY,
43
+ agent TEXT NOT NULL DEFAULT 'unknown',
44
+ session_id TEXT NOT NULL DEFAULT '',
45
+ task_id TEXT NOT NULL DEFAULT '',
46
+ event_type TEXT NOT NULL DEFAULT 'custom',
47
+ status TEXT NOT NULL DEFAULT 'ok',
48
+ latency_ms REAL,
49
+ tokens_in INTEGER,
50
+ tokens_out INTEGER,
51
+ model TEXT,
52
+ tool_name TEXT,
53
+ skill_id TEXT,
54
+ reward REAL,
55
+ error_msg TEXT,
56
+ payload TEXT NOT NULL DEFAULT '{}',
57
+ tags TEXT NOT NULL DEFAULT '[]',
58
+ ts REAL NOT NULL
59
+ );
60
+ CREATE INDEX IF NOT EXISTS idx_tr_ts ON traces(ts DESC);
61
+ CREATE INDEX IF NOT EXISTS idx_tr_agent ON traces(agent);
62
+ CREATE INDEX IF NOT EXISTS idx_tr_type ON traces(event_type);
63
+ CREATE INDEX IF NOT EXISTS idx_tr_sess ON traces(session_id);
64
+ CREATE INDEX IF NOT EXISTS idx_tr_task ON traces(task_id);
65
+ CREATE INDEX IF NOT EXISTS idx_tr_reward ON traces(reward);
66
+
67
+ CREATE TABLE IF NOT EXISTS hourly_stats (
68
+ hour TEXT NOT NULL,
69
+ agent TEXT NOT NULL,
70
+ event_type TEXT NOT NULL,
71
+ count INTEGER NOT NULL DEFAULT 0,
72
+ errors INTEGER NOT NULL DEFAULT 0,
73
+ total_lat REAL NOT NULL DEFAULT 0,
74
+ total_tok INTEGER NOT NULL DEFAULT 0,
75
+ PRIMARY KEY (hour, agent, event_type)
76
+ );
77
+ """)
78
+ conn.commit(); conn.close()
79
+
80
+ def purge_old():
81
+ cutoff = time.time() - RETAIN_DAYS * 86400
82
+ conn = get_db()
83
+ n = conn.execute("DELETE FROM traces WHERE ts < ?", (cutoff,)).rowcount
84
+ conn.execute("DELETE FROM hourly_stats WHERE hour < ?",
85
+ (time.strftime("%Y-%m-%dT%H", time.gmtime(cutoff)),))
86
+ conn.commit(); conn.close()
87
+ return n
88
+
89
+ # ---------------------------------------------------------------------------
90
+ # Write
91
+ # ---------------------------------------------------------------------------
92
+ def ingest_trace(data: dict) -> str:
93
+ event_type = str(data.get("event_type","custom"))
94
+ if event_type not in VALID_TYPES: event_type = "custom"
95
+ status = "error" if str(data.get("status","ok")).lower()=="error" else "ok"
96
+ latency_ms = float(data["latency_ms"]) if data.get("latency_ms") is not None else None
97
+ tokens_in = int(data["tokens_in"]) if data.get("tokens_in") is not None else None
98
+ tokens_out = int(data["tokens_out"]) if data.get("tokens_out") is not None else None
99
+ reward = float(data["reward"]) if data.get("reward") is not None else None
100
+ ts = float(data.get("ts") or time.time())
101
+ trace_id = str(data.get("id") or uuid.uuid4())
102
+ payload = json.dumps(data.get("payload") or {})
103
+ tags = json.dumps(data.get("tags") or [])
104
+ agent = str(data.get("agent","unknown"))[:64]
105
+
106
+ conn = get_db()
107
+ conn.execute("""
108
+ INSERT OR IGNORE INTO traces
109
+ (id,agent,session_id,task_id,event_type,status,
110
+ latency_ms,tokens_in,tokens_out,model,tool_name,skill_id,
111
+ reward,error_msg,payload,tags,ts)
112
+ VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
113
+ (trace_id, agent,
114
+ str(data.get("session_id",""))[:128],
115
+ str(data.get("task_id",""))[:128],
116
+ event_type, status, latency_ms, tokens_in, tokens_out,
117
+ str(data.get("model","") or "")[:128],
118
+ str(data.get("tool_name","") or "")[:128],
119
+ str(data.get("skill_id","") or "")[:128],
120
+ reward, str(data.get("error_msg","") or "")[:1024],
121
+ payload, tags, ts))
122
+
123
+ hour = time.strftime("%Y-%m-%dT%H", time.gmtime(ts))
124
+ total_tok = (tokens_in or 0) + (tokens_out or 0)
125
+ conn.execute("""
126
+ INSERT INTO hourly_stats (hour,agent,event_type,count,errors,total_lat,total_tok)
127
+ VALUES (?,?,?,1,?,?,?)
128
+ ON CONFLICT(hour,agent,event_type) DO UPDATE SET
129
+ count=count+1, errors=errors+excluded.errors,
130
+ total_lat=total_lat+excluded.total_lat,
131
+ total_tok=total_tok+excluded.total_tok""",
132
+ (hour, agent, event_type,
133
+ 1 if status=="error" else 0,
134
+ latency_ms or 0, total_tok))
135
+ conn.commit(); conn.close()
136
+ return trace_id
137
+
138
+ def ingest_batch(events: list) -> dict:
139
+ ok, errs = 0, []
140
+ for i, ev in enumerate(events):
141
+ try:
142
+ ingest_trace(ev); ok += 1
143
+ except Exception as e:
144
+ errs.append({"index":i,"error":str(e)})
145
+ return {"ingested":ok,"errors":errs}
146
+
147
+ def update_reward(trace_id: str, reward: float) -> bool:
148
+ conn = get_db()
149
+ n = conn.execute("UPDATE traces SET reward=? WHERE id=?", (reward, trace_id)).rowcount
150
+ conn.commit(); conn.close()
151
+ return n > 0
152
+
153
+ # ---------------------------------------------------------------------------
154
+ # Read
155
+ # ---------------------------------------------------------------------------
156
+ def query_traces(agent="", event_type="", session_id="", task_id="",
157
+ status="", has_reward=False, since_ts=0.0,
158
+ limit=100, offset=0) -> list:
159
+ conn = get_db()
160
+ where, params = [], []
161
+ if agent: where.append("agent=?"); params.append(agent)
162
+ if event_type: where.append("event_type=?"); params.append(event_type)
163
+ if session_id: where.append("session_id=?"); params.append(session_id)
164
+ if task_id: where.append("task_id=?"); params.append(task_id)
165
+ if status: where.append("status=?"); params.append(status)
166
+ if has_reward: where.append("reward IS NOT NULL")
167
+ if since_ts: where.append("ts >= ?"); params.append(since_ts)
168
+ sql = ("SELECT * FROM traces" +
169
+ (f" WHERE {' AND '.join(where)}" if where else "") +
170
+ " ORDER BY ts DESC LIMIT ? OFFSET ?")
171
+ rows = conn.execute(sql, params+[limit,offset]).fetchall()
172
+ conn.close()
173
+ result = []
174
+ for r in rows:
175
+ d = dict(r)
176
+ for f in ("payload","tags"):
177
+ try: d[f] = json.loads(d[f])
178
+ except Exception: pass
179
+ result.append(d)
180
+ return result
181
+
182
+ def get_agents() -> list:
183
+ conn = get_db()
184
+ rows = conn.execute("SELECT DISTINCT agent FROM traces ORDER BY agent").fetchall()
185
+ conn.close()
186
+ return [r[0] for r in rows]
187
+
188
+ def get_stats(window_hours=24) -> dict:
189
+ conn = get_db()
190
+ since = time.time() - window_hours * 3600
191
+ total = conn.execute("SELECT COUNT(*) FROM traces WHERE ts>=?",(since,)).fetchone()[0]
192
+ errors = conn.execute("SELECT COUNT(*) FROM traces WHERE ts>=? AND status='error'",(since,)).fetchone()[0]
193
+ by_agent = conn.execute(
194
+ "SELECT agent, COUNT(*) as cnt, SUM(CASE WHEN status='error' THEN 1 ELSE 0 END) as errs "
195
+ "FROM traces WHERE ts>=? GROUP BY agent ORDER BY cnt DESC",(since,)).fetchall()
196
+ by_type = conn.execute(
197
+ "SELECT event_type, COUNT(*) as cnt FROM traces WHERE ts>=? GROUP BY event_type ORDER BY cnt DESC",(since,)).fetchall()
198
+ avg_lat = conn.execute("SELECT AVG(latency_ms) FROM traces WHERE ts>=? AND latency_ms IS NOT NULL",(since,)).fetchone()[0]
199
+ total_tok = conn.execute(
200
+ "SELECT SUM(COALESCE(tokens_in,0)+COALESCE(tokens_out,0)) FROM traces WHERE ts>=?",(since,)).fetchone()[0]
201
+ rw = conn.execute(
202
+ "SELECT COUNT(*),AVG(reward),MIN(reward),MAX(reward) FROM traces WHERE ts>=? AND reward IS NOT NULL",(since,)).fetchone()
203
+ since_hour = time.strftime("%Y-%m-%dT%H", time.gmtime(since))
204
+ hourly = conn.execute(
205
+ "SELECT hour, SUM(count) as total, SUM(errors) as errs "
206
+ "FROM hourly_stats WHERE hour>=? GROUP BY hour ORDER BY hour",(since_hour,)).fetchall()
207
+ conn.close()
208
+ return {
209
+ "window_hours": window_hours,
210
+ "total_events": total,
211
+ "error_count": errors,
212
+ "error_rate": round(errors/max(total,1)*100,2),
213
+ "avg_latency_ms": round(avg_lat or 0,1),
214
+ "total_tokens": total_tok or 0,
215
+ "by_agent": [dict(r) for r in by_agent],
216
+ "by_event_type": [dict(r) for r in by_type],
217
+ "reward_stats": {"count":rw[0]or 0,"avg":round(rw[1]or 0,4),"min":rw[2],"max":rw[3]},
218
+ "hourly_trend": [dict(r) for r in hourly],
219
+ }
220
+
221
+ # ---------------------------------------------------------------------------
222
+ # Seed demo data
223
+ # ---------------------------------------------------------------------------
224
+ def seed_demo():
225
+ conn = get_db()
226
+ n = conn.execute("SELECT COUNT(*) FROM traces").fetchone()[0]
227
+ conn.close()
228
+ if n > 0: return
229
+ import random; random.seed(42)
230
+ now = time.time()
231
+ agents = ["nexus","pulse","kanban","memory","relay"]
232
+ types = list(VALID_TYPES)
233
+ for _ in range(80):
234
+ a = random.choice(agents)
235
+ et = random.choice(types)
236
+ ingest_trace({
237
+ "agent": a,
238
+ "event_type": et,
239
+ "session_id": f"demo-{random.randint(1,8)}",
240
+ "task_id": f"task-{random.randint(1,15)}",
241
+ "status": "error" if random.random()<0.08 else "ok",
242
+ "latency_ms": round(random.uniform(80,3500),1) if et=="llm_call" else round(random.uniform(5,400),1),
243
+ "tokens_in": random.randint(100,2000) if et=="llm_call" else None,
244
+ "tokens_out": random.randint(50,800) if et=="llm_call" else None,
245
+ "model": "qwen/qwen3.5-35b-a3b" if a=="nexus" else "",
246
+ "tool_name": random.choice(["web_search","calculator","kanban_create","slot_reserve"]) if et=="tool_use" else "",
247
+ "reward": round(random.uniform(-0.3,1.0),3) if random.random()<0.3 else None,
248
+ "ts": now - random.uniform(0, 23*3600),
249
+ })
250
+
251
+ # ---------------------------------------------------------------------------
252
+ # MCP
253
+ # ---------------------------------------------------------------------------
254
+ MCP_TOOLS = [
255
+ {"name":"trace_ingest","description":"Ingest a trace event into FORGE telemetry.",
256
+ "inputSchema":{"type":"object","required":["agent","event_type"],
257
+ "properties":{"agent":{"type":"string"},"event_type":{"type":"string"},
258
+ "session_id":{"type":"string"},"task_id":{"type":"string"},
259
+ "status":{"type":"string"},"latency_ms":{"type":"number"},
260
+ "tokens_in":{"type":"integer"},"tokens_out":{"type":"integer"},
261
+ "model":{"type":"string"},"tool_name":{"type":"string"},
262
+ "skill_id":{"type":"string"},"error_msg":{"type":"string"},
263
+ "payload":{"type":"object"},"tags":{"type":"array"}}}},
264
+ {"name":"trace_query","description":"Query trace events with filters.",
265
+ "inputSchema":{"type":"object","properties":{"agent":{"type":"string"},"event_type":{"type":"string"},
266
+ "session_id":{"type":"string"},"task_id":{"type":"string"},
267
+ "status":{"type":"string"},"since_hours":{"type":"number"},
268
+ "limit":{"type":"integer"}}}},
269
+ {"name":"trace_stats","description":"Get aggregated telemetry statistics.",
270
+ "inputSchema":{"type":"object","properties":{"window_hours":{"type":"integer","default":24}}}},
271
+ {"name":"trace_agents","description":"List all agents that have sent telemetry.",
272
+ "inputSchema":{"type":"object","properties":{}}},
273
+ {"name":"trace_session","description":"Get complete event timeline for a session.",
274
+ "inputSchema":{"type":"object","required":["session_id"],"properties":{"session_id":{"type":"string"}}}},
275
+ {"name":"trace_reward","description":"Assign reward score to a trace event (called by agent-learn).",
276
+ "inputSchema":{"type":"object","required":["trace_id","reward"],
277
+ "properties":{"trace_id":{"type":"string"},"reward":{"type":"number"}}}},
278
+ ]
279
+
280
+ def handle_mcp(method, params, req_id):
281
+ def ok(r): return {"jsonrpc":"2.0","id":req_id,"result":r}
282
+ def txt(d): return ok({"content":[{"type":"text","text":json.dumps(d)}]})
283
+ if method=="initialize":
284
+ return ok({"protocolVersion":"2024-11-05","serverInfo":{"name":"agent-trace","version":"1.0.0"},"capabilities":{"tools":{}}})
285
+ if method=="tools/list": return ok({"tools":MCP_TOOLS})
286
+ if method=="tools/call":
287
+ n, a = params.get("name",""), params.get("arguments",{})
288
+ if n=="trace_ingest":
289
+ tid = ingest_trace(a); return txt({"ok":True,"trace_id":tid})
290
+ if n=="trace_query":
291
+ ev = query_traces(agent=a.get("agent",""),event_type=a.get("event_type",""),
292
+ session_id=a.get("session_id",""),task_id=a.get("task_id",""),
293
+ status=a.get("status",""),since_ts=time.time()-a.get("since_hours",1)*3600,
294
+ limit=min(a.get("limit",50),200))
295
+ return txt({"events":ev,"count":len(ev)})
296
+ if n=="trace_stats": return txt(get_stats(a.get("window_hours",24)))
297
+ if n=="trace_agents": return txt({"agents":get_agents()})
298
+ if n=="trace_session":
299
+ return txt({"session_id":a["session_id"],"events":query_traces(session_id=a["session_id"],limit=500)})
300
+ if n=="trace_reward":
301
+ return txt({"ok":update_reward(a["trace_id"],float(a["reward"])),"trace_id":a["trace_id"]})
302
+ return {"jsonrpc":"2.0","id":req_id,"error":{"code":-32601,"message":f"Unknown tool: {n}"}}
303
+ if method in ("notifications/initialized","notifications/cancelled"): return None
304
+ return {"jsonrpc":"2.0","id":req_id,"error":{"code":-32601,"message":f"Method not found: {method}"}}
305
+
306
+ # ---------------------------------------------------------------------------
307
+ # App
308
+ # ---------------------------------------------------------------------------
309
+ @asynccontextmanager
310
+ async def lifespan(app):
311
+ init_db(); seed_demo()
312
+ asyncio.create_task(_purge_loop())
313
+ yield
314
+
315
+ async def _purge_loop():
316
+ while True:
317
+ await asyncio.sleep(3600); purge_old()
318
+
319
+ app = FastAPI(title="agent-trace", version="1.0.0", lifespan=lifespan)
320
+
321
+ def _auth(r): return not INGEST_KEY or r.headers.get("x-trace-key","") == INGEST_KEY
322
+
323
+ @app.post("/api/trace", status_code=201)
324
+ async def api_ingest(request: Request):
325
+ if not _auth(request): raise HTTPException(403,"Invalid X-Trace-Key")
326
+ body = await request.json()
327
+ if isinstance(body, list): return JSONResponse(ingest_batch(body))
328
+ tid = ingest_trace(body)
329
+ return JSONResponse({"ok":True,"trace_id":tid})
330
+
331
+ @app.post("/api/traces/batch", status_code=201)
332
+ async def api_batch(request: Request):
333
+ if not _auth(request): raise HTTPException(403,"Invalid X-Trace-Key")
334
+ body = await request.json()
335
+ return JSONResponse(ingest_batch(body if isinstance(body,list) else [body]))
336
+
337
+ @app.get("/api/traces")
338
+ async def api_query(
339
+ agent:str=Query(""), event_type:str=Query(""),
340
+ session_id:str=Query(""), task_id:str=Query(""),
341
+ status:str=Query(""), has_reward:bool=Query(False),
342
+ since_hours:float=Query(24.0), limit:int=Query(100,le=1000), offset:int=Query(0)):
343
+ ev = query_traces(agent=agent, event_type=event_type, session_id=session_id,
344
+ task_id=task_id, status=status, has_reward=has_reward,
345
+ since_ts=time.time()-since_hours*3600, limit=limit, offset=offset)
346
+ return JSONResponse({"events":ev,"count":len(ev)})
347
+
348
+ @app.get("/api/stats")
349
+ async def api_stats(window_hours:int=Query(24)):
350
+ return JSONResponse(get_stats(window_hours))
351
+
352
+ @app.get("/api/agents")
353
+ async def api_agents(): return JSONResponse({"agents":get_agents()})
354
+
355
+ @app.get("/api/session/{sid}")
356
+ async def api_session(sid:str):
357
+ return JSONResponse({"session_id":sid,"events":query_traces(session_id=sid,limit=500)})
358
+
359
+ @app.patch("/api/trace/{trace_id}/reward")
360
+ async def api_reward(trace_id:str, request:Request):
361
+ if not _auth(request): raise HTTPException(403,"Invalid X-Trace-Key")
362
+ body = await request.json()
363
+ updated = update_reward(trace_id, float(body.get("reward",0)))
364
+ return JSONResponse({"ok":updated,"trace_id":trace_id})
365
+
366
+ @app.delete("/api/purge")
367
+ async def api_purge(request:Request):
368
+ if not _auth(request): raise HTTPException(403,"Invalid X-Trace-Key")
369
+ return JSONResponse({"ok":True,"deleted":purge_old(),"retain_days":RETAIN_DAYS})
370
+
371
+ @app.get("/api/health")
372
+ async def api_health():
373
+ conn=get_db(); n=conn.execute("SELECT COUNT(*) FROM traces").fetchone()[0]; conn.close()
374
+ return JSONResponse({"ok":True,"total_traces":n,"retain_days":RETAIN_DAYS})
375
+
376
+ @app.get("/mcp/sse")
377
+ async def mcp_sse(request:Request):
378
+ async def gen():
379
+ yield f"data: {json.dumps({'jsonrpc':'2.0','method':'connected','params':{}})}\n\n"
380
+ yield f"data: {json.dumps({'jsonrpc':'2.0','method':'notifications/tools','params':{'tools':MCP_TOOLS}})}\n\n"
381
+ while True:
382
+ if await request.is_disconnected(): break
383
+ yield ": ping\n\n"; await asyncio.sleep(15)
384
+ return StreamingResponse(gen(), media_type="text/event-stream",
385
+ headers={"Cache-Control":"no-cache","Connection":"keep-alive","X-Accel-Buffering":"no"})
386
+
387
+ @app.post("/mcp")
388
+ async def mcp_rpc(request:Request):
389
+ try: body = await request.json()
390
+ except Exception: return JSONResponse({"jsonrpc":"2.0","id":None,"error":{"code":-32700,"message":"Parse error"}})
391
+ if isinstance(body,list):
392
+ return JSONResponse([r for r in [handle_mcp(x.get("method",""),x.get("params",{}),x.get("id")) for x in body] if r])
393
+ r = handle_mcp(body.get("method",""), body.get("params",{}), body.get("id"))
394
+ return JSONResponse(r or {"jsonrpc":"2.0","id":body.get("id"),"result":{}})
395
+
396
+ # ---------------------------------------------------------------------------
397
+ # SPA
398
+ # ---------------------------------------------------------------------------
399
+ SPA = r"""<!DOCTYPE html>
400
+ <html lang="en">
401
+ <head>
402
+ <meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1">
403
+ <title>&#128202; TRACE &#8212; FORGE Telemetry</title>
404
+ <style>
405
+ @import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Syne:wght@400;600;800&family=DM+Mono:wght@300;400;500&display=swap');
406
+ *{box-sizing:border-box;margin:0;padding:0}
407
+ :root{--bg:#07070e;--sf:#0e0e1a;--sf2:#131320;--br:#1c1c2e;--ac:#ff6b00;--ac2:#ff9500;--tx:#dde0f0;--mu:#55557a;--gr:#00ff88;--rd:#ff4455;--cy:#06b6d4;--pu:#8b5cf6;--ye:#f59e0b}
408
+ html,body{height:100%;background:var(--bg);color:var(--tx);font-family:'Syne',sans-serif;overflow:hidden}
409
+ ::-webkit-scrollbar{width:5px;height:5px}::-webkit-scrollbar-track{background:var(--sf)}::-webkit-scrollbar-thumb{background:var(--br);border-radius:3px}
410
+ .app{display:grid;grid-template-rows:52px 1fr;height:100vh}
411
+ .hdr{display:flex;align-items:center;gap:1rem;padding:0 1.5rem;border-bottom:1px solid var(--br);background:var(--sf)}
412
+ .logo{font-family:'Space Mono',monospace;font-size:1.1rem;font-weight:700;color:var(--ac)}
413
+ .sub{font-family:'DM Mono',monospace;font-size:0.62rem;color:var(--mu);letter-spacing:0.2em;text-transform:uppercase}
414
+ .hs{text-align:center}.hs-n{font-family:'Space Mono',monospace;font-size:1rem;font-weight:700;color:var(--ac)}
415
+ .hs-l{font-family:'DM Mono',monospace;font-size:0.58rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em}
416
+ .hstats{display:flex;gap:1.5rem;margin-left:1rem}
417
+ .win-btn{padding:3px 10px;font-family:'DM Mono',monospace;font-size:.7rem;background:var(--sf2);border:1px solid var(--br);color:var(--mu);border-radius:4px;cursor:pointer;transition:all .15s}
418
+ .win-btn.active,.win-btn:hover{border-color:var(--ac);color:var(--ac)}
419
+ .dot{width:8px;height:8px;border-radius:50%;background:var(--gr);animation:pulse 2s ease-in-out infinite}
420
+ @keyframes pulse{0%,100%{opacity:1;box-shadow:0 0 0 0 rgba(0,255,136,.5)}50%{opacity:.6;box-shadow:0 0 0 6px rgba(0,255,136,0)}}
421
+ .body{display:grid;grid-template-columns:1fr 320px;overflow:hidden}
422
+ .left{display:flex;flex-direction:column;overflow:hidden;border-right:1px solid var(--br)}
423
+ .kpis{display:grid;grid-template-columns:repeat(5,1fr);gap:1px;border-bottom:1px solid var(--br);background:var(--br)}
424
+ .kpi{background:var(--sf);padding:.65rem 1rem;text-align:center}
425
+ .kpi-n{font-family:'Space Mono',monospace;font-size:1.3rem;font-weight:700;color:var(--ac);line-height:1}
426
+ .kpi-l{font-family:'DM Mono',monospace;font-size:.58rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em;margin-top:3px}
427
+ .charts{display:grid;grid-template-columns:1fr 1fr;gap:1px;border-bottom:1px solid var(--br);background:var(--br);height:155px}
428
+ .cpanel{background:var(--sf);padding:.6rem .9rem;display:flex;flex-direction:column;overflow:hidden}
429
+ .ct{font-family:'DM Mono',monospace;font-size:.62rem;color:var(--mu);text-transform:uppercase;letter-spacing:.12em;margin-bottom:.4rem}
430
+ .bars{flex:1;display:flex;align-items:flex-end;gap:2px}
431
+ .bl{font-family:'DM Mono',monospace;font-size:.52rem;color:var(--mu);text-align:center;margin-top:2px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
432
+ .dw{flex:1;display:flex;gap:.6rem;align-items:center}
433
+ .leg{flex:1;display:flex;flex-direction:column;gap:3px}
434
+ .li{display:flex;align-items:center;gap:4px;font-family:'DM Mono',monospace;font-size:.62rem;color:var(--mu)}
435
+ .ld{width:7px;height:7px;border-radius:50%;flex-shrink:0}.lv{margin-left:auto;color:var(--tx)}
436
+ .feed-hdr{display:flex;align-items:center;gap:.6rem;padding:.55rem .9rem;border-bottom:1px solid var(--br)}
437
+ .filt-sel{background:var(--sf2);border:1px solid var(--br);color:var(--tx);padding:2px 7px;font-family:'DM Mono',monospace;font-size:.68rem;border-radius:4px;outline:none}
438
+ .filt-sel:focus{border-color:var(--ac)}.filt-sel option{background:var(--sf)}
439
+ .feed{flex:1;overflow-y:auto}
440
+ .erow{display:grid;grid-template-columns:65px 75px 88px 1fr 75px;gap:.4rem;align-items:center;padding:.3rem .9rem;border-bottom:1px solid #11111d;font-family:'DM Mono',monospace;font-size:.7rem;cursor:pointer;transition:background .1s}
441
+ .erow:hover{background:var(--sf)}.erow.sel{background:var(--sf2);border-left:2px solid var(--ac)}
442
+ .et{font-size:.6rem;padding:1px 5px;border-radius:3px;text-align:center}
443
+ .type-llm_call{background:#1a1000;color:#ff9500;border:1px solid #3a2500}
444
+ .type-tool_use{background:#001a0a;color:#00ff88;border:1px solid #004422}
445
+ .type-react_step{background:#0a001a;color:#8b5cf6;border:1px solid #2a0066}
446
+ .type-skill_load{background:#001a1a;color:#06b6d4;border:1px solid #003344}
447
+ .type-kanban_move{background:#1a0a00;color:#f59e0b;border:1px solid #443000}
448
+ .type-slot_event{background:#0a0a1a;color:#6366f1;border:1px solid #22246a}
449
+ .type-self_reflect{background:#1a0010;color:#ec4899;border:1px solid #440033}
450
+ .type-reward_signal{background:#001a08;color:#10b981;border:1px solid #004422}
451
+ .type-error{background:#1a0000;color:#ff4455;border:1px solid #440011}
452
+ .type-custom{background:var(--sf2);color:var(--mu);border:1px solid var(--br)}
453
+ .right{display:flex;flex-direction:column;overflow:hidden}
454
+ .rt{font-family:'DM Mono',monospace;font-size:.62rem;color:var(--mu);text-transform:uppercase;letter-spacing:.15em;padding:.65rem 1rem;border-bottom:1px solid var(--br)}
455
+ .dp{flex:1;overflow-y:auto;padding:.7rem 1rem}
456
+ .df{margin-bottom:.55rem}
457
+ .dfl{font-family:'DM Mono',monospace;font-size:.6rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em;margin-bottom:2px}
458
+ .dfv{font-family:'DM Mono',monospace;font-size:.75rem;word-break:break-all}
459
+ pre.dj{background:#0a0a14;border:1px solid var(--br);border-radius:4px;padding:.55rem;font-size:.63rem;color:var(--gr);overflow-x:auto;white-space:pre-wrap;max-height:110px;overflow-y:auto}
460
+ .empty{display:flex;align-items:center;justify-content:center;height:100%;color:var(--mu);font-family:'DM Mono',monospace;font-size:.78rem}
461
+ </style></head><body>
462
+ <div class="app">
463
+ <header class="hdr">
464
+ <div><div class="logo">&#128202; TRACE</div><div class="sub">FORGE Telemetry Backbone</div></div>
465
+ <div class="hstats">
466
+ <div class="hs"><div class="hs-n" id="hT">&#8212;</div><div class="hs-l">Events</div></div>
467
+ <div class="hs"><div class="hs-n" id="hE" style="color:var(--rd)">&#8212;</div><div class="hs-l">Err%</div></div>
468
+ <div class="hs"><div class="hs-n" id="hL">&#8212;</div><div class="hs-l">Avg ms</div></div>
469
+ <div class="hs"><div class="hs-n" id="hTk" style="color:var(--cy)">&#8212;</div><div class="hs-l">Tokens</div></div>
470
+ </div>
471
+ <div style="display:flex;gap:.5rem;margin-left:1.5rem">
472
+ <button class="win-btn active" id="w1" onclick="setW(1)">1h</button>
473
+ <button class="win-btn" id="w6" onclick="setW(6)">6h</button>
474
+ <button class="win-btn" id="w24" onclick="setW(24)">24h</button>
475
+ </div>
476
+ <div style="margin-left:auto"><div class="dot"></div></div>
477
+ </header>
478
+ <div class="body">
479
+ <div class="left">
480
+ <div class="kpis" id="kpis"></div>
481
+ <div class="charts">
482
+ <div class="cpanel"><div class="ct">Hourly trend</div><div class="bars" id="bars"></div></div>
483
+ <div class="cpanel">
484
+ <div class="ct">By event type</div>
485
+ <div class="dw">
486
+ <svg width="80" height="80" viewBox="0 0 36 36" id="donut"></svg>
487
+ <div class="leg" id="leg"></div>
488
+ </div>
489
+ </div>
490
+ </div>
491
+ <div class="feed-hdr">
492
+ <span style="font-family:'DM Mono',monospace;font-size:.65rem;color:var(--mu);text-transform:uppercase;letter-spacing:.15em">Stream</span>
493
+ <div style="display:flex;gap:.4rem;margin-left:auto">
494
+ <select class="filt-sel" id="fA" onchange="loadFeed()">
495
+ <option value="">all agents</option>
496
+ </select>
497
+ <select class="filt-sel" id="fT" onchange="loadFeed()">
498
+ <option value="">all types</option>
499
+ <option>llm_call</option><option>tool_use</option><option>react_step</option>
500
+ <option>skill_load</option><option>kanban_move</option><option>slot_event</option>
501
+ <option>self_reflect</option><option>reward_signal</option><option>error</option><option>custom</option>
502
+ </select>
503
+ <select class="filt-sel" id="fS" onchange="loadFeed()">
504
+ <option value="">all</option><option value="ok">ok</option><option value="error">error</option>
505
+ </select>
506
+ </div>
507
+ </div>
508
+ <div class="feed" id="feed"><div class="empty">Loading...</div></div>
509
+ </div>
510
+ <div class="right">
511
+ <div class="rt" id="rtitle">Select an event</div>
512
+ <div class="dp" id="dp"><div class="empty" style="height:200px">&#8592; Click event to inspect</div></div>
513
+ </div>
514
+ </div>
515
+ </div>
516
+ <script>
517
+ const TC={llm_call:'#ff9500',tool_use:'#00ff88',react_step:'#8b5cf6',skill_load:'#06b6d4',kanban_move:'#f59e0b',slot_event:'#6366f1',self_reflect:'#ec4899',reward_signal:'#10b981',error:'#ff4455',custom:'#55557a'};
518
+ let W=1,events=[],selId=null;
519
+
520
+ function setW(h){W=h;['w1','w6','w24'].forEach(i=>document.getElementById(i).classList.remove('active'));document.getElementById('w'+h).classList.add('active');loadAll()}
521
+ async function loadAll(){await Promise.all([loadStats(),loadFeed()])}
522
+
523
+ async function loadStats(){
524
+ const s=await(await fetch('/api/stats?window_hours='+W)).json();
525
+ document.getElementById('hT').textContent=fmt(s.total_events);
526
+ document.getElementById('hE').textContent=s.error_rate+'%';
527
+ document.getElementById('hL').textContent=s.avg_latency_ms;
528
+ document.getElementById('hTk').textContent=fmtk(s.total_tokens);
529
+ // KPIs
530
+ const ba={};(s.by_agent||[]).forEach(a=>ba[a.agent]=a.cnt);
531
+ const aa=['nexus','pulse','kanban','memory'];
532
+ let other=0;Object.entries(ba).forEach(([a,c])=>{if(!aa.includes(a))other+=c});
533
+ document.getElementById('kpis').innerHTML=aa.map(a=>`<div class="kpi"><div class="kpi-n">${fmt(ba[a]||0)}</div><div class="kpi-l">${a}</div></div>`).join('')+`<div class="kpi"><div class="kpi-n" style="color:var(--mu)">${fmt(other)}</div><div class="kpi-l">other</div></div>`;
534
+ // Agent filter
535
+ const sel=document.getElementById('fA'),cur=sel.value;
536
+ sel.innerHTML='<option value="">all agents</option>';
537
+ (s.by_agent||[]).forEach(a=>{const o=document.createElement('option');o.value=a.agent;o.textContent=a.agent;if(a.agent===cur)o.selected=true;sel.appendChild(o)});
538
+ // Bars
539
+ const trend=s.hourly_trend||[],max=Math.max(...trend.map(r=>r.total||0),1);
540
+ const slots=[];for(let i=W-1;i>=0;i--){const d=new Date(Date.now()-i*3600000),k=d.toISOString().slice(0,13),f=trend.find(r=>r.hour===k);slots.push({h:d.getHours()+'h',n:f?f.total:0,e:f?f.errs:0})}
541
+ document.getElementById('bars').innerHTML=slots.map(s=>{const pct=Math.max(3,Math.round(s.n/max*100)),ep=s.n>0?Math.round(s.e/s.n*pct):0;return`<div style="flex:1;display:flex;flex-direction:column;align-items:center"><div style="width:100%;flex:1;display:flex;flex-direction:column;justify-content:flex-end"><div style="height:${pct}%;background:var(--ac);border-radius:2px 2px 0 0;min-height:2px;position:relative" title="${s.n} events">${ep>0?`<div style="height:${ep}px;max-height:100%;background:var(--rd);position:absolute;bottom:0;left:0;right:0;border-radius:2px 2px 0 0"></div>`:''}</div></div><div class="bl">${s.h}</div></div>`}).join('');
542
+ // Donut
543
+ const bt=s.by_event_type||[],tot=bt.reduce((s,d)=>s+(d.cnt||0),0)||1;
544
+ const top=bt.slice(0,6);let off=0;const cx=18,cy=18,r=15.9,c=2*Math.PI*r;
545
+ let paths='<circle cx="18" cy="18" r="15.9" fill="none" stroke="#1c1c2e" stroke-width="3.5"/>';
546
+ top.forEach(d=>{const p=d.cnt/tot,dash=p*c,col=TC[d.event_type]||'#555';paths+=`<circle cx="${cx}" cy="${cy}" r="${r}" fill="none" stroke="${col}" stroke-width="3.5" stroke-dasharray="${dash} ${c-dash}" stroke-dashoffset="${-off*c}" transform="rotate(-90 18 18)"/>`;off+=p});
547
+ document.getElementById('donut').innerHTML=paths;
548
+ document.getElementById('leg').innerHTML=top.slice(0,5).map(d=>`<div class="li"><span class="ld" style="background:${TC[d.event_type]||'#555'}"></span><span>${d.event_type}</span><span class="lv">${d.cnt}</span></div>`).join('');
549
+ }
550
+
551
+ async function loadFeed(){
552
+ const p=new URLSearchParams({since_hours:W,limit:120});
553
+ ['fA','fT','fS'].forEach((id,i)=>{const v=document.getElementById(id).value,k=['agent','event_type','status'][i];if(v)p.set(k,v)});
554
+ events=(await(await fetch('/api/traces?'+p)).json()).events||[];
555
+ renderFeed();
556
+ }
557
+
558
+ function renderFeed(){
559
+ const f=document.getElementById('feed');
560
+ if(!events.length){f.innerHTML='<div class="empty">No events in window</div>';return}
561
+ f.innerHTML=events.map(e=>{const t=new Date(e.ts*1000).toTimeString().slice(0,8),lat=e.latency_ms?Math.round(e.latency_ms)+'ms':'',prev=e.error_msg||e.tool_name||e.skill_id||e.model||'';return`<div class="erow${e.id===selId?' sel':''}" onclick="selectEv('${e.id}')"><span style="color:var(--mu);font-size:.63rem">${t}</span><span style="color:var(--ac);font-weight:700">${e.agent}</span><span class="et type-${e.event_type}">${e.event_type}</span><span style="color:var(--mu);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${esc(prev)}</span><span style="text-align:right;color:${e.status==='error'?'var(--rd)':'var(--mu)'}">${lat||e.status}</span></div>`}).join('');
562
+ }
563
+
564
+ function selectEv(id){
565
+ selId=id;renderFeed();
566
+ const e=events.find(x=>x.id===id);if(!e)return;
567
+ document.getElementById('rtitle').textContent=e.event_type+' — '+e.agent;
568
+ const fields=[['ID',e.id],['Agent',e.agent],['Type',e.event_type],['Status',`<span style="color:${e.status==='error'?'var(--rd)':'var(--gr)'}">${e.status}</span>`],['Time',new Date(e.ts*1000).toISOString()],['Session',e.session_id||'—'],['Task',e.task_id||'—'],['Latency',e.latency_ms?e.latency_ms+'ms':'—'],['Model',e.model||'—'],['Tool',e.tool_name||'—'],['Skill',e.skill_id||'—'],['Tokens',e.tokens_in||e.tokens_out?`${e.tokens_in||0} in / ${e.tokens_out||0} out`:'—'],['Reward',e.reward!=null?e.reward:'—'],['Error',e.error_msg||'—']];
569
+ const fh=fields.filter(([,v])=>v&&v!=='—').map(([l,v])=>`<div class="df"><div class="dfl">${l}</div><div class="dfv">${v}</div></div>`).join('');
570
+ let ph='';try{const p=typeof e.payload==='string'?JSON.parse(e.payload):e.payload;if(p&&Object.keys(p).length)ph=`<div class="df"><div class="dfl">Payload</div><pre class="dj">${esc(JSON.stringify(p,null,2))}</pre></div>`}catch(x){}
571
+ const rw=e.reward!=null?`<div style="margin-top:.4rem;padding:.4rem;background:#001f08;border:1px solid var(--gr);border-radius:5px;font-family:'DM Mono',monospace;font-size:.72rem;color:var(--gr);text-align:center">&#127942; Reward: <strong>${e.reward}</strong></div>`:'';
572
+ document.getElementById('dp').innerHTML=fh+ph+rw;
573
+ }
574
+
575
+ function fmt(n){return n>=1000?(n/1000).toFixed(1)+'k':String(n||0)}
576
+ function fmtk(n){return n>=1000000?(n/1000000).toFixed(1)+'M':n>=1000?(n/1000).toFixed(0)+'k':String(n||0)}
577
+ function esc(s){return String(s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')}
578
+
579
+ loadAll();setInterval(loadAll,5000);
580
+ </script></body></html>"""
581
+
582
+ @app.get("/", response_class=HTMLResponse)
583
+ async def root(): return HTMLResponse(content=SPA, media_type="text/html; charset=utf-8")
584
+
585
+ if __name__ == "__main__":
586
+ uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="info")
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ fastapi>=0.111.0
2
+ uvicorn>=0.30.0