refactor: fix architectural issues across frontend and backend

Address all architecture review findings:

P0 fixes:
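- Add API key authentication for admin endpoints (analytics, replay, openapi)
  and WebSocket connections, configured via the ADMIN_API_KEY env var
  (WebSocket clients pass the key as the `token` query parameter; the
  WebSocket check is skipped when no key is configured)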
- Add PostgreSQL-backed PgSessionManager and PgInterruptManager for
  multi-worker production deployments (in-memory defaults preserved)

P1 fixes:
- Implement actual tool generation in OpenAPI approve_job endpoint
  using generate_tool_code() and generate_agent_yaml()
- Add missing clarification, interrupt_expired, and tool_result message
  handlers in frontend ChatPage

P2 fixes:
- Replace monkey-patching on CompiledStateGraph with typed GraphContext
- Replace 9-param dispatch_message with WebSocketContext dataclass
- Extract duplicate _envelope() into shared app/api_utils.py
- Replace mutable module-level counter with crypto.randomUUID()
- Remove hardcoded mock data from ReviewPage, use api.ts wrappers
- Remove `as any` type escape from ReplayPage

All 516 tests passing, 0 TypeScript errors.
This commit is contained in:
Yaojia Wang
2026-04-06 15:59:14 +02:00
parent b8654aa31f
commit af53111928
29 changed files with 1183 additions and 473 deletions

View File

@@ -7,7 +7,7 @@ from contextlib import asynccontextmanager
from pathlib import Path
from typing import TYPE_CHECKING
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi import Depends, FastAPI, Query, WebSocket, WebSocketDisconnect
from fastapi.staticfiles import StaticFiles
from app.analytics.api import router as analytics_router
@@ -25,6 +25,7 @@ from app.openapi.review_api import router as openapi_router
from app.registry import AgentRegistry
from app.replay.api import router as replay_router
from app.session_manager import SessionManager
from app.ws_context import WebSocketContext
from app.ws_handler import dispatch_message
if TYPE_CHECKING:
@@ -52,7 +53,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
llm = create_llm(settings)
intent_classifier = LLMIntentClassifier(llm)
graph = build_graph(registry, llm, checkpointer, intent_classifier=intent_classifier)
graph_ctx = build_graph(registry, llm, checkpointer, intent_classifier=intent_classifier)
session_manager = SessionManager(
session_ttl_seconds=settings.session_ttl_minutes * 60,
@@ -71,7 +72,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
else:
escalator = NoOpEscalator()
app.state.graph = graph
app.state.graph_ctx = graph_ctx
app.state.session_manager = session_manager
app.state.interrupt_manager = interrupt_manager
app.state.escalator = escalator
@@ -93,7 +94,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
await pool.close()
_VERSION = "0.5.0"
_VERSION = "0.6.0"
app = FastAPI(title="Smart Support", version=_VERSION, lifespan=lifespan)
@@ -109,28 +110,37 @@ def health_check() -> dict:
@app.websocket("/ws")
async def websocket_endpoint(ws: WebSocket) -> None:
await ws.accept()
graph = app.state.graph
session_manager = app.state.session_manager
interrupt_manager = app.state.interrupt_manager
async def websocket_endpoint(
ws: WebSocket,
token: str | None = Query(default=None),
) -> None:
settings = app.state.settings
# Verify WebSocket token when admin_api_key is configured
if settings.admin_api_key:
import secrets as _secrets
if token is None or not _secrets.compare_digest(token, settings.admin_api_key):
await ws.close(code=4001, reason="Unauthorized")
return
await ws.accept()
callback_handler = TokenUsageCallbackHandler(model_name=settings.llm_model)
analytics_recorder = app.state.analytics_recorder
conversation_tracker = app.state.conversation_tracker
pool = app.state.pool
ws_ctx = WebSocketContext(
graph_ctx=app.state.graph_ctx,
session_manager=app.state.session_manager,
callback_handler=callback_handler,
interrupt_manager=app.state.interrupt_manager,
analytics_recorder=app.state.analytics_recorder,
conversation_tracker=app.state.conversation_tracker,
pool=app.state.pool,
)
try:
while True:
raw_data = await ws.receive_text()
await dispatch_message(
ws, graph, session_manager, callback_handler, raw_data,
interrupt_manager=interrupt_manager,
analytics_recorder=analytics_recorder,
conversation_tracker=conversation_tracker,
pool=pool,
)
await dispatch_message(ws, ws_ctx, raw_data)
except WebSocketDisconnect:
logger.info("WebSocket client disconnected")