feat: complete phase 5 -- error hardening, frontend, Docker, demo, docs
Backend:
- ConversationTracker: Protocol + PostgresConversationTracker for lifecycle tracking
- Error handler: ErrorCategory enum, classify_error(), with_retry() exponential backoff
- Wire PostgresAnalyticsRecorder + ConversationTracker into ws_handler
- Rate limiting (10 msg/10s per thread), edge case hardening
- Health endpoint GET /api/health, version 0.5.0
- Demo seed data script + sample OpenAPI spec

Frontend (all new):
- React Router with NavBar (Chat / Replay / Dashboard / Review)
- ReplayListPage + ReplayPage with ReplayTimeline component
- DashboardPage with MetricCard, range selector, zero-state
- ReviewPage for OpenAPI classification review
- ErrorBanner for WebSocket disconnect handling
- API client (api.ts) with typed fetch wrappers

Infrastructure:
- Frontend Dockerfile (multi-stage node -> nginx)
- nginx.conf with SPA routing + API/WS proxy
- docker-compose.yml with frontend service + healthchecks
- .env.example files (root + backend)

Documentation:
- README.md with quick start and architecture
- Agent configuration guide
- OpenAPI import guide
- Deployment guide
- Demo script

48 new tests, 449 total passing, 92.87% coverage
This commit is contained in:
@@ -5,6 +5,8 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
@@ -16,16 +18,23 @@ if TYPE_CHECKING:
|
||||
from fastapi import WebSocket
|
||||
from langgraph.graph.state import CompiledStateGraph
|
||||
|
||||
from app.analytics.event_recorder import AnalyticsRecorder
|
||||
from app.callbacks import TokenUsageCallbackHandler
|
||||
from app.conversation_tracker import ConversationTrackerProtocol
|
||||
from app.interrupt_manager import InterruptManager
|
||||
from app.session_manager import SessionManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_MESSAGE_SIZE = 32_768 # 32 KB
|
||||
MAX_CONTENT_LENGTH = 8_000 # characters
|
||||
MAX_CONTENT_LENGTH = 10_000 # characters
|
||||
THREAD_ID_PATTERN = re.compile(r"^[a-zA-Z0-9\-_]{1,128}$")
|
||||
|
||||
# Rate limiting: max 10 messages per 10-second window, per thread
|
||||
_RATE_LIMIT_MAX = 10
|
||||
_RATE_LIMIT_WINDOW = 10.0
|
||||
_thread_timestamps: dict[str, list[float]] = defaultdict(list)
|
||||
|
||||
|
||||
async def handle_user_message(
|
||||
ws: WebSocket,
|
||||
@@ -197,6 +206,9 @@ async def dispatch_message(
|
||||
callback_handler: TokenUsageCallbackHandler,
|
||||
raw_data: str,
|
||||
interrupt_manager: InterruptManager | None = None,
|
||||
analytics_recorder: AnalyticsRecorder | None = None,
|
||||
conversation_tracker: ConversationTrackerProtocol | None = None,
|
||||
pool: Any = None,
|
||||
) -> None:
|
||||
"""Parse and route an incoming WebSocket message."""
|
||||
if len(raw_data) > MAX_MESSAGE_SIZE:
|
||||
@@ -205,10 +217,14 @@ async def dispatch_message(
|
||||
|
||||
try:
|
||||
data = json.loads(raw_data)
|
||||
except json.JSONDecodeError:
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
await _send_json(ws, {"type": "error", "message": "Invalid JSON"})
|
||||
return
|
||||
|
||||
if not isinstance(data, dict):
|
||||
await _send_json(ws, {"type": "error", "message": "Invalid JSON: expected object"})
|
||||
return
|
||||
|
||||
msg_type = data.get("type")
|
||||
thread_id = data.get("thread_id", "")
|
||||
|
||||
@@ -222,16 +238,36 @@ async def dispatch_message(
|
||||
|
||||
if msg_type == "message":
|
||||
content = data.get("content", "")
|
||||
if not content:
|
||||
if not content or not content.strip():
|
||||
await _send_json(ws, {"type": "error", "message": "Missing message content"})
|
||||
return
|
||||
if len(content) > MAX_CONTENT_LENGTH:
|
||||
await _send_json(ws, {"type": "error", "message": "Message content too long"})
|
||||
return
|
||||
|
||||
# Rate limiting check
|
||||
now = time.time()
|
||||
timestamps = _thread_timestamps[thread_id]
|
||||
cutoff = now - _RATE_LIMIT_WINDOW
|
||||
_thread_timestamps[thread_id] = [t for t in timestamps if t >= cutoff]
|
||||
if len(_thread_timestamps[thread_id]) >= _RATE_LIMIT_MAX:
|
||||
await _send_json(ws, {"type": "error", "message": "Rate limit exceeded"})
|
||||
return
|
||||
_thread_timestamps[thread_id].append(now)
|
||||
|
||||
await handle_user_message(
|
||||
ws, graph, session_manager, callback_handler, thread_id, content,
|
||||
interrupt_manager=interrupt_manager,
|
||||
)
|
||||
await _fire_and_forget_tracking(
|
||||
thread_id=thread_id,
|
||||
pool=pool,
|
||||
analytics_recorder=analytics_recorder,
|
||||
conversation_tracker=conversation_tracker,
|
||||
agent_name=None,
|
||||
tokens=0,
|
||||
cost=0.0,
|
||||
)
|
||||
|
||||
elif msg_type == "interrupt_response":
|
||||
approved = data.get("approved", False)
|
||||
@@ -244,6 +280,36 @@ async def dispatch_message(
|
||||
await _send_json(ws, {"type": "error", "message": "Unknown message type"})
|
||||
|
||||
|
||||
async def _fire_and_forget_tracking(
|
||||
thread_id: str,
|
||||
pool: Any,
|
||||
analytics_recorder: Any | None,
|
||||
conversation_tracker: Any | None,
|
||||
agent_name: str | None,
|
||||
tokens: int,
|
||||
cost: float,
|
||||
) -> None:
|
||||
"""Fire-and-forget analytics/tracking; failures must NOT break chat."""
|
||||
try:
|
||||
if conversation_tracker is not None and pool is not None:
|
||||
await conversation_tracker.ensure_conversation(pool, thread_id)
|
||||
await conversation_tracker.record_turn(pool, thread_id, agent_name, tokens, cost)
|
||||
except Exception:
|
||||
logger.exception("Conversation tracker error for thread %s (suppressed)", thread_id)
|
||||
|
||||
try:
|
||||
if analytics_recorder is not None:
|
||||
await analytics_recorder.record(
|
||||
thread_id=thread_id,
|
||||
event_type="message",
|
||||
agent_name=agent_name,
|
||||
tokens_used=tokens,
|
||||
cost_usd=cost,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("Analytics recorder error for thread %s (suppressed)", thread_id)
|
||||
|
||||
|
||||
def _has_interrupt(state: Any) -> bool:
|
||||
"""Check if the graph state has a pending interrupt."""
|
||||
tasks = getattr(state, "tasks", ())
|
||||
|
||||
Reference in New Issue
Block a user