feat: complete phase 5 -- error hardening, frontend, Docker, demo, docs

Backend:
- ConversationTracker: Protocol + PostgresConversationTracker for lifecycle tracking
- Error handler: ErrorCategory enum, classify_error(), with_retry() exponential backoff
- Wire PostgresAnalyticsRecorder + ConversationTracker into ws_handler
- Rate limiting (10 msg/10s per thread), edge case hardening
- Health endpoint GET /api/health, version 0.5.0
- Demo seed data script + sample OpenAPI spec

Frontend (all new):
- React Router with NavBar (Chat / Replay / Dashboard / Review)
- ReplayListPage + ReplayPage with ReplayTimeline component
- DashboardPage with MetricCard, range selector, zero-state
- ReviewPage for OpenAPI classification review
- ErrorBanner for WebSocket disconnect handling
- API client (api.ts) with typed fetch wrappers

Infrastructure:
- Frontend Dockerfile (multi-stage node -> nginx)
- nginx.conf with SPA routing + API/WS proxy
- docker-compose.yml with frontend service + healthchecks
- .env.example files (root + backend)

Documentation:
- README.md with quick start and architecture
- Agent configuration guide
- OpenAPI import guide
- Deployment guide
- Demo script

48 new tests, 449 total passing, 92.87% coverage
This commit is contained in:
Yaojia Wang
2026-03-31 21:20:06 +02:00
parent 38644594d2
commit 0e78e5b06b
44 changed files with 3397 additions and 169 deletions

View File

@@ -0,0 +1,156 @@
"""Tests for app.conversation_tracker module."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
from app.conversation_tracker import (
ConversationTrackerProtocol,
NoOpConversationTracker,
PostgresConversationTracker,
)
pytestmark = pytest.mark.unit
def _make_pool() -> tuple[AsyncMock, AsyncMock]:
    """Create a mock async connection pool and the connection it yields.

    Returns:
        A ``(pool, conn)`` pair. Calling ``pool.connection()`` returns an
        async context manager whose ``__aenter__`` yields ``conn``, and
        ``conn.execute`` is an ``AsyncMock`` so awaited SQL can be inspected.
    """
    pool = AsyncMock()
    conn = AsyncMock()
    conn.execute = AsyncMock()
    # ``connection`` is a plain MagicMock so that calling it returns the
    # context manager directly instead of a coroutine that must be awaited.
    pool.connection = MagicMock(return_value=_AsyncContextManager(conn))
    return pool, conn
class _AsyncContextManager:
"""Async context manager helper."""
def __init__(self, value: object) -> None:
self._value = value
async def __aenter__(self) -> object:
return self._value
async def __aexit__(self, *args: object) -> None:
pass
class TestConversationTrackerProtocol:
    """Both tracker implementations must pass the protocol isinstance check."""

    def test_noop_satisfies_protocol(self) -> None:
        """The no-op tracker is recognised as a ConversationTrackerProtocol."""
        assert isinstance(NoOpConversationTracker(), ConversationTrackerProtocol)

    def test_postgres_satisfies_protocol(self) -> None:
        """The Postgres tracker is recognised as a ConversationTrackerProtocol."""
        assert isinstance(PostgresConversationTracker(), ConversationTrackerProtocol)
class TestNoOpConversationTracker:
    """The no-op tracker must accept every call without touching the pool.

    Each test awaits one tracker method with a mock pool and then checks that
    no call was recorded on the pool (or any of its child mocks), so the
    "does nothing" claim in the test names is actually verified.
    """

    @pytest.mark.asyncio
    async def test_ensure_conversation_does_nothing(self) -> None:
        tracker = NoOpConversationTracker()
        pool = AsyncMock()
        # Should not raise
        await tracker.ensure_conversation(pool, "thread-1")
        assert pool.mock_calls == []

    @pytest.mark.asyncio
    async def test_record_turn_does_nothing(self) -> None:
        tracker = NoOpConversationTracker()
        pool = AsyncMock()
        await tracker.record_turn(pool, "thread-1", "agent_a", 100, 0.05)
        assert pool.mock_calls == []

    @pytest.mark.asyncio
    async def test_resolve_does_nothing(self) -> None:
        tracker = NoOpConversationTracker()
        pool = AsyncMock()
        await tracker.resolve(pool, "thread-1", "resolved")
        assert pool.mock_calls == []

    @pytest.mark.asyncio
    async def test_accepts_none_agent_name(self) -> None:
        tracker = NoOpConversationTracker()
        pool = AsyncMock()
        # ``None`` is a valid agent name (no agent attributed to the turn).
        await tracker.record_turn(pool, "thread-1", None, 0, 0.0)
        assert pool.mock_calls == []
class TestPostgresConversationTracker:
    """Check the SQL text and bound parameters sent by the Postgres tracker.

    Every test drives one tracker method against the mock pool from
    ``_make_pool`` and inspects the single ``conn.execute`` call it produced.
    """

    @staticmethod
    def _single_execute(conn: AsyncMock) -> tuple[str, dict]:
        """Assert ``conn.execute`` was awaited once and return ``(sql, params)``."""
        conn.execute.assert_awaited_once()
        sql, params = conn.execute.call_args[0]
        return sql, params

    @pytest.mark.asyncio
    async def test_ensure_conversation_executes_insert(self) -> None:
        tracker = PostgresConversationTracker()
        pool, conn = _make_pool()
        await tracker.ensure_conversation(pool, "thread-abc")
        sql, params = self._single_execute(conn)
        assert "INSERT" in sql
        assert "ON CONFLICT" in sql
        assert params["thread_id"] == "thread-abc"

    @pytest.mark.asyncio
    async def test_record_turn_executes_update(self) -> None:
        tracker = PostgresConversationTracker()
        pool, conn = _make_pool()
        await tracker.record_turn(pool, "thread-abc", "order_agent", 250, 0.12)
        sql, params = self._single_execute(conn)
        assert "UPDATE" in sql
        assert params["thread_id"] == "thread-abc"
        assert params["agent_name"] == "order_agent"
        assert params["tokens"] == 250
        assert params["cost"] == 0.12

    @pytest.mark.asyncio
    async def test_record_turn_accepts_none_agent_name(self) -> None:
        tracker = PostgresConversationTracker()
        pool, conn = _make_pool()
        await tracker.record_turn(pool, "thread-abc", None, 0, 0.0)
        _, params = self._single_execute(conn)
        assert params["agent_name"] is None

    @pytest.mark.asyncio
    async def test_resolve_executes_update(self) -> None:
        tracker = PostgresConversationTracker()
        pool, conn = _make_pool()
        await tracker.resolve(pool, "thread-abc", "resolved")
        sql, params = self._single_execute(conn)
        assert "UPDATE" in sql
        assert params["thread_id"] == "thread-abc"
        assert params["resolution_type"] == "resolved"

    @pytest.mark.asyncio
    async def test_resolve_sets_ended_at(self) -> None:
        tracker = PostgresConversationTracker()
        pool, conn = _make_pool()
        await tracker.resolve(pool, "thread-abc", "escalated")
        sql, _ = self._single_execute(conn)
        assert "ended_at" in sql.lower()

    @pytest.mark.asyncio
    async def test_ensure_conversation_with_special_thread_id(self) -> None:
        tracker = PostgresConversationTracker()
        pool, conn = _make_pool()
        await tracker.ensure_conversation(pool, "thread-123-abc-XYZ")
        _, params = self._single_execute(conn)
        # The id must reach the query unchanged (mixed case, digits, dashes).
        assert params["thread_id"] == "thread-123-abc-XYZ"

    @pytest.mark.asyncio
    async def test_record_turn_with_zero_cost(self) -> None:
        tracker = PostgresConversationTracker()
        pool, conn = _make_pool()
        await tracker.record_turn(pool, "t1", "agent", 0, 0.0)
        _, params = self._single_execute(conn)
        # Zero values must be bound as-is rather than dropped as falsy.
        assert params["tokens"] == 0
        assert params["cost"] == 0.0

View File

@@ -0,0 +1,213 @@
"""Edge case tests for ws_handler input validation and rate limiting."""
from __future__ import annotations
import json
from unittest.mock import AsyncMock, MagicMock
import pytest
from app.callbacks import TokenUsageCallbackHandler
from app.session_manager import SessionManager
from app.ws_handler import dispatch_message
pytestmark = pytest.mark.unit
def _make_ws() -> AsyncMock:
ws = AsyncMock()
ws.send_json = AsyncMock()
return ws
def _make_graph() -> AsyncMock:
graph = AsyncMock()
class AsyncIterHelper:
def __aiter__(self):
return self
async def __anext__(self):
raise StopAsyncIteration
graph.astream = MagicMock(return_value=AsyncIterHelper())
state = MagicMock()
state.tasks = ()
graph.aget_state = AsyncMock(return_value=state)
graph.intent_classifier = None
graph.agent_registry = None
return graph
@pytest.mark.unit
class TestEmptyMessageHandling:
    """Messages with blank content must be answered with an error frame."""

    @pytest.mark.asyncio
    async def test_empty_message_content_returns_error(self) -> None:
        """Empty content yields an error mentioning 'content' or 'missing'."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        sessions.touch("t1")
        payload = {"type": "message", "thread_id": "t1", "content": ""}

        await dispatch_message(socket, stub_graph, sessions, usage_cb, json.dumps(payload))

        reply = socket.send_json.call_args[0][0]
        assert reply["type"] == "error"
        reply_text = reply["message"].lower()
        assert "content" in reply_text or "missing" in reply_text

    @pytest.mark.asyncio
    async def test_whitespace_only_message_treated_as_empty(self) -> None:
        """Whitespace-only content is rejected with an error frame."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        sessions.touch("t1")
        payload = {"type": "message", "thread_id": "t1", "content": " "}

        await dispatch_message(socket, stub_graph, sessions, usage_cb, json.dumps(payload))

        reply = socket.send_json.call_args[0][0]
        assert reply["type"] == "error"
@pytest.mark.unit
class TestOversizedMessageHandling:
    """Size limits on message content and on the raw frame."""

    @pytest.mark.asyncio
    async def test_content_over_10000_chars_returns_error(self) -> None:
        """Content of 10001 characters is rejected as 'too long'."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        sessions.touch("t1")
        payload = {"type": "message", "thread_id": "t1", "content": "x" * 10001}

        await dispatch_message(socket, stub_graph, sessions, usage_cb, json.dumps(payload))

        reply = socket.send_json.call_args[0][0]
        assert reply["type"] == "error"
        assert "too long" in reply["message"].lower()

    @pytest.mark.asyncio
    async def test_content_exactly_10000_chars_is_accepted(self) -> None:
        """Content of exactly 10000 characters does not hit the length error."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        sessions.touch("t1")
        payload = {"type": "message", "thread_id": "t1", "content": "x" * 10000}

        await dispatch_message(socket, stub_graph, sessions, usage_cb, json.dumps(payload))

        final_reply = socket.send_json.call_args[0][0]
        reply_text = final_reply.get("message", "").lower()
        # Other errors are tolerated here; only a length rejection is a failure.
        assert not (final_reply["type"] == "error" and "too long" in reply_text)

    @pytest.mark.asyncio
    async def test_raw_message_over_32kb_returns_error(self) -> None:
        """A 40,000-character raw frame is rejected as 'too large'."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        oversized_frame = "x" * 40_000

        await dispatch_message(socket, stub_graph, sessions, usage_cb, oversized_frame)

        reply = socket.send_json.call_args[0][0]
        assert reply["type"] == "error"
        assert "too large" in reply["message"].lower()
@pytest.mark.unit
class TestInvalidJsonHandling:
    """Frames that are not a JSON object must be answered with an error."""

    @staticmethod
    async def _reply_to(raw: str) -> dict:
        """Dispatch ``raw`` with fresh mocks and return the last frame sent."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        await dispatch_message(socket, stub_graph, sessions, usage_cb, raw)
        return socket.send_json.call_args[0][0]

    @pytest.mark.asyncio
    async def test_invalid_json_returns_error(self) -> None:
        """Malformed JSON yields an 'invalid json' error."""
        reply = await self._reply_to("not valid json {{")
        assert reply["type"] == "error"
        assert "invalid json" in reply["message"].lower()

    @pytest.mark.asyncio
    async def test_empty_string_returns_json_error(self) -> None:
        """An empty frame yields an error."""
        reply = await self._reply_to("")
        assert reply["type"] == "error"

    @pytest.mark.asyncio
    async def test_json_array_not_object_returns_error(self) -> None:
        """A JSON array (valid JSON, but not an object) yields an error."""
        reply = await self._reply_to('["not", "an", "object"]')
        assert reply["type"] == "error"
@pytest.mark.unit
class TestRateLimiting:
    """Per-thread rate limiting of chat messages (10 messages per 10 seconds)."""

    @pytest.mark.asyncio
    async def test_rapid_fire_messages_rate_limited(self) -> None:
        """Sending 11 messages back-to-back on one thread trips the limiter."""
        ws = _make_ws()
        sm = SessionManager()
        cb = TokenUsageCallbackHandler()
        sm.touch("t1")

        # 11 rapid messages exceed the 10-per-10-seconds limit.
        for i in range(11):
            # Fresh graph each time so no state leaks between dispatches.
            await dispatch_message(ws, _make_graph(), sm, cb, json.dumps({
                "type": "message",
                "thread_id": "t1",
                "content": f"message {i}",
            }))
            reply = ws.send_json.call_args[0][0]
            if reply["type"] == "error" and "rate" in reply.get("message", "").lower():
                break
        else:
            # Loop finished without ever seeing a rate-limit error.
            pytest.fail("Rate limiting should trigger after 10 rapid messages")

    @pytest.mark.asyncio
    async def test_different_threads_have_separate_rate_limits(self) -> None:
        """Ten messages split evenly over two threads limit neither thread."""
        ws = _make_ws()
        sm = SessionManager()
        cb = TokenUsageCallbackHandler()
        sm.touch("t1")
        sm.touch("t2")

        # Send 5 messages on t1 and 5 on t2 -- neither should be rate limited.
        # The reply is checked after every send so that a false limit on
        # either thread is caught, not only on whichever thread sent last.
        for i in range(5):
            for thread_id in ("t1", "t2"):
                await dispatch_message(ws, _make_graph(), sm, cb, json.dumps({
                    "type": "message", "thread_id": thread_id, "content": f"msg {i}",
                }))
                reply = ws.send_json.call_args[0][0]
                assert "rate" not in reply.get("message", "").lower(), (
                    f"{thread_id} was rate limited on message {i}"
                )

View File

@@ -0,0 +1,175 @@
"""Tests for app.tools.error_handler module."""
from __future__ import annotations
from unittest.mock import AsyncMock, patch
import httpx
import pytest
from app.tools.error_handler import (
ErrorCategory,
classify_error,
with_retry,
)
pytestmark = pytest.mark.unit
class TestErrorClassification:
    """classify_error maps exceptions onto ErrorCategory members."""

    @staticmethod
    def _status_error(code: int) -> httpx.HTTPStatusError:
        """Build an HTTPStatusError carrying a response with status ``code``."""
        request = httpx.Request("GET", "http://example.com")
        response = httpx.Response(code, request=request)
        return httpx.HTTPStatusError(str(code), request=request, response=response)

    def test_timeout_exception_is_timeout(self) -> None:
        assert classify_error(httpx.TimeoutException("timed out")) == ErrorCategory.TIMEOUT

    def test_connect_error_is_network(self) -> None:
        assert classify_error(httpx.ConnectError("connection refused")) == ErrorCategory.NETWORK

    def test_401_is_auth_failure(self) -> None:
        assert classify_error(self._status_error(401)) == ErrorCategory.AUTH_FAILURE

    def test_403_is_auth_failure(self) -> None:
        assert classify_error(self._status_error(403)) == ErrorCategory.AUTH_FAILURE

    def test_429_is_retryable(self) -> None:
        assert classify_error(self._status_error(429)) == ErrorCategory.RETRYABLE

    def test_500_is_retryable(self) -> None:
        assert classify_error(self._status_error(500)) == ErrorCategory.RETRYABLE

    def test_502_is_retryable(self) -> None:
        assert classify_error(self._status_error(502)) == ErrorCategory.RETRYABLE

    def test_503_is_retryable(self) -> None:
        assert classify_error(self._status_error(503)) == ErrorCategory.RETRYABLE

    def test_404_is_non_retryable(self) -> None:
        assert classify_error(self._status_error(404)) == ErrorCategory.NON_RETRYABLE

    def test_400_is_non_retryable(self) -> None:
        assert classify_error(self._status_error(400)) == ErrorCategory.NON_RETRYABLE

    def test_generic_exception_is_non_retryable(self) -> None:
        assert classify_error(ValueError("bad value")) == ErrorCategory.NON_RETRYABLE

    def test_runtime_error_is_non_retryable(self) -> None:
        assert classify_error(RuntimeError("boom")) == ErrorCategory.NON_RETRYABLE
class TestWithRetry:
    """with_retry retries only retryable failures, with growing delays."""

    @staticmethod
    def _status_error(code: int) -> httpx.HTTPStatusError:
        """Build an HTTPStatusError carrying a response with status ``code``."""
        request = httpx.Request("GET", "http://example.com")
        response = httpx.Response(code, request=request)
        return httpx.HTTPStatusError(str(code), request=request, response=response)

    @pytest.mark.asyncio
    async def test_succeeds_on_first_try(self) -> None:
        operation = AsyncMock(return_value="ok")
        outcome = await with_retry(operation, max_retries=3, base_delay=0.0)
        assert outcome == "ok"
        assert operation.call_count == 1

    @pytest.mark.asyncio
    async def test_retries_on_retryable_error(self) -> None:
        failure = self._status_error(503)
        operation = AsyncMock(side_effect=[failure, failure, "success"])
        with patch("app.tools.error_handler.asyncio.sleep", new_callable=AsyncMock):
            outcome = await with_retry(operation, max_retries=3, base_delay=0.0)
        assert outcome == "success"
        assert operation.call_count == 3

    @pytest.mark.asyncio
    async def test_does_not_retry_non_retryable_error(self) -> None:
        operation = AsyncMock(side_effect=self._status_error(404))
        with pytest.raises(httpx.HTTPStatusError):
            await with_retry(operation, max_retries=3, base_delay=0.0)
        assert operation.call_count == 1

    @pytest.mark.asyncio
    async def test_does_not_retry_auth_failure(self) -> None:
        operation = AsyncMock(side_effect=self._status_error(401))
        with pytest.raises(httpx.HTTPStatusError):
            await with_retry(operation, max_retries=3, base_delay=0.0)
        assert operation.call_count == 1

    @pytest.mark.asyncio
    async def test_raises_after_max_retries_exhausted(self) -> None:
        operation = AsyncMock(side_effect=self._status_error(500))
        sleep_patch = patch("app.tools.error_handler.asyncio.sleep", new_callable=AsyncMock)
        with sleep_patch, pytest.raises(httpx.HTTPStatusError):
            await with_retry(operation, max_retries=3, base_delay=0.0)
        assert operation.call_count == 3

    @pytest.mark.asyncio
    async def test_does_not_retry_timeout(self) -> None:
        """TimeoutException is TIMEOUT category -- not retried by default."""
        operation = AsyncMock(side_effect=httpx.TimeoutException("timed out"))
        with pytest.raises(httpx.TimeoutException):
            await with_retry(operation, max_retries=3, base_delay=0.0)
        assert operation.call_count == 1

    @pytest.mark.asyncio
    async def test_exponential_backoff_increases_delay(self) -> None:
        failure = self._status_error(503)
        operation = AsyncMock(side_effect=[failure, failure, "done"])
        observed_delays: list[float] = []

        async def record_delay(delay: float) -> None:
            # Stand-in for asyncio.sleep: note the delay, never actually wait.
            observed_delays.append(delay)

        with patch("app.tools.error_handler.asyncio.sleep", side_effect=record_delay):
            await with_retry(operation, max_retries=3, base_delay=1.0)

        # Two failures mean two sleeps, and the second must be longer.
        assert len(observed_delays) == 2
        first_delay, second_delay = observed_delays
        assert second_delay > first_delay

View File

@@ -13,7 +13,7 @@ class TestMainModule:
assert app.title == "Smart Support"
def test_app_version(self) -> None:
    """The application reports version 0.5.0."""
    # Only the current version is asserted; a leftover check for the
    # previous "0.4.0" value would contradict this and always fail.
    assert app.version == "0.5.0"
def test_agents_yaml_path_exists(self) -> None:
    """AGENTS_YAML points at a file named ``agents.yaml``."""
    expected_name = "agents.yaml"
    assert AGENTS_YAML.name == expected_name
@@ -33,3 +33,10 @@ class TestMainModule:
def test_analytics_router_registered(self) -> None:
    """At least one registered route path contains ``analytics``."""
    # Only routes that expose a ``path`` attribute are considered.
    assert any(
        "analytics" in route.path
        for route in app.routes
        if hasattr(route, "path")
    )
def test_health_route_registered(self) -> None:
    """The health endpoint is registered at exactly ``/api/health``."""
    # Only routes that expose a ``path`` attribute are considered.
    registered_paths = tuple(
        route.path for route in app.routes if hasattr(route, "path")
    )
    assert "/api/health" in registered_paths
def test_app_version_is_0_5_0(self) -> None:
    """The application version string is ``0.5.0``."""
    expected_version = "0.5.0"
    assert app.version == expected_version

View File

@@ -138,7 +138,7 @@ class TestDispatchMessage:
sm = SessionManager()
cb = TokenUsageCallbackHandler()
msg = json.dumps({"type": "message", "thread_id": "t1", "content": "x" * 9000})
msg = json.dumps({"type": "message", "thread_id": "t1", "content": "x" * 10001})
await dispatch_message(ws, graph, sm, cb, msg)
call_data = ws.send_json.call_args[0][0]
assert call_data["type"] == "error"
@@ -364,3 +364,80 @@ class TestInterruptHelpers:
state.tasks = ()
data = _extract_interrupt(state)
assert data["action"] == "unknown"
@pytest.mark.unit
class TestDispatchMessageWithTracking:
    """dispatch_message with optional conversation tracker / analytics recorder."""

    @pytest.mark.asyncio
    async def test_conversation_tracker_called_on_message(self) -> None:
        """A chat message ensures the conversation and records one turn."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        tracker = AsyncMock()
        pool = MagicMock()
        sessions.touch("t1")
        raw = json.dumps({"type": "message", "thread_id": "t1", "content": "hello"})

        await dispatch_message(
            socket,
            stub_graph,
            sessions,
            usage_cb,
            raw,
            conversation_tracker=tracker,
            pool=pool,
        )

        tracker.ensure_conversation.assert_awaited_once_with(pool, "t1")
        tracker.record_turn.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_analytics_recorder_called_on_message(self) -> None:
        """A chat message triggers exactly one analytics record."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        recorder = AsyncMock()
        pool = MagicMock()
        sessions.touch("t1")
        raw = json.dumps({"type": "message", "thread_id": "t1", "content": "hello"})

        await dispatch_message(
            socket,
            stub_graph,
            sessions,
            usage_cb,
            raw,
            analytics_recorder=recorder,
            pool=pool,
        )

        recorder.record.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_tracker_failure_does_not_break_chat(self) -> None:
        """A tracker that raises must not stop the message from completing."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        tracker = AsyncMock()
        tracker.ensure_conversation.side_effect = RuntimeError("DB down")
        pool = MagicMock()
        sessions.touch("t1")
        raw = json.dumps({"type": "message", "thread_id": "t1", "content": "hello"})

        # Should not raise despite tracker failure
        await dispatch_message(
            socket,
            stub_graph,
            sessions,
            usage_cb,
            raw,
            conversation_tracker=tracker,
            pool=pool,
        )

        final_frame = socket.send_json.call_args[0][0]
        assert final_frame["type"] == "message_complete"

    @pytest.mark.asyncio
    async def test_no_tracker_no_error(self) -> None:
        """Without a tracker or recorder the message still completes."""
        socket = _make_ws()
        stub_graph = _make_graph()
        sessions = SessionManager()
        usage_cb = TokenUsageCallbackHandler()
        sessions.touch("t1")
        raw = json.dumps({"type": "message", "thread_id": "t1", "content": "hello"})

        # No tracker or recorder passed -- should work fine
        await dispatch_message(socket, stub_graph, sessions, usage_cb, raw)

        final_frame = socket.send_json.call_args[0][0]
        assert final_frame["type"] == "message_complete"