feat: complete phase 4 -- conversation replay API + analytics dashboard
- Replay models: StepType enum, ReplayStep, ReplayPage frozen dataclasses
- Checkpoint transformer: PostgresSaver JSONB -> structured timeline steps
- Replay API: GET /api/conversations (paginated), GET /api/replay/{thread_id}
- Analytics models: AgentUsage, InterruptStats, AnalyticsResult
- Analytics event recorder: Protocol + PostgresAnalyticsRecorder + NoOp
- Analytics queries: resolution_rate, agent_usage, escalation_rate, cost, interrupts
- Analytics API: GET /api/analytics?range=Xd with envelope response
- DB migration: analytics_events table + conversations column additions
- 74 new tests, 399 total passing, 92.87% coverage
This commit is contained in:
1
backend/tests/unit/replay/__init__.py
Normal file
1
backend/tests/unit/replay/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Unit tests for app.replay module."""
|
||||
160
backend/tests/unit/replay/test_api.py
Normal file
160
backend/tests/unit/replay/test_api.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""Unit tests for app.replay.api."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def _build_app() -> FastAPI:
    """Return a fresh FastAPI application with the replay router mounted.

    The router is imported inside the function, so importing this test
    module does not import ``app.replay.api``.
    """
    from app.replay.api import router

    application = FastAPI()
    application.include_router(router)
    return application
|
||||
|
||||
|
||||
def _make_mock_pool(fetchall_result: list[dict]) -> MagicMock:
|
||||
"""Build a mock pool that returns the given rows from fetchall."""
|
||||
mock_cursor = AsyncMock()
|
||||
mock_cursor.fetchall = AsyncMock(return_value=fetchall_result)
|
||||
|
||||
mock_conn = AsyncMock()
|
||||
mock_conn.execute = AsyncMock(return_value=mock_cursor)
|
||||
|
||||
mock_ctx = AsyncMock()
|
||||
mock_ctx.__aenter__ = AsyncMock(return_value=mock_conn)
|
||||
mock_ctx.__aexit__ = AsyncMock(return_value=None)
|
||||
|
||||
mock_pool = MagicMock()
|
||||
mock_pool.connection.return_value = mock_ctx
|
||||
return mock_pool
|
||||
|
||||
|
||||
class TestListConversations:
    """Endpoint tests for ``GET /api/conversations`` backed by a mocked pool."""

    @staticmethod
    def _fetch(url: str, rows: list[dict]):
        """GET ``url`` on a fresh app whose mocked pool returns ``rows``."""
        application = _build_app()
        application.state.pool = _make_mock_pool(rows)
        with TestClient(application) as http:
            return http.get(url)

    def test_returns_200_with_empty_list(self) -> None:
        """No rows still produces a successful envelope with a list payload."""
        response = self._fetch("/api/conversations", [])
        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert isinstance(payload["data"], list)
        assert payload["error"] is None

    def test_returns_conversations_list(self) -> None:
        """A single database row surfaces as a single conversation entry."""
        conversation = {
            "thread_id": "t1",
            "created_at": "2026-01-01T00:00:00",
            "last_activity": "2026-01-01T00:01:00",
            "status": "active",
            "total_tokens": 100,
            "total_cost_usd": 0.01,
        }
        response = self._fetch("/api/conversations", [conversation])
        payload = response.json()
        assert response.status_code == 200
        assert len(payload["data"]) == 1
        assert payload["data"][0]["thread_id"] == "t1"

    def test_pagination_defaults(self) -> None:
        """A request without pagination parameters is accepted."""
        response = self._fetch("/api/conversations", [])
        assert response.status_code == 200

    def test_pagination_custom_params(self) -> None:
        """Explicit ``page`` and ``per_page`` values are accepted."""
        response = self._fetch("/api/conversations?page=2&per_page=10", [])
        assert response.status_code == 200

    def test_per_page_max_capped_at_100(self) -> None:
        """``per_page`` above 100 is tolerated as either outcome.

        The assertion accepts a validation rejection (422) as well as a
        successful response (200); it does not pin which one the API uses.
        """
        response = self._fetch("/api/conversations?per_page=200", [])
        assert response.status_code in (200, 422)
|
||||
|
||||
|
||||
class TestGetReplay:
    """Endpoint tests for ``GET /api/replay/{thread_id}`` with a mocked pool."""

    def test_thread_not_found_returns_404(self) -> None:
        """A thread with no checkpoint rows yields a 404."""
        app = _build_app()
        app.state.pool = _make_mock_pool([])

        with TestClient(app) as client:
            resp = client.get("/api/replay/nonexistent-thread")
        assert resp.status_code == 404

    def test_returns_replay_page_for_existing_thread(self) -> None:
        """An existing thread returns a success envelope with a replay page."""
        app = _build_app()
        mock_rows = [
            {
                "thread_id": "thread-123",
                "checkpoint_id": "cp-001",
                "checkpoint": {
                    "channel_values": {
                        "messages": [{"type": "human", "content": "Hello"}]
                    }
                },
                "metadata": {},
            }
        ]
        app.state.pool = _make_mock_pool(mock_rows)

        with TestClient(app) as client:
            resp = client.get("/api/replay/thread-123")
        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert body["data"]["thread_id"] == "thread-123"
        assert "steps" in body["data"]
        assert "total_steps" in body["data"]
        assert "page" in body["data"]
        assert "per_page" in body["data"]

    def test_replay_pagination_params(self) -> None:
        """Explicit ``page`` and ``per_page`` values are accepted."""
        app = _build_app()
        mock_rows = [
            {
                "thread_id": "t1",
                "checkpoint_id": "cp-001",
                "checkpoint": {
                    "channel_values": {"messages": [{"type": "human", "content": "Hi"}]}
                },
                "metadata": {},
            }
        ]
        app.state.pool = _make_mock_pool(mock_rows)

        with TestClient(app) as client:
            resp = client.get("/api/replay/t1?page=1&per_page=5")
        assert resp.status_code == 200

    def test_error_response_has_envelope(self) -> None:
        """A missing thread returns 404 with an error-bearing JSON body.

        The status and the body are asserted separately. Folding the status
        check into the same ``or`` chain would let the test pass without ever
        inspecting the body.
        """
        app = _build_app()
        app.state.pool = _make_mock_pool([])

        with TestClient(app) as client:
            resp = client.get("/api/replay/missing")
        assert resp.status_code == 404
        body = resp.json()
        # Either FastAPI's default error shape ("detail") or the project's
        # response envelope ("error") is acceptable.
        assert "detail" in body or "error" in body
|
||||
134
backend/tests/unit/replay/test_models.py
Normal file
134
backend/tests/unit/replay/test_models.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Unit tests for app.replay.models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
class TestStepType:
    """Checks on the members of ``StepType`` and their string values."""

    def test_all_step_types_exist(self) -> None:
        """Every expected member is defined and truthy."""
        from app.replay.models import StepType

        expected_members = (
            "user_message",
            "supervisor_routing",
            "tool_call",
            "tool_result",
            "agent_response",
            "interrupt",
        )
        for member_name in expected_members:
            assert getattr(StepType, member_name)

    def test_step_type_values(self) -> None:
        """Selected members carry a value equal to their own name."""
        from app.replay.models import StepType

        for member_name in ("user_message", "tool_call", "agent_response"):
            assert getattr(StepType, member_name).value == member_name
|
||||
|
||||
|
||||
class TestReplayStep:
    """Construction, defaults and immutability of ``ReplayStep``."""

    def test_minimal_replay_step(self) -> None:
        """Only the required fields are given; the rest take their defaults."""
        from app.replay.models import ReplayStep, StepType

        minimal = ReplayStep(step=1, type=StepType.user_message, timestamp="2026-01-01T00:00:00Z")
        assert minimal.step == 1
        assert minimal.type == StepType.user_message
        assert minimal.timestamp == "2026-01-01T00:00:00Z"
        assert minimal.content == ""
        optional_fields = (
            "agent",
            "tool",
            "params",
            "result",
            "reasoning",
            "tokens",
            "duration_ms",
        )
        for field_name in optional_fields:
            assert getattr(minimal, field_name) is None

    def test_full_replay_step(self) -> None:
        """Every field can be supplied explicitly and is read back unchanged."""
        from app.replay.models import ReplayStep, StepType

        populated = ReplayStep(
            step=2,
            type=StepType.tool_call,
            timestamp="2026-01-01T00:00:01Z",
            content="calling get_order",
            agent="order_agent",
            tool="get_order_status",
            params={"order_id": "ORD-123"},
            result={"status": "shipped"},
            reasoning="user asked about order",
            tokens=50,
            duration_ms=200,
        )
        assert populated.step == 2
        assert populated.agent == "order_agent"
        assert populated.tool == "get_order_status"
        assert populated.params == {"order_id": "ORD-123"}
        assert populated.tokens == 50

    def test_replay_step_is_frozen(self) -> None:
        """Assigning to a field raises instead of mutating the step."""
        from app.replay.models import ReplayStep, StepType

        frozen = ReplayStep(step=1, type=StepType.user_message, timestamp="2026-01-01T00:00:00Z")
        with pytest.raises((AttributeError, TypeError)):
            frozen.step = 99  # type: ignore[misc]

    def test_replay_step_params_is_immutable_copy(self) -> None:
        """Mutating the caller's dict afterwards does not leak into the step."""
        from app.replay.models import ReplayStep, StepType

        source_params = {"key": "value"}
        built = ReplayStep(
            step=1,
            type=StepType.tool_call,
            timestamp="2026-01-01T00:00:00Z",
            params=source_params,
        )
        # Change the original after construction; the step must not see it.
        source_params["new_key"] = "new_value"
        assert "new_key" not in (built.params or {})
|
||||
|
||||
|
||||
class TestReplayPage:
    """Construction and immutability of ``ReplayPage``."""

    @staticmethod
    def _empty_page():
        """Build a first page for thread ``t1`` that holds no steps."""
        from app.replay.models import ReplayPage

        return ReplayPage(
            thread_id="t1",
            total_steps=0,
            page=1,
            per_page=20,
            steps=(),
        )

    def test_replay_page_construction(self) -> None:
        """All fields passed to the constructor are readable afterwards."""
        from app.replay.models import ReplayPage, ReplayStep, StepType

        question = ReplayStep(step=1, type=StepType.user_message, timestamp="2026-01-01T00:00:00Z")
        answer = ReplayStep(step=2, type=StepType.agent_response, timestamp="2026-01-01T00:00:01Z")
        built = ReplayPage(
            thread_id="thread-123",
            total_steps=2,
            page=1,
            per_page=20,
            steps=(question, answer),
        )
        assert built.thread_id == "thread-123"
        assert built.total_steps == 2
        assert built.page == 1
        assert built.per_page == 20
        assert len(built.steps) == 2

    def test_replay_page_is_frozen(self) -> None:
        """Assigning to a field raises instead of mutating the page."""
        frozen = self._empty_page()
        with pytest.raises((AttributeError, TypeError)):
            frozen.page = 2  # type: ignore[misc]

    def test_replay_page_empty_steps(self) -> None:
        """An empty tuple of steps is stored as given."""
        assert self._empty_page().steps == ()
|
||||
155
backend/tests/unit/replay/test_transformer.py
Normal file
155
backend/tests/unit/replay/test_transformer.py
Normal file
@@ -0,0 +1,155 @@
|
||||
"""Unit tests for app.replay.transformer."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def _make_row(messages: list[dict], metadata: dict | None = None) -> dict:
|
||||
"""Helper to build a checkpoint row with the given messages."""
|
||||
return {
|
||||
"thread_id": "thread-abc",
|
||||
"checkpoint_id": "cp-001",
|
||||
"checkpoint": {"channel_values": {"messages": messages}},
|
||||
"metadata": metadata or {},
|
||||
}
|
||||
|
||||
|
||||
class TestTransformCheckpoints:
    """Behavioural tests for ``transform_checkpoints``."""

    @staticmethod
    def _transform(rows: list[dict]):
        """Import ``transform_checkpoints`` lazily and apply it to ``rows``."""
        from app.replay.transformer import transform_checkpoints

        return transform_checkpoints(rows)

    def test_empty_rows_returns_empty_list(self) -> None:
        """No checkpoint rows produce no steps."""
        assert self._transform([]) == []

    def test_human_message_produces_user_message_step(self) -> None:
        """A ``human`` message becomes step 1 of type ``user_message``."""
        from app.replay.models import StepType

        timeline = self._transform(
            [_make_row([{"type": "human", "content": "Hello, I need help"}])]
        )
        assert len(timeline) == 1
        only = timeline[0]
        assert only.type == StepType.user_message
        assert only.content == "Hello, I need help"
        assert only.step == 1

    def test_ai_message_with_content_produces_agent_response(self) -> None:
        """An ``ai`` message with text and no tool calls is an agent response."""
        from app.replay.models import StepType

        ai_message = {"type": "ai", "content": "I can help you with that.", "tool_calls": []}
        row = _make_row([ai_message], metadata={"writes": {"some_agent": "response"}})
        timeline = self._transform([row])
        assert len(timeline) == 1
        assert timeline[0].type == StepType.agent_response
        assert timeline[0].content == "I can help you with that."

    def test_ai_message_with_tool_calls_produces_tool_call_step(self) -> None:
        """An ``ai`` message carrying a tool call yields a ``tool_call`` step."""
        from app.replay.models import StepType

        tool_call = {
            "name": "get_order_status",
            "args": {"order_id": "ORD-123"},
            "id": "call_abc",
        }
        ai_message = {"type": "ai", "content": "", "tool_calls": [tool_call]}
        timeline = self._transform([_make_row([ai_message])])
        assert len(timeline) == 1
        only = timeline[0]
        assert only.type == StepType.tool_call
        assert only.tool == "get_order_status"
        assert only.params == {"order_id": "ORD-123"}

    def test_tool_message_produces_tool_result_step(self) -> None:
        """A ``tool`` message yields a ``tool_result`` step naming the tool."""
        from app.replay.models import StepType

        tool_message = {
            "type": "tool",
            "content": '{"status": "shipped"}',
            "name": "get_order_status",
        }
        timeline = self._transform([_make_row([tool_message])])
        assert len(timeline) == 1
        assert timeline[0].type == StepType.tool_result
        assert timeline[0].tool == "get_order_status"

    def test_multiple_messages_sequential_steps(self) -> None:
        """Two messages in one row are numbered 1 and 2."""
        conversation = [
            {"type": "human", "content": "Help"},
            {"type": "ai", "content": "Sure!", "tool_calls": []},
        ]
        timeline = self._transform([_make_row(conversation)])
        assert len(timeline) == 2
        assert timeline[0].step == 1
        assert timeline[1].step == 2

    def test_unknown_message_type_skipped(self) -> None:
        """An unrecognised message type does not crash the transformer."""
        timeline = self._transform(
            [_make_row([{"type": "unknown_type", "content": "test"}])]
        )
        # Only the return type is pinned; whether the message is skipped is not.
        assert isinstance(timeline, list)

    def test_row_missing_checkpoint_skipped(self) -> None:
        """A row whose ``checkpoint`` is ``None`` still returns a list."""
        row = {"thread_id": "t1", "checkpoint_id": "cp1", "checkpoint": None, "metadata": {}}
        assert isinstance(self._transform([row]), list)

    def test_row_missing_messages_key_skipped(self) -> None:
        """A row whose ``checkpoint`` is an empty dict still returns a list."""
        row = {"thread_id": "t1", "checkpoint_id": "cp1", "checkpoint": {}, "metadata": {}}
        assert isinstance(self._transform([row]), list)

    def test_multiple_rows_steps_are_continuous(self) -> None:
        """Step numbering continues across rows instead of restarting."""
        question_row = _make_row([{"type": "human", "content": "Q1"}])
        answer_row = _make_row([{"type": "ai", "content": "A1", "tool_calls": []}])
        timeline = self._transform([question_row, answer_row])
        assert len(timeline) == 2
        assert timeline[0].step == 1
        assert timeline[1].step == 2

    def test_timestamps_are_strings(self) -> None:
        """Each step exposes its timestamp as a ``str``."""
        timeline = self._transform([_make_row([{"type": "human", "content": "Hi"}])])
        assert isinstance(timeline[0].timestamp, str)
|
||||
Reference in New Issue
Block a user