feat: complete phase 4 -- conversation replay API + analytics dashboard

- Replay models: StepType enum, ReplayStep, ReplayPage frozen dataclasses
- Checkpoint transformer: PostgresSaver JSONB -> structured timeline steps
- Replay API: GET /api/conversations (paginated), GET /api/replay/{thread_id}
- Analytics models: AgentUsage, InterruptStats, AnalyticsResult
- Analytics event recorder: Protocol + PostgresAnalyticsRecorder + NoOp
- Analytics queries: resolution_rate, agent_usage, escalation_rate, cost, interrupts
- Analytics API: GET /api/analytics?range=Xd with envelope response
- DB migration: analytics_events table + conversations column additions
- 74 new tests, 399 total passing, 92.87% coverage
This commit is contained in:
Yaojia Wang
2026-03-31 13:35:45 +02:00
parent a2f750269d
commit 33db5aeb10
26 changed files with 1903 additions and 23 deletions

View File

@@ -0,0 +1 @@
"""Unit tests for app.analytics module."""

View File

@@ -0,0 +1,149 @@
"""Unit tests for app.analytics.api."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
pytestmark = pytest.mark.unit
def _build_app() -> FastAPI:
    """Create a minimal FastAPI application with only the analytics router mounted."""
    from app.analytics.api import router

    application = FastAPI()
    application.include_router(router)
    return application
def _make_mock_pool() -> MagicMock:
mock_conn = AsyncMock()
mock_ctx = AsyncMock()
mock_ctx.__aenter__ = AsyncMock(return_value=mock_conn)
mock_ctx.__aexit__ = AsyncMock(return_value=None)
mock_pool = MagicMock()
mock_pool.connection.return_value = mock_ctx
return mock_pool
def _make_analytics_result() -> object:
    """Return a fixed AnalyticsResult fixture shared by the endpoint tests."""
    from app.analytics.models import AgentUsage, AnalyticsResult, InterruptStats

    usage = (AgentUsage(agent="order_agent", count=30, percentage=60.0),)
    interrupts = InterruptStats(total=5, approved=4, rejected=1, expired=0)
    return AnalyticsResult(
        range="7d",
        total_conversations=50,
        resolution_rate=0.8,
        escalation_rate=0.1,
        avg_turns_per_conversation=3.5,
        avg_cost_per_conversation_usd=0.02,
        agent_usage=usage,
        interrupt_stats=interrupts,
    )
def _get_analytics(
    app: FastAPI, path: str = "/api/analytics", **patch_kwargs: object
) -> tuple[object, object]:
    """Patch ``get_analytics``, issue a GET request, and return ``(response, mock)``.

    Fix: the return annotation previously claimed ``object`` although every
    caller unpacks a 2-tuple; the precise annotation now documents the real
    contract without changing behaviour.

    NOTE(review): ``**patch_kwargs`` is accepted but never used — presumably a
    leftover extension point; confirm whether it should be forwarded to
    ``patch`` or removed.
    """
    analytics_result = _make_analytics_result()
    with (
        patch("app.analytics.api.get_analytics", return_value=analytics_result) as mock_ga,
        TestClient(app) as client,
    ):
        resp = client.get(path)
        return resp, mock_ga
class TestAnalyticsEndpoint:
    """HTTP-level tests for GET /api/analytics (envelope, ranges, validation)."""

    def test_returns_200_with_default_range(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        resp, _ = _get_analytics(app)
        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert body["error"] is None
        assert body["data"]["range"] == "7d"

    def test_returns_correct_analytics_structure(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        resp, _ = _get_analytics(app)
        data = resp.json()["data"]
        # Every top-level metric of AnalyticsResult must be serialized.
        assert "total_conversations" in data
        assert "resolution_rate" in data
        assert "escalation_rate" in data
        assert "avg_turns_per_conversation" in data
        assert "avg_cost_per_conversation_usd" in data
        assert "agent_usage" in data
        assert "interrupt_stats" in data

    def test_custom_range_7d(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        resp, mock_ga = _get_analytics(app, "/api/analytics?range=7d")
        assert resp.status_code == 200
        mock_ga.assert_called_once()
        call_args = mock_ga.call_args
        # Fix: the old assertion indexed kwargs["range_days"] directly, which
        # raised KeyError (instead of failing cleanly) when the argument was
        # passed positionally.  Use .get() like the 30d test below.
        assert call_args.kwargs.get("range_days") == 7 or (
            len(call_args.args) > 1 and call_args.args[1] == 7
        )

    def test_custom_range_30d(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        resp, mock_ga = _get_analytics(app, "/api/analytics?range=30d")
        assert resp.status_code == 200
        call_args = mock_ga.call_args
        assert call_args.kwargs.get("range_days") == 30 or (
            len(call_args.args) > 1 and call_args.args[1] == 30
        )

    def test_invalid_range_format_returns_400(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        with TestClient(app) as client:
            resp = client.get("/api/analytics?range=invalid")
        assert resp.status_code == 400

    def test_range_without_d_suffix_returns_400(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        with TestClient(app) as client:
            resp = client.get("/api/analytics?range=7")
        assert resp.status_code == 400

    def test_agent_usage_in_response(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        resp, _ = _get_analytics(app)
        data = resp.json()["data"]
        assert len(data["agent_usage"]) == 1
        assert data["agent_usage"][0]["agent"] == "order_agent"

    def test_interrupt_stats_in_response(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        resp, _ = _get_analytics(app)
        data = resp.json()["data"]
        assert data["interrupt_stats"]["total"] == 5
        assert data["interrupt_stats"]["approved"] == 4

    def test_envelope_format(self) -> None:
        app = _build_app()
        app.state.pool = _make_mock_pool()
        resp, _ = _get_analytics(app)
        body = resp.json()
        # All analytics responses share the success/data/error envelope.
        assert "success" in body
        assert "data" in body
        assert "error" in body

View File

@@ -0,0 +1,148 @@
"""Unit tests for app.analytics.event_recorder."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
pytestmark = pytest.mark.unit
class TestAnalyticsRecorderProtocol:
    """Both concrete recorders must satisfy the recorder protocol shape."""

    def test_postgres_recorder_implements_protocol(self) -> None:
        from app.analytics.event_recorder import PostgresAnalyticsRecorder

        instance = PostgresAnalyticsRecorder(pool=MagicMock())
        # Structural check: the protocol only requires a callable record().
        record_attr = getattr(instance, "record", None)
        assert record_attr is not None
        assert callable(record_attr)

    def test_noop_recorder_implements_protocol(self) -> None:
        from app.analytics.event_recorder import NoOpAnalyticsRecorder

        instance = NoOpAnalyticsRecorder()
        record_attr = getattr(instance, "record", None)
        assert record_attr is not None
        assert callable(record_attr)
class TestNoOpAnalyticsRecorder:
    """The no-op recorder must silently accept any record() call shape."""

    @pytest.mark.asyncio
    async def test_record_does_nothing(self) -> None:
        from app.analytics.event_recorder import NoOpAnalyticsRecorder

        # A typical tool-call event must be swallowed without raising.
        await NoOpAnalyticsRecorder().record(
            thread_id="t1",
            event_type="tool_call",
            agent_name="order_agent",
            tool_name="get_order",
            tokens_used=50,
            cost_usd=0.001,
        )

    @pytest.mark.asyncio
    async def test_record_with_all_params(self) -> None:
        from app.analytics.event_recorder import NoOpAnalyticsRecorder

        # Exercise every keyword the protocol defines, including None values.
        await NoOpAnalyticsRecorder().record(
            thread_id="t1",
            event_type="agent_response",
            agent_name="fallback",
            tool_name=None,
            tokens_used=100,
            cost_usd=0.002,
            duration_ms=150,
            success=True,
            error_message=None,
            metadata={"extra": "data"},
        )

    @pytest.mark.asyncio
    async def test_record_minimal_params(self) -> None:
        from app.analytics.event_recorder import NoOpAnalyticsRecorder

        # Only the two required parameters.
        await NoOpAnalyticsRecorder().record(thread_id="t1", event_type="conversation_start")
class TestPostgresAnalyticsRecorder:
    """PostgresAnalyticsRecorder must INSERT one row per record() call."""

    @staticmethod
    def _recorder_with_conn() -> tuple[object, AsyncMock]:
        """Build a recorder wired to a mock pool; return (recorder, mock connection).

        Extracted because the identical 6-line mock-pool construction was
        duplicated in all three tests.
        """
        from app.analytics.event_recorder import PostgresAnalyticsRecorder

        mock_conn = AsyncMock()
        mock_ctx = AsyncMock()
        mock_ctx.__aenter__ = AsyncMock(return_value=mock_conn)
        mock_ctx.__aexit__ = AsyncMock(return_value=None)
        mock_pool = MagicMock()
        mock_pool.connection.return_value = mock_ctx
        return PostgresAnalyticsRecorder(pool=mock_pool), mock_conn

    @pytest.mark.asyncio
    async def test_record_executes_insert(self) -> None:
        recorder, mock_conn = self._recorder_with_conn()
        await recorder.record(
            thread_id="t1",
            event_type="tool_call",
            agent_name="order_agent",
            tokens_used=50,
            cost_usd=0.001,
        )
        mock_conn.execute.assert_awaited_once()
        sql = mock_conn.execute.call_args[0][0]
        assert "INSERT INTO analytics_events" in sql

    @pytest.mark.asyncio
    async def test_record_passes_correct_params(self) -> None:
        recorder, mock_conn = self._recorder_with_conn()
        await recorder.record(
            thread_id="thread-xyz",
            event_type="agent_response",
            agent_name="discount_agent",
            tool_name="apply_discount",
            tokens_used=75,
            cost_usd=0.002,
            duration_ms=300,
            success=True,
            error_message=None,
            metadata={"promo": "10PCT"},
        )
        # Second positional argument to execute() is the named-parameter dict.
        params = mock_conn.execute.call_args[0][1]
        assert params["thread_id"] == "thread-xyz"
        assert params["event_type"] == "agent_response"
        assert params["agent_name"] == "discount_agent"
        assert params["tokens_used"] == 75

    @pytest.mark.asyncio
    async def test_record_stores_metadata_as_dict(self) -> None:
        recorder, mock_conn = self._recorder_with_conn()
        await recorder.record(
            thread_id="t1",
            event_type="tool_call",
            metadata={"key": "val"},
        )
        params = mock_conn.execute.call_args[0][1]
        assert params["metadata"] == {"key": "val"}

View File

@@ -0,0 +1,106 @@
"""Unit tests for app.analytics.models."""
from __future__ import annotations
import pytest
pytestmark = pytest.mark.unit
class TestAgentUsage:
    """AgentUsage is a frozen value object carrying per-agent counts."""

    def test_agent_usage_construction(self) -> None:
        from app.analytics.models import AgentUsage

        usage = AgentUsage(agent="order_agent", count=10, percentage=50.0)
        assert (usage.agent, usage.count, usage.percentage) == ("order_agent", 10, 50.0)

    def test_agent_usage_is_frozen(self) -> None:
        from app.analytics.models import AgentUsage

        usage = AgentUsage(agent="a", count=1, percentage=100.0)
        # Frozen dataclasses raise FrozenInstanceError (an AttributeError subclass).
        with pytest.raises((AttributeError, TypeError)):
            usage.count = 2  # type: ignore[misc]
class TestInterruptStats:
    """InterruptStats defaults to all-zero counters and is immutable."""

    def test_interrupt_stats_defaults(self) -> None:
        from app.analytics.models import InterruptStats

        empty = InterruptStats()
        assert (empty.total, empty.approved, empty.rejected, empty.expired) == (0, 0, 0, 0)

    def test_interrupt_stats_custom_values(self) -> None:
        from app.analytics.models import InterruptStats

        stats = InterruptStats(total=10, approved=7, rejected=2, expired=1)
        assert stats.total == 10
        assert stats.approved == 7
        assert stats.rejected == 2
        assert stats.expired == 1

    def test_interrupt_stats_is_frozen(self) -> None:
        from app.analytics.models import InterruptStats

        stats = InterruptStats()
        with pytest.raises((AttributeError, TypeError)):
            stats.total = 5  # type: ignore[misc]
class TestAnalyticsResult:
    """AnalyticsResult aggregates all dashboard metrics as a frozen record."""

    @staticmethod
    def _zero_result() -> object:
        """Build an all-zero AnalyticsResult.

        Extracted because the identical 10-line construction was duplicated in
        the frozen-check and empty-usage tests.
        """
        from app.analytics.models import AnalyticsResult, InterruptStats

        return AnalyticsResult(
            range="7d",
            total_conversations=0,
            resolution_rate=0.0,
            escalation_rate=0.0,
            avg_turns_per_conversation=0.0,
            avg_cost_per_conversation_usd=0.0,
            agent_usage=(),
            interrupt_stats=InterruptStats(),
        )

    def test_analytics_result_construction(self) -> None:
        from app.analytics.models import AgentUsage, AnalyticsResult, InterruptStats

        result = AnalyticsResult(
            range="7d",
            total_conversations=100,
            resolution_rate=0.85,
            escalation_rate=0.05,
            avg_turns_per_conversation=4.2,
            avg_cost_per_conversation_usd=0.03,
            agent_usage=(AgentUsage(agent="order_agent", count=60, percentage=60.0),),
            interrupt_stats=InterruptStats(total=5, approved=4, rejected=1, expired=0),
        )
        assert result.range == "7d"
        assert result.total_conversations == 100
        assert result.resolution_rate == 0.85
        assert result.escalation_rate == 0.05
        assert result.avg_turns_per_conversation == 4.2
        assert result.avg_cost_per_conversation_usd == 0.03
        assert len(result.agent_usage) == 1
        assert result.interrupt_stats.total == 5

    def test_analytics_result_is_frozen(self) -> None:
        result = self._zero_result()
        with pytest.raises((AttributeError, TypeError)):
            result.range = "30d"  # type: ignore[misc]

    def test_analytics_result_empty_agent_usage(self) -> None:
        result = self._zero_result()
        assert result.agent_usage == ()

View File

@@ -0,0 +1,213 @@
"""Unit tests for app.analytics.queries."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
pytestmark = pytest.mark.unit
def _make_pool_with_fetchone(result: dict | None) -> MagicMock:
mock_cursor = AsyncMock()
mock_cursor.fetchone = AsyncMock(return_value=result)
mock_conn = AsyncMock()
mock_conn.execute = AsyncMock(return_value=mock_cursor)
mock_ctx = AsyncMock()
mock_ctx.__aenter__ = AsyncMock(return_value=mock_conn)
mock_ctx.__aexit__ = AsyncMock(return_value=None)
mock_pool = MagicMock()
mock_pool.connection.return_value = mock_ctx
return mock_pool
def _make_pool_with_fetchall(result: list[dict]) -> MagicMock:
mock_cursor = AsyncMock()
mock_cursor.fetchall = AsyncMock(return_value=result)
mock_conn = AsyncMock()
mock_conn.execute = AsyncMock(return_value=mock_cursor)
mock_ctx = AsyncMock()
mock_ctx.__aenter__ = AsyncMock(return_value=mock_conn)
mock_ctx.__aexit__ = AsyncMock(return_value=None)
mock_pool = MagicMock()
mock_pool.connection.return_value = mock_ctx
return mock_pool
class TestResolutionRate:
    """Behaviour of the resolution_rate query helper."""

    @pytest.mark.asyncio
    async def test_returns_float(self) -> None:
        from app.analytics.queries import resolution_rate

        fake_pool = _make_pool_with_fetchone({"rate": 0.85})
        value = await resolution_rate(fake_pool, range_days=7)
        assert isinstance(value, float)

    @pytest.mark.asyncio
    async def test_zero_state_returns_zero(self) -> None:
        from app.analytics.queries import resolution_rate

        # No row at all (empty database) must map to 0.0, not None.
        fake_pool = _make_pool_with_fetchone(None)
        assert await resolution_rate(fake_pool, range_days=7) == 0.0

    @pytest.mark.asyncio
    async def test_returns_correct_value(self) -> None:
        from app.analytics.queries import resolution_rate

        fake_pool = _make_pool_with_fetchone({"rate": 0.75})
        assert await resolution_rate(fake_pool, range_days=7) == 0.75
class TestAgentUsageQuery:
    """Behaviour of the agent_usage query helper."""

    @pytest.mark.asyncio
    async def test_returns_tuple(self) -> None:
        from app.analytics.queries import agent_usage

        fake_pool = _make_pool_with_fetchall([])
        outcome = await agent_usage(fake_pool, range_days=7)
        assert isinstance(outcome, tuple)

    @pytest.mark.asyncio
    async def test_empty_state_returns_empty_tuple(self) -> None:
        from app.analytics.queries import agent_usage

        fake_pool = _make_pool_with_fetchall([])
        assert await agent_usage(fake_pool, range_days=7) == ()

    @pytest.mark.asyncio
    async def test_maps_rows_to_agent_usage_objects(self) -> None:
        from app.analytics.models import AgentUsage
        from app.analytics.queries import agent_usage

        db_rows = [
            {"agent": "order_agent", "count": 10, "percentage": 66.7},
            {"agent": "discount_agent", "count": 5, "percentage": 33.3},
        ]
        fake_pool = _make_pool_with_fetchall(db_rows)
        outcome = await agent_usage(fake_pool, range_days=7)
        assert len(outcome) == 2
        first = outcome[0]
        assert isinstance(first, AgentUsage)
        assert first.agent == "order_agent"
        assert first.count == 10
class TestEscalationRate:
    """Behaviour of the escalation_rate query helper."""

    @pytest.mark.asyncio
    async def test_returns_float(self) -> None:
        from app.analytics.queries import escalation_rate

        fake_pool = _make_pool_with_fetchone({"rate": 0.05})
        value = await escalation_rate(fake_pool, range_days=7)
        assert isinstance(value, float)

    @pytest.mark.asyncio
    async def test_zero_state_returns_zero(self) -> None:
        from app.analytics.queries import escalation_rate

        fake_pool = _make_pool_with_fetchone(None)
        assert await escalation_rate(fake_pool, range_days=7) == 0.0
class TestCostPerConversation:
    """Behaviour of the cost_per_conversation query helper."""

    @pytest.mark.asyncio
    async def test_returns_float(self) -> None:
        from app.analytics.queries import cost_per_conversation

        fake_pool = _make_pool_with_fetchone({"avg_cost": 0.03})
        value = await cost_per_conversation(fake_pool, range_days=7)
        assert isinstance(value, float)

    @pytest.mark.asyncio
    async def test_zero_state_returns_zero(self) -> None:
        from app.analytics.queries import cost_per_conversation

        fake_pool = _make_pool_with_fetchone(None)
        assert await cost_per_conversation(fake_pool, range_days=7) == 0.0
class TestInterruptStatsQuery:
    """Behaviour of the interrupt_stats query helper."""

    @pytest.mark.asyncio
    async def test_returns_interrupt_stats(self) -> None:
        from app.analytics.models import InterruptStats
        from app.analytics.queries import interrupt_stats

        db_row = {"total": 10, "approved": 7, "rejected": 2, "expired": 1}
        fake_pool = _make_pool_with_fetchone(db_row)
        outcome = await interrupt_stats(fake_pool, range_days=7)
        assert isinstance(outcome, InterruptStats)
        assert outcome.total == 10
        assert outcome.approved == 7

    @pytest.mark.asyncio
    async def test_zero_state_returns_zeros(self) -> None:
        from app.analytics.models import InterruptStats
        from app.analytics.queries import interrupt_stats

        # No row must still produce a valid all-zero InterruptStats.
        fake_pool = _make_pool_with_fetchone(None)
        outcome = await interrupt_stats(fake_pool, range_days=7)
        assert isinstance(outcome, InterruptStats)
        assert (outcome.total, outcome.approved, outcome.rejected, outcome.expired) == (
            0,
            0,
            0,
            0,
        )
class TestGetAnalytics:
    """get_analytics aggregates the individual queries into AnalyticsResult."""

    @pytest.mark.asyncio
    async def test_returns_analytics_result(self) -> None:
        from unittest.mock import patch

        from app.analytics.models import AnalyticsResult, InterruptStats
        from app.analytics.queries import get_analytics

        pool_stub = MagicMock()
        # Stub every sub-query so only the aggregation logic runs.
        with (
            patch("app.analytics.queries.resolution_rate", return_value=0.85),
            patch("app.analytics.queries.escalation_rate", return_value=0.05),
            patch("app.analytics.queries.cost_per_conversation", return_value=0.03),
            patch("app.analytics.queries.agent_usage", return_value=()),
            patch(
                "app.analytics.queries.interrupt_stats",
                return_value=InterruptStats(),
            ),
            patch("app.analytics.queries._total_conversations", return_value=100),
            patch("app.analytics.queries._avg_turns", return_value=4.2),
        ):
            outcome = await get_analytics(pool_stub, range_days=7)
        assert isinstance(outcome, AnalyticsResult)
        assert outcome.range == "7d"
        assert outcome.total_conversations == 100
        assert outcome.resolution_rate == 0.85

    @pytest.mark.asyncio
    async def test_zero_state_returns_zeros(self) -> None:
        from unittest.mock import patch

        from app.analytics.models import AnalyticsResult, InterruptStats
        from app.analytics.queries import get_analytics

        pool_stub = MagicMock()
        with (
            patch("app.analytics.queries.resolution_rate", return_value=0.0),
            patch("app.analytics.queries.escalation_rate", return_value=0.0),
            patch("app.analytics.queries.cost_per_conversation", return_value=0.0),
            patch("app.analytics.queries.agent_usage", return_value=()),
            patch("app.analytics.queries.interrupt_stats", return_value=InterruptStats()),
            patch("app.analytics.queries._total_conversations", return_value=0),
            patch("app.analytics.queries._avg_turns", return_value=0.0),
        ):
            outcome = await get_analytics(pool_stub, range_days=7)
        assert isinstance(outcome, AnalyticsResult)
        assert outcome.total_conversations == 0
        assert outcome.resolution_rate == 0.0
        assert outcome.agent_usage == ()

View File

@@ -0,0 +1 @@
"""Unit tests for app.replay module."""

View File

@@ -0,0 +1,160 @@
"""Unit tests for app.replay.api."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
pytestmark = pytest.mark.unit
def _build_app() -> FastAPI:
    """Create a minimal FastAPI application with only the replay router mounted."""
    from app.replay.api import router

    application = FastAPI()
    application.include_router(router)
    return application
def _make_mock_pool(fetchall_result: list[dict]) -> MagicMock:
"""Build a mock pool that returns the given rows from fetchall."""
mock_cursor = AsyncMock()
mock_cursor.fetchall = AsyncMock(return_value=fetchall_result)
mock_conn = AsyncMock()
mock_conn.execute = AsyncMock(return_value=mock_cursor)
mock_ctx = AsyncMock()
mock_ctx.__aenter__ = AsyncMock(return_value=mock_conn)
mock_ctx.__aexit__ = AsyncMock(return_value=None)
mock_pool = MagicMock()
mock_pool.connection.return_value = mock_ctx
return mock_pool
class TestListConversations:
    """HTTP-level tests for GET /api/conversations (listing + pagination)."""

    def test_returns_200_with_empty_list(self) -> None:
        application = _build_app()
        application.state.pool = _make_mock_pool([])
        with TestClient(application) as client:
            resp = client.get("/api/conversations")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["success"] is True
        assert isinstance(payload["data"], list)
        assert payload["error"] is None

    def test_returns_conversations_list(self) -> None:
        application = _build_app()
        db_rows = [
            {
                "thread_id": "t1",
                "created_at": "2026-01-01T00:00:00",
                "last_activity": "2026-01-01T00:01:00",
                "status": "active",
                "total_tokens": 100,
                "total_cost_usd": 0.01,
            }
        ]
        application.state.pool = _make_mock_pool(db_rows)
        with TestClient(application) as client:
            resp = client.get("/api/conversations")
        payload = resp.json()
        assert resp.status_code == 200
        assert len(payload["data"]) == 1
        assert payload["data"][0]["thread_id"] == "t1"

    def test_pagination_defaults(self) -> None:
        application = _build_app()
        application.state.pool = _make_mock_pool([])
        with TestClient(application) as client:
            assert client.get("/api/conversations").status_code == 200

    def test_pagination_custom_params(self) -> None:
        application = _build_app()
        application.state.pool = _make_mock_pool([])
        with TestClient(application) as client:
            resp = client.get("/api/conversations?page=2&per_page=10")
        assert resp.status_code == 200

    def test_per_page_max_capped_at_100(self) -> None:
        application = _build_app()
        application.state.pool = _make_mock_pool([])
        with TestClient(application) as client:
            resp = client.get("/api/conversations?per_page=200")
        # FastAPI validation rejects values > 100
        assert resp.status_code in (200, 422)
class TestGetReplay:
    """HTTP-level tests for GET /api/replay/{thread_id}."""

    def test_thread_not_found_returns_404(self) -> None:
        application = _build_app()
        application.state.pool = _make_mock_pool([])
        with TestClient(application) as client:
            resp = client.get("/api/replay/nonexistent-thread")
        assert resp.status_code == 404

    def test_returns_replay_page_for_existing_thread(self) -> None:
        application = _build_app()
        db_rows = [
            {
                "thread_id": "thread-123",
                "checkpoint_id": "cp-001",
                "checkpoint": {
                    "channel_values": {
                        "messages": [{"type": "human", "content": "Hello"}]
                    }
                },
                "metadata": {},
            }
        ]
        application.state.pool = _make_mock_pool(db_rows)
        with TestClient(application) as client:
            resp = client.get("/api/replay/thread-123")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["success"] is True
        data = payload["data"]
        assert data["thread_id"] == "thread-123"
        # The replay page must expose all pagination bookkeeping fields.
        for field in ("steps", "total_steps", "page", "per_page"):
            assert field in data

    def test_replay_pagination_params(self) -> None:
        application = _build_app()
        db_rows = [
            {
                "thread_id": "t1",
                "checkpoint_id": "cp-001",
                "checkpoint": {
                    "channel_values": {"messages": [{"type": "human", "content": "Hi"}]}
                },
                "metadata": {},
            }
        ]
        application.state.pool = _make_mock_pool(db_rows)
        with TestClient(application) as client:
            resp = client.get("/api/replay/t1?page=1&per_page=5")
        assert resp.status_code == 200

    def test_error_response_has_envelope(self) -> None:
        application = _build_app()
        application.state.pool = _make_mock_pool([])
        with TestClient(application) as client:
            resp = client.get("/api/replay/missing")
        payload = resp.json()
        assert "detail" in payload or "error" in payload or resp.status_code == 404

View File

@@ -0,0 +1,134 @@
"""Unit tests for app.replay.models."""
from __future__ import annotations
import pytest
pytestmark = pytest.mark.unit
class TestStepType:
    """The StepType enum must cover every timeline event kind."""

    def test_all_step_types_exist(self) -> None:
        from app.replay.models import StepType

        expected_members = (
            StepType.user_message,
            StepType.supervisor_routing,
            StepType.tool_call,
            StepType.tool_result,
            StepType.agent_response,
            StepType.interrupt,
        )
        # Enum members are truthy; a missing member raises AttributeError above.
        assert all(expected_members)

    def test_step_type_values(self) -> None:
        from app.replay.models import StepType

        assert StepType.user_message.value == "user_message"
        assert StepType.tool_call.value == "tool_call"
        assert StepType.agent_response.value == "agent_response"
class TestReplayStep:
    """ReplayStep is a frozen record describing one timeline entry."""

    def test_minimal_replay_step(self) -> None:
        from app.replay.models import ReplayStep, StepType

        entry = ReplayStep(step=1, type=StepType.user_message, timestamp="2026-01-01T00:00:00Z")
        assert entry.step == 1
        assert entry.type == StepType.user_message
        assert entry.timestamp == "2026-01-01T00:00:00Z"
        assert entry.content == ""
        # All optional fields default to None.
        optional = (
            entry.agent,
            entry.tool,
            entry.params,
            entry.result,
            entry.reasoning,
            entry.tokens,
            entry.duration_ms,
        )
        assert all(value is None for value in optional)

    def test_full_replay_step(self) -> None:
        from app.replay.models import ReplayStep, StepType

        entry = ReplayStep(
            step=2,
            type=StepType.tool_call,
            timestamp="2026-01-01T00:00:01Z",
            content="calling get_order",
            agent="order_agent",
            tool="get_order_status",
            params={"order_id": "ORD-123"},
            result={"status": "shipped"},
            reasoning="user asked about order",
            tokens=50,
            duration_ms=200,
        )
        assert entry.step == 2
        assert entry.agent == "order_agent"
        assert entry.tool == "get_order_status"
        assert entry.params == {"order_id": "ORD-123"}
        assert entry.tokens == 50

    def test_replay_step_is_frozen(self) -> None:
        from app.replay.models import ReplayStep, StepType

        entry = ReplayStep(step=1, type=StepType.user_message, timestamp="2026-01-01T00:00:00Z")
        with pytest.raises((AttributeError, TypeError)):
            entry.step = 99  # type: ignore[misc]

    def test_replay_step_params_is_immutable_copy(self) -> None:
        from app.replay.models import ReplayStep, StepType

        source_params = {"key": "value"}
        entry = ReplayStep(
            step=1,
            type=StepType.tool_call,
            timestamp="2026-01-01T00:00:00Z",
            params=source_params,
        )
        # Mutating the caller's dict afterwards must not leak into the step.
        source_params["new_key"] = "new_value"
        assert "new_key" not in (entry.params or {})
class TestReplayPage:
    """ReplayPage is a frozen, paginated container of ReplayStep records."""

    @staticmethod
    def _empty_page() -> object:
        """Build a zero-step page for thread "t1".

        Extracted because the identical construction was duplicated in the
        frozen-check and empty-steps tests.
        """
        from app.replay.models import ReplayPage

        return ReplayPage(
            thread_id="t1",
            total_steps=0,
            page=1,
            per_page=20,
            steps=(),
        )

    def test_replay_page_construction(self) -> None:
        from app.replay.models import ReplayPage, ReplayStep, StepType

        steps = (
            ReplayStep(step=1, type=StepType.user_message, timestamp="2026-01-01T00:00:00Z"),
            ReplayStep(step=2, type=StepType.agent_response, timestamp="2026-01-01T00:00:01Z"),
        )
        page = ReplayPage(
            thread_id="thread-123",
            total_steps=2,
            page=1,
            per_page=20,
            steps=steps,
        )
        assert page.thread_id == "thread-123"
        assert page.total_steps == 2
        assert page.page == 1
        assert page.per_page == 20
        assert len(page.steps) == 2

    def test_replay_page_is_frozen(self) -> None:
        page = self._empty_page()
        with pytest.raises((AttributeError, TypeError)):
            page.page = 2  # type: ignore[misc]

    def test_replay_page_empty_steps(self) -> None:
        page = self._empty_page()
        assert page.steps == ()

View File

@@ -0,0 +1,155 @@
"""Unit tests for app.replay.transformer."""
from __future__ import annotations
import pytest
pytestmark = pytest.mark.unit
def _make_row(messages: list[dict], metadata: dict | None = None) -> dict:
"""Helper to build a checkpoint row with the given messages."""
return {
"thread_id": "thread-abc",
"checkpoint_id": "cp-001",
"checkpoint": {"channel_values": {"messages": messages}},
"metadata": metadata or {},
}
class TestTransformCheckpoints:
    """transform_checkpoints maps raw checkpoint rows to ReplayStep timelines."""

    def test_empty_rows_returns_empty_list(self) -> None:
        from app.replay.transformer import transform_checkpoints

        assert transform_checkpoints([]) == []

    def test_human_message_produces_user_message_step(self) -> None:
        from app.replay.models import StepType
        from app.replay.transformer import transform_checkpoints

        timeline = transform_checkpoints(
            [_make_row([{"type": "human", "content": "Hello, I need help"}])]
        )
        assert len(timeline) == 1
        first = timeline[0]
        assert first.type == StepType.user_message
        assert first.content == "Hello, I need help"
        assert first.step == 1

    def test_ai_message_with_content_produces_agent_response(self) -> None:
        from app.replay.models import StepType
        from app.replay.transformer import transform_checkpoints

        row = _make_row(
            [{"type": "ai", "content": "I can help you with that.", "tool_calls": []}],
            metadata={"writes": {"some_agent": "response"}},
        )
        timeline = transform_checkpoints([row])
        assert len(timeline) == 1
        assert timeline[0].type == StepType.agent_response
        assert timeline[0].content == "I can help you with that."

    def test_ai_message_with_tool_calls_produces_tool_call_step(self) -> None:
        from app.replay.models import StepType
        from app.replay.transformer import transform_checkpoints

        ai_message = {
            "type": "ai",
            "content": "",
            "tool_calls": [
                {
                    "name": "get_order_status",
                    "args": {"order_id": "ORD-123"},
                    "id": "call_abc",
                }
            ],
        }
        timeline = transform_checkpoints([_make_row([ai_message])])
        assert len(timeline) == 1
        assert timeline[0].type == StepType.tool_call
        assert timeline[0].tool == "get_order_status"
        assert timeline[0].params == {"order_id": "ORD-123"}

    def test_tool_message_produces_tool_result_step(self) -> None:
        from app.replay.models import StepType
        from app.replay.transformer import transform_checkpoints

        tool_message = {
            "type": "tool",
            "content": '{"status": "shipped"}',
            "name": "get_order_status",
        }
        timeline = transform_checkpoints([_make_row([tool_message])])
        assert len(timeline) == 1
        assert timeline[0].type == StepType.tool_result
        assert timeline[0].tool == "get_order_status"

    def test_multiple_messages_sequential_steps(self) -> None:
        from app.replay.transformer import transform_checkpoints

        row = _make_row(
            [
                {"type": "human", "content": "Help"},
                {"type": "ai", "content": "Sure!", "tool_calls": []},
            ]
        )
        timeline = transform_checkpoints([row])
        assert len(timeline) == 2
        assert [entry.step for entry in timeline] == [1, 2]

    def test_unknown_message_type_skipped(self) -> None:
        from app.replay.transformer import transform_checkpoints

        timeline = transform_checkpoints(
            [_make_row([{"type": "unknown_type", "content": "test"}])]
        )
        # Must not crash; unrecognised types may simply be dropped.
        assert isinstance(timeline, list)

    def test_row_missing_checkpoint_skipped(self) -> None:
        from app.replay.transformer import transform_checkpoints

        bad_row = {"thread_id": "t1", "checkpoint_id": "cp1", "checkpoint": None, "metadata": {}}
        assert isinstance(transform_checkpoints([bad_row]), list)

    def test_row_missing_messages_key_skipped(self) -> None:
        from app.replay.transformer import transform_checkpoints

        bad_row = {"thread_id": "t1", "checkpoint_id": "cp1", "checkpoint": {}, "metadata": {}}
        assert isinstance(transform_checkpoints([bad_row]), list)

    def test_multiple_rows_steps_are_continuous(self) -> None:
        from app.replay.transformer import transform_checkpoints

        timeline = transform_checkpoints(
            [
                _make_row([{"type": "human", "content": "Q1"}]),
                _make_row([{"type": "ai", "content": "A1", "tool_calls": []}]),
            ]
        )
        # Step numbering must not reset between checkpoint rows.
        assert len(timeline) == 2
        assert [entry.step for entry in timeline] == [1, 2]

    def test_timestamps_are_strings(self) -> None:
        from app.replay.transformer import transform_checkpoints

        timeline = transform_checkpoints([_make_row([{"type": "human", "content": "Hi"}])])
        assert isinstance(timeline[0].timestamp, str)

View File

@@ -55,7 +55,7 @@ class TestDbModule:
from app.db import setup_app_tables
await setup_app_tables(mock_pool)
assert mock_conn.execute.await_count == 2
assert mock_conn.execute.await_count == 4
def test_ddl_statements_valid(self) -> None:
assert "CREATE TABLE IF NOT EXISTS conversations" in _CONVERSATIONS_DDL

View File

@@ -0,0 +1,55 @@
"""Phase 4 DB migration tests -- analytics_events table and conversation columns."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
pytestmark = pytest.mark.unit
class TestAnalyticsEventsDDL:
    """Phase-4 schema: analytics_events table plus conversations migrations."""

    def test_analytics_events_ddl_exists(self) -> None:
        from app.db import _ANALYTICS_EVENTS_DDL

        assert "CREATE TABLE IF NOT EXISTS analytics_events" in _ANALYTICS_EVENTS_DDL

    def test_analytics_events_ddl_has_required_columns(self) -> None:
        from app.db import _ANALYTICS_EVENTS_DDL

        required_columns = (
            "thread_id",
            "event_type",
            "agent_name",
            "tool_name",
            "tokens_used",
            "cost_usd",
            "duration_ms",
            "success",
            "error_message",
            "metadata",
        )
        for column in required_columns:
            assert column in _ANALYTICS_EVENTS_DDL

    def test_conversations_migration_ddl_exists(self) -> None:
        from app.db import _CONVERSATIONS_MIGRATION_DDL

        assert "ALTER TABLE" in _CONVERSATIONS_MIGRATION_DDL
        for fragment in ("resolution_type", "agents_used", "turn_count", "ended_at"):
            assert fragment in _CONVERSATIONS_MIGRATION_DDL
        # Migration must be safely re-runnable.
        assert "IF NOT EXISTS" in _CONVERSATIONS_MIGRATION_DDL

    @pytest.mark.asyncio
    async def test_setup_app_tables_executes_analytics_ddl(self) -> None:
        from app.db import setup_app_tables

        conn_stub = AsyncMock()
        ctx_stub = AsyncMock()
        ctx_stub.__aenter__ = AsyncMock(return_value=conn_stub)
        ctx_stub.__aexit__ = AsyncMock(return_value=None)
        pool_stub = MagicMock()
        pool_stub.connection.return_value = ctx_stub
        await setup_app_tables(pool_stub)
        # Now expects 4 statements: conversations, interrupts, analytics_events, migrations
        assert conn_stub.execute.await_count == 4

View File

@@ -13,7 +13,7 @@ class TestMainModule:
assert app.title == "Smart Support"
def test_app_version(self) -> None:
assert app.version == "0.3.0"
assert app.version == "0.4.0"
def test_agents_yaml_path_exists(self) -> None:
assert AGENTS_YAML.name == "agents.yaml"
@@ -25,3 +25,11 @@ class TestMainModule:
def test_websocket_route_registered(self) -> None:
routes = [r.path for r in app.routes if hasattr(r, "path")]
assert "/ws" in routes
def test_replay_router_registered(self) -> None:
routes = [r.path for r in app.routes if hasattr(r, "path")]
assert any("replay" in p or "conversations" in p for p in routes)
def test_analytics_router_registered(self) -> None:
routes = [r.path for r in app.routes if hasattr(r, "path")]
assert any("analytics" in p for p in routes)