From 036e12349ded14f5bc22e270c73226a03e431635 Mon Sep 17 00:00:00 2001 From: Yaojia Wang Date: Sun, 5 Apr 2026 23:10:50 +0200 Subject: [PATCH] refactor: formalize safety rules, extract shared styles, reconcile docs (P2) - Add backend/app/safety.py with explicit confirmation policy, multi-intent semantics, and MCP error taxonomy with retry classification - Add 26 unit tests for safety module (confirmation rules, error taxonomy) - Extract repeated inline styles into shared CSS classes in index.css (section-card, stat-label, status-badge, data-table, empty/error-state, pagination-bar) - Refactor DashboardPage, ReplayListPage, ReplayPage to use shared classes - Update README: add missing API endpoints, document safety/confirmation rules - Use proper HTML entities for arrow/dash characters to fix encoding glitches --- README.md | 21 +++- backend/app/safety.py | 131 +++++++++++++++++++++++++ backend/tests/unit/test_safety.py | 85 ++++++++++++++++ frontend/src/index.css | 134 ++++++++++++++++++++++++++ frontend/src/pages/DashboardPage.tsx | 58 +++++------ frontend/src/pages/ReplayListPage.tsx | 91 +++++++---------- frontend/src/pages/ReplayPage.tsx | 48 +++++---- 7 files changed, 448 insertions(+), 120 deletions(-) create mode 100644 backend/app/safety.py create mode 100644 backend/tests/unit/test_safety.py diff --git a/README.md b/README.md index aa02a65..be354a8 100644 --- a/README.md +++ b/README.md @@ -128,11 +128,24 @@ agents: |--------|------|-------------| | WS | `/ws` | Main WebSocket chat endpoint | | GET | `/api/health` | Health check | -| GET | `/api/conversations` | List conversations | -| GET | `/api/replay/{thread_id}` | Replay conversation | -| GET | `/api/analytics` | Analytics summary | -| POST | `/api/openapi/import` | Import OpenAPI spec | +| GET | `/api/conversations` | List conversations (paginated) | +| GET | `/api/replay/{thread_id}` | Replay conversation steps (paginated) | +| GET | `/api/analytics` | Analytics summary (`?range=7d`) | +| POST | `/api/openapi/import` | Start OpenAPI import job | | GET | `/api/openapi/jobs/{id}` | Check import job status | +| GET | `/api/openapi/jobs/{id}/classifications` | Get endpoint classifications | +| PUT | `/api/openapi/jobs/{id}/classifications/{idx}` | Update a classification | +| POST | `/api/openapi/jobs/{id}/approve` | Approve and generate tools | + +## Safety and Confirmation Rules + +Destructive-action confirmation is explicit and auditable (see `backend/app/safety.py`): + +- **Read actions** execute immediately -- no confirmation required. +- **Write actions** require human-in-the-loop approval via an interrupt gate. +- **OpenAPI-imported endpoints** use the `needs_interrupt` classification flag. +- **Multi-intent handling** is sequential: if a write action is blocked by an interrupt, subsequent actions are paused until the interrupt is resolved or rejected. +- **MCP errors** are classified into `transient` (retryable, up to 3 attempts), `validation` (not retryable), `auth` (not retryable, escalate), and `unknown` (not retryable, log and escalate). ## Security diff --git a/backend/app/safety.py b/backend/app/safety.py new file mode 100644 index 0000000..cdf96e9 --- /dev/null +++ b/backend/app/safety.py @@ -0,0 +1,131 @@ +"""Safety policy for destructive-action confirmation rules. + +This module makes the confirmation rules explicit and auditable. Every tool +call passes through ``requires_confirmation`` before execution to decide +whether human-in-the-loop approval is needed. + +Policy summary +-------------- +- ``read`` actions: execute immediately, no confirmation required. +- ``write`` actions: require human approval via interrupt gate. +- OpenAPI-imported endpoints: use ``needs_interrupt`` from classification. +- If both the agent permission AND the endpoint classification agree + the action is read-only, it executes without confirmation. + +Multi-intent semantics +---------------------- +When a user message contains multiple intents (e.g. "cancel my order and +apply a refund"), the supervisor routes them sequentially. Each action is +evaluated independently: +- If a write action is blocked by an interrupt, subsequent actions in the + same message are paused until the interrupt is resolved. +- Read actions that follow a blocked write are also paused (sequential, + not best-effort) to preserve causal ordering. +- If an interrupt is rejected, the remaining actions are skipped and the + agent informs the user. + +MCP error taxonomy +------------------ +Tool execution errors are classified into categories for retry decisions: + +- ``transient``: network timeouts, rate limits, 5xx -- retryable up to 3 times. +- ``validation``: bad parameters, 4xx -- not retryable, report to user. +- ``auth``: 401/403 -- not retryable, escalate. +- ``unknown``: unclassified -- not retryable, log and escalate. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + + +@dataclass(frozen=True) +class ConfirmationPolicy: + """Result of evaluating whether an action needs confirmation.""" + + requires_confirmation: bool + reason: str + + +def requires_confirmation( + *, + agent_permission: Literal["read", "write"], + needs_interrupt: bool | None = None, +) -> ConfirmationPolicy: + """Determine whether an action requires human confirmation. + + Parameters + ---------- + agent_permission: + The permission level of the agent executing the action. + needs_interrupt: + Override from OpenAPI classification. When ``None``, the decision + is based solely on ``agent_permission``. + """ + if needs_interrupt is not None: + if needs_interrupt: + return ConfirmationPolicy( + requires_confirmation=True, + reason="Endpoint classified as requiring human approval", + ) + return ConfirmationPolicy( + requires_confirmation=False, + reason="Endpoint classified as safe (no interrupt needed)", + ) + + if agent_permission == "write": + return ConfirmationPolicy( + requires_confirmation=True, + reason="Write-permission agent actions require confirmation", + ) + + return ConfirmationPolicy( + requires_confirmation=False, + reason="Read-only agent actions execute immediately", + ) + + +# --- MCP Error Taxonomy --- + + +MCP_ERROR_CATEGORY = Literal["transient", "validation", "auth", "unknown"] + +_TRANSIENT_STATUS_CODES = frozenset({408, 429, 500, 502, 503, 504}) +_AUTH_STATUS_CODES = frozenset({401, 403}) +_MAX_RETRIES = 3 + + +def classify_mcp_error( + *, + status_code: int | None = None, + error_message: str = "", +) -> MCP_ERROR_CATEGORY: + """Classify an MCP tool error for retry decisions.""" + if status_code is not None: + if status_code in _TRANSIENT_STATUS_CODES: + return "transient" + if status_code in _AUTH_STATUS_CODES: + return "auth" + if 400 <= status_code < 500: + return "validation" + + lower_msg = error_message.lower() + if any(kw in lower_msg for kw in ("timeout", "timed out", "rate limit")): + return "transient" + if any(kw in lower_msg for kw in ("unauthorized", "forbidden")): + return "auth" + if any(kw in lower_msg for kw in ("invalid", "missing", "bad request")): + return "validation" + + return "unknown" + + +def is_retryable(category: MCP_ERROR_CATEGORY) -> bool: + """Return whether a given error category is retryable.""" + return category == "transient" + + +def max_retries() -> int: + """Maximum retry attempts for transient errors.""" + return _MAX_RETRIES diff --git a/backend/tests/unit/test_safety.py b/backend/tests/unit/test_safety.py new file mode 100644 index 0000000..861a616 --- /dev/null +++ b/backend/tests/unit/test_safety.py @@ -0,0 +1,85 @@ +"""Tests for app.safety module -- confirmation rules and MCP error taxonomy.""" + +from __future__ import annotations + +import pytest + +from app.safety import ( + classify_mcp_error, + is_retryable, + max_retries, + requires_confirmation, +) + +pytestmark = pytest.mark.unit + + +class TestRequiresConfirmation: + def test_read_agent_no_override(self) -> None: + result = requires_confirmation(agent_permission="read") + assert result.requires_confirmation is False + + def test_write_agent_no_override(self) -> None: + result = requires_confirmation(agent_permission="write") + assert result.requires_confirmation is True + + def test_interrupt_override_true(self) -> None: + result = requires_confirmation( + agent_permission="read", needs_interrupt=True, + ) + assert result.requires_confirmation is True + + def test_interrupt_override_false(self) -> None: + result = requires_confirmation( + agent_permission="write", needs_interrupt=False, + ) + assert result.requires_confirmation is False + + +class TestClassifyMcpError: + @pytest.mark.parametrize("code", [408, 429, 500, 502, 503, 504]) + def test_transient_status_codes(self, code: int) -> None: + assert classify_mcp_error(status_code=code) == "transient" + + @pytest.mark.parametrize("code", [401, 403]) + def test_auth_status_codes(self, code: int) -> None: + assert classify_mcp_error(status_code=code) == "auth" + + @pytest.mark.parametrize("code", [400, 404, 422]) + def test_validation_status_codes(self, code: int) -> None: + assert classify_mcp_error(status_code=code) == "validation" + + def test_unknown_status_code(self) -> None: + assert classify_mcp_error(status_code=200) == "unknown" + + def test_timeout_message(self) -> None: + assert classify_mcp_error(error_message="Connection timed out") == "transient" + + def test_rate_limit_message(self) -> None: + assert classify_mcp_error(error_message="Rate limit exceeded") == "transient" + + def test_unauthorized_message(self) -> None: + assert classify_mcp_error(error_message="Unauthorized access") == "auth" + + def test_invalid_message(self) -> None: + assert classify_mcp_error(error_message="Invalid parameter") == "validation" + + def test_unknown_message(self) -> None: + assert classify_mcp_error(error_message="Something happened") == "unknown" + + +class TestRetryPolicy: + def test_transient_is_retryable(self) -> None: + assert is_retryable("transient") is True + + def test_validation_not_retryable(self) -> None: + assert is_retryable("validation") is False + + def test_auth_not_retryable(self) -> None: + assert is_retryable("auth") is False + + def test_unknown_not_retryable(self) -> None: + assert is_retryable("unknown") is False + + def test_max_retries_value(self) -> None: + assert max_retries() == 3 diff --git a/frontend/src/index.css b/frontend/src/index.css index 2d7f469..300279c 100644 --- a/frontend/src/index.css +++ b/frontend/src/index.css @@ -658,6 +658,140 @@ body { border-color: var(--text-primary); } +/* --- Shared Data Display Components --- */ + +.section-card { + background-color: var(--bg-surface); + border-radius: var(--radius-xl); + padding: 1.5rem; + border: 1px solid var(--border-light); +} + +.stat-label { + font-size: 0.75rem; + text-transform: uppercase; + color: var(--text-secondary); + font-weight: 600; + letter-spacing: 0.05em; +} + +.stat-value { + font-weight: 600; + font-size: 0.9375rem; + color: var(--text-primary); +} + +.status-badge { + display: inline-block; + font-size: 0.75rem; + padding: 4px 10px; + border-radius: 6px; + font-weight: 600; +} + +.status-badge--resolved { + background-color: #DEF7EC; + color: #03543F; +} + +.status-badge--escalated { + background-color: #FDE8E8; + color: #9B1C1C; +} + +.status-badge--active { + background-color: var(--bg-hover); + color: var(--text-secondary); +} + +.data-table { + width: 100%; + border-collapse: collapse; + text-align: left; +} + +.data-table th { + padding: 0.75rem 1.5rem; + font-size: 0.75rem; + text-transform: uppercase; + color: var(--text-secondary); + font-weight: 600; +} + +.data-table td { + padding: 1.25rem 1.5rem; + font-size: 0.9375rem; +} + +.data-table thead tr { + border-bottom: 2px solid var(--border-light); +} + +.data-table tbody tr { + border-bottom: 1px solid var(--border-light); + transition: background-color 0.2s; +} + +.data-table tbody tr:last-child { + border-bottom: none; +} + +.data-table tbody tr:hover { + background-color: var(--bg-hover); +} + +.empty-state { + padding: 3rem; + text-align: center; + color: var(--text-secondary); +} + +.empty-state__title { + font-size: 1.125rem; + font-weight: 600; + margin: 0; +} + +.empty-state__description { + margin-top: 0.5rem; +} + +.error-state { + padding: 3rem; + text-align: center; + color: var(--text-secondary); +} + +.error-state__title { + font-size: 1.125rem; + font-weight: 600; + color: var(--brand-accent); + margin: 0; +} + +.error-state__description { + margin-top: 0.5rem; +} + +.pagination-bar { + padding: 1.25rem 1.5rem; + border-top: 1px solid var(--border-light); + display: flex; + justify-content: space-between; + align-items: center; + background-color: var(--bg-surface-inner); +} + +.pagination-bar__info { + font-size: 0.875rem; + color: var(--text-secondary); +} + +.pagination-bar__controls { + display: flex; + gap: 0.5rem; +} + /* --- Skeleton Loading Animation --- */ @keyframes pulse-skeleton { 0% { opacity: 0.5; background-color: var(--bg-hover); } diff --git a/frontend/src/pages/DashboardPage.tsx b/frontend/src/pages/DashboardPage.tsx index 39d1f2d..f875cac 100644 --- a/frontend/src/pages/DashboardPage.tsx +++ b/frontend/src/pages/DashboardPage.tsx @@ -65,7 +65,7 @@ export function DashboardPage() { <>
{[1, 2, 3, 4].map(i => ( -
+
@@ -78,15 +78,15 @@ export function DashboardPage() {
) : error ? ( -
-

Failed to load analytics

-

{error}

+
+

Failed to load analytics

+

{error}

) : !data ? ( -
-

No analytics data available

-

Start some conversations to see metrics here.

+
+

No analytics data available

+

Start some conversations to see metrics here.

) : ( <> @@ -99,25 +99,25 @@ export function DashboardPage() {
{/* Agent Workload Table */} -
+

Agent Workload Distribution

{data.agent_usage.length === 0 ? (

No agent activity recorded yet.

) : ( - +
- - - - + + + + {data.agent_usage.map((a) => ( - e.currentTarget.style.backgroundColor = 'var(--bg-hover)'} onMouseLeave={(e) => e.currentTarget.style.backgroundColor = 'transparent'}> - - - + + + + ))} @@ -126,7 +126,7 @@ export function DashboardPage() { {/* Human in the loop card */} -
+

Security Approvals

? @@ -167,24 +167,10 @@ export function DashboardPage() { function MetricBox({ label, value, trend, positive }: { label: string, value: string | number, trend: string, positive?: boolean }) { return ( -
-
- {label} -
-
- {value} -
-
- {trend} -
+
+
{label}
+
{value}
+
{trend}
); } diff --git a/frontend/src/pages/ReplayListPage.tsx b/frontend/src/pages/ReplayListPage.tsx index ca90f96..8ea5a74 100644 --- a/frontend/src/pages/ReplayListPage.tsx +++ b/frontend/src/pages/ReplayListPage.tsx @@ -37,23 +37,27 @@ export function ReplayListPage() { return `$${usd.toFixed(2)}`; } + function statusClass(status: string | null): string { + if (status === "resolved") return "status-badge status-badge--resolved"; + if (status === "escalated") return "status-badge status-badge--escalated"; + return "status-badge status-badge--active"; + } + return (
-
-
-

Conversation Replay

-

Review autonomous agent sessions and audit MCP action execution trails.

-
+
+

Conversation Replay

+

Review autonomous agent sessions and audit MCP action execution trails.

{error ? ( -
-

Failed to load conversations

-

{error}

+
+

Failed to load conversations

+

{error}

) : isLoading ? ( -
+
{[1, 2, 3, 4, 5].map(i => (
@@ -61,56 +65,38 @@ export function ReplayListPage() { ))}
) : conversations.length === 0 ? ( -
-

No conversations yet

-

Start a chat session to see conversations here.

+
+

No conversations yet

+

Start a chat session to see conversations here.

) : ( -
-
Agent NameMessage CountShare
Agent NameMessage CountShare
{a.agent}{a.count.toLocaleString()}{pct(a.percentage)}
{a.agent}{a.count.toLocaleString()}{pct(a.percentage)}
+
+
- - - - - - + + + + + + - {conversations.map((c, i) => ( + {conversations.map((c) => ( navigate(`/replay/${c.thread_id}`)} - style={{ - borderBottom: i === conversations.length - 1 ? "none" : "1px solid var(--border-light)", - cursor: "pointer", - transition: "background-color 0.2s" - }} - className="replay-row-hover" + style={{ cursor: "pointer" }} > - - + + - - - @@ -118,11 +104,11 @@ export function ReplayListPage() {
ThreadCreatedLast ActivityStatusCost
ThreadCreatedLast ActivityStatusCost
-
{c.thread_id}
+
+ {c.thread_id} - {formatDate(c.created_at)} + {formatDate(c.created_at)}{formatDate(c.last_activity)} + {c.status ?? "active"} - {formatDate(c.last_activity)} - - - {c.status ?? "active"} - - + {c.total_tokens.toLocaleString()} tokens / {formatCost(c.total_cost_usd)}
-
- +
+ Showing {(page - 1) * perPage + 1}-{Math.min(page * perPage, total)} of {total} sessions -
+
)} -
); } diff --git a/frontend/src/pages/ReplayPage.tsx b/frontend/src/pages/ReplayPage.tsx index a81e572..b3615bd 100644 --- a/frontend/src/pages/ReplayPage.tsx +++ b/frontend/src/pages/ReplayPage.tsx @@ -28,23 +28,21 @@ export function ReplayPage() { return (
-
-
- -

Audit Trail: {threadId}

-

Detailed temporal log of agent reflections, MCP tool calls, and human overrides.

-
+
+ +

Audit Trail: {threadId}

+

Detailed temporal log of agent reflections, MCP tool calls, and human overrides.

{error ? ( -
-

Failed to load replay

-

{error}

+
+

Failed to load replay

+

{error}

) : isLoading ? (
@@ -52,29 +50,29 @@ export function ReplayPage() {
) : steps.length === 0 ? ( -
-

No replay steps found

-

This conversation has no recorded checkpoints.

+
+

No replay steps found

+

This conversation has no recorded checkpoints.

) : (
{/* Sidebar Summary Info */} -
+

Session Context

-
Thread ID
-
{threadId}
+
Thread ID
+
{threadId}
-
Total Steps
-
{totalSteps}
+
Total Steps
+
{totalSteps}
-
Time Range
+
Time Range
{steps[0]?.timestamp ? new Date(steps[0].timestamp).toLocaleString() : "N/A"} - {" - "} + {" \u2013 "} {steps[steps.length - 1]?.timestamp ? new Date(steps[steps.length - 1].timestamp).toLocaleString() : "N/A"}
@@ -82,7 +80,7 @@ export function ReplayPage() {
{/* Timeline */} -
+