Backend: - ConversationTracker: Protocol + PostgresConversationTracker for lifecycle tracking - Error handler: ErrorCategory enum, classify_error(), with_retry() exponential backoff - Wire PostgresAnalyticsRecorder + ConversationTracker into ws_handler - Rate limiting (10 msg/10s per thread), edge case hardening - Health endpoint GET /api/health, version 0.5.0 - Demo seed data script + sample OpenAPI spec Frontend (all new): - React Router with NavBar (Chat / Replay / Dashboard / Review) - ReplayListPage + ReplayPage with ReplayTimeline component - DashboardPage with MetricCard, range selector, zero-state - ReviewPage for OpenAPI classification review - ErrorBanner for WebSocket disconnect handling - API client (api.ts) with typed fetch wrappers Infrastructure: - Frontend Dockerfile (multi-stage node -> nginx) - nginx.conf with SPA routing + API/WS proxy - docker-compose.yml with frontend service + healthchecks - .env.example files (root + backend) Documentation: - README.md with quick start and architecture - Agent configuration guide - OpenAPI import guide - Deployment guide - Demo script 48 new tests, 449 total passing, 92.87% coverage
73 lines
2.1 KiB
Python
73 lines
2.1 KiB
Python
"""Error classification and retry logic for tool calls."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from enum import Enum
|
|
from typing import TYPE_CHECKING, Any
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Callable
|
|
|
|
import httpx
|
|
|
|
|
|
class ErrorCategory(Enum):
    """Buckets an exception can be sorted into when deciding whether to retry.

    The string values are the stable, serialisable names of each bucket.
    """

    # Transient failure; the only bucket the retry helper acts on.
    RETRYABLE = "retryable"
    # Failure that repeating the call is not expected to fix.
    NON_RETRYABLE = "non_retryable"
    # Credentials rejected (HTTP 401 / 403).
    AUTH_FAILURE = "auth_failure"
    # The request timed out.
    TIMEOUT = "timeout"
    # A connection could not be established.
    NETWORK = "network"
|
|
|
|
|
|
def classify_error(exc: Exception) -> ErrorCategory:
    """Map an exception onto the ErrorCategory that describes it.

    Mapping, checked in this order:
    - httpx.TimeoutException            -> TIMEOUT
    - httpx.ConnectError                -> NETWORK
    - httpx.HTTPStatusError, 401/403    -> AUTH_FAILURE
    - httpx.HTTPStatusError, 429/500/502/503 -> RETRYABLE
    - any other status or exception     -> NON_RETRYABLE
    """
    if isinstance(exc, httpx.TimeoutException):
        return ErrorCategory.TIMEOUT

    if isinstance(exc, httpx.ConnectError):
        return ErrorCategory.NETWORK

    # Everything that is not an HTTP status error has no status code to
    # inspect and falls through to the default bucket.
    if not isinstance(exc, httpx.HTTPStatusError):
        return ErrorCategory.NON_RETRYABLE

    auth_statuses = (401, 403)
    transient_statuses = (429, 500, 502, 503)

    status = exc.response.status_code
    if status in auth_statuses:
        result = ErrorCategory.AUTH_FAILURE
    elif status in transient_statuses:
        result = ErrorCategory.RETRYABLE
    else:
        result = ErrorCategory.NON_RETRYABLE
    return result
|
|
|
|
|
|
async def with_retry(
    fn: Callable[..., Any],
    max_retries: int = 3,
    base_delay: float = 1.0,
) -> Any:
    """Await ``fn()`` and retry with exponential backoff on RETRYABLE errors.

    Only exceptions that ``classify_error`` maps to
    ``ErrorCategory.RETRYABLE`` trigger another attempt; every other
    category propagates immediately after the first failure.

    Args:
        fn: Zero-argument callable returning an awaitable.
        max_retries: Total number of attempts (not additional retries).
            Values below 1 are treated as 1 so ``fn`` is always tried once.
        base_delay: Seconds to wait after the first failed attempt; the
            wait doubles after each subsequent failure.

    Returns:
        Whatever the first successful ``await fn()`` produces.

    Raises:
        Exception: The non-retryable error, or the last retryable error
            once all attempts are exhausted.
    """
    # Guarantee at least one attempt. Previously max_retries <= 0 skipped the
    # loop entirely and fell through to ``raise None`` (a TypeError).
    attempts = max(1, max_retries)

    for attempt in range(1, attempts + 1):
        try:
            return await fn()
        except Exception as exc:
            if classify_error(exc) is not ErrorCategory.RETRYABLE:
                raise
            if attempt == attempts:
                # Out of attempts: surface the last retryable error as-is.
                raise

        # Backoff schedule: base_delay, 2 * base_delay, 4 * base_delay, ...
        # Sleeping outside the handler avoids pinning the caught exception.
        await asyncio.sleep(base_delay * (2 ** (attempt - 1)))

    # Defensive: the final iteration always returns or raises above.
    raise AssertionError("with_retry exhausted attempts without an outcome")
|