refactor: engineering improvements -- API versioning, structured logging, Alembic, error standardization, test coverage
- API versioning: all REST endpoints prefixed with /api/v1/ - Structured logging: replaced stdlib logging with structlog (console/JSON modes) - Alembic migrations: versioned DB schema with initial migration - Error standardization: global exception handlers for consistent envelope format - Interrupt cleanup: asyncio background task for expired interrupt removal - Integration tests: +30 tests (analytics, replay, openapi, error, session APIs) - Frontend tests: +57 tests (all components, pages, useWebSocket hook) - Backend: 557 tests, 89.75% coverage | Frontend: 80 tests, 16 test files
This commit is contained in:
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
|
||||
from psycopg_pool import AsyncConnectionPool
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/api/analytics",
|
||||
prefix="/api/v1/analytics",
|
||||
tags=["analytics"],
|
||||
dependencies=[Depends(require_admin_api_key)],
|
||||
)
|
||||
|
||||
@@ -2,14 +2,14 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import secrets
|
||||
from typing import Annotated
|
||||
|
||||
import structlog
|
||||
from fastapi import Depends, HTTPException, Query, Request, WebSocket, status
|
||||
from fastapi.security import APIKeyHeader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = structlog.get_logger()
|
||||
|
||||
_API_KEY_HEADER = APIKeyHeader(name="X-API-Key", auto_error=False)
|
||||
|
||||
|
||||
@@ -32,6 +32,8 @@ class Settings(BaseSettings):
|
||||
|
||||
template_name: str = ""
|
||||
|
||||
log_format: str = "console" # "console" for dev, "json" for production
|
||||
|
||||
admin_api_key: str = ""
|
||||
|
||||
anthropic_api_key: str = ""
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
|
||||
@@ -88,6 +89,17 @@ async def create_checkpointer(pool: AsyncConnectionPool) -> AsyncPostgresSaver:
|
||||
return checkpointer
|
||||
|
||||
|
||||
def run_alembic_migrations(database_url: str) -> None:
    """Upgrade the database schema to the latest Alembic revision ("head").

    Args:
        database_url: SQLAlchemy-style connection URL injected into the
            Alembic config at runtime (overrides whatever alembic.ini has).
    """
    # Function-local imports so merely importing this module does not
    # require Alembic to be installed.
    from alembic.config import Config

    from alembic import command

    ini_path = Path(__file__).parent.parent / "alembic.ini"
    cfg = Config(str(ini_path))
    cfg.set_main_option("sqlalchemy.url", database_url)
    command.upgrade(cfg, "head")
|
||||
|
||||
|
||||
async def setup_app_tables(pool: AsyncConnectionPool) -> None:
|
||||
"""Create application-specific tables and apply migrations."""
|
||||
async with pool.connection() as conn:
|
||||
|
||||
@@ -3,14 +3,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Protocol
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from pydantic import BaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class EscalationPayload(BaseModel, frozen=True):
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from langchain.agents import create_agent
|
||||
@@ -18,7 +17,9 @@ if TYPE_CHECKING:
|
||||
from app.intent import IntentClassifier
|
||||
from app.registry import AgentRegistry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
SUPERVISOR_PROMPT = (
|
||||
"You are a customer support supervisor. "
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Protocol
|
||||
|
||||
from pydantic import BaseModel
|
||||
@@ -12,7 +11,9 @@ if TYPE_CHECKING:
|
||||
|
||||
from app.registry import AgentConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
CLASSIFICATION_PROMPT = (
|
||||
"You are an intent classifier for a customer support system.\n"
|
||||
|
||||
57
backend/app/logging_config.py
Normal file
57
backend/app/logging_config.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""Structured logging configuration using structlog."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import structlog
|
||||
|
||||
|
||||
def configure_logging(log_format: str = "console") -> None:
    """Configure structlog with stdlib ``logging`` integration.

    Routes every log record -- emitted through structlog *or* through the
    standard library -- to a single stdout handler rendered by one shared
    processor chain, so output is uniform across the app and its libraries.

    Args:
        log_format: "console" for human-readable dev output,
            "json" for machine-parseable production output.
    """
    # Processors applied to structlog-originated events AND (via
    # foreign_pre_chain below) to records from plain `logging` loggers.
    # Note: filter_by_level is deliberately NOT in this list -- it needs a
    # real logger object, which foreign records do not carry.
    shared_processors: list[structlog.types.Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        # Render %-style positional args (logger.info("x=%s", x)) into the
        # event string; call sites in this codebase use that form.
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
    ]

    if log_format == "json":
        renderer: structlog.types.Processor = structlog.processors.JSONRenderer()
    else:
        renderer = structlog.dev.ConsoleRenderer()

    structlog.configure(
        processors=[
            structlog.stdlib.filter_by_level,
            *shared_processors,
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

    formatter = structlog.stdlib.ProcessorFormatter(
        # Records logged via plain `logging` (e.g. third-party libraries)
        # skip structlog's configured pipeline entirely; replay the shared
        # processors on them here so they also get timestamp, level, and
        # logger name in the final output.
        foreign_pre_chain=shared_processors,
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            renderer,
        ],
    )

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # Replace any pre-existing handlers so everything funnels through the
    # single structlog-backed formatter.
    root_logger = logging.getLogger()
    root_logger.handlers.clear()
    root_logger.addHandler(handler)
    root_logger.setLevel(logging.INFO)
|
||||
@@ -2,25 +2,30 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
import contextlib
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fastapi import Depends, FastAPI, Query, WebSocket, WebSocketDisconnect
|
||||
from fastapi import FastAPI, HTTPException, Query, WebSocket, WebSocketDisconnect
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from app.analytics.api import router as analytics_router
|
||||
from app.analytics.event_recorder import PostgresAnalyticsRecorder
|
||||
from app.api_utils import envelope
|
||||
from app.callbacks import TokenUsageCallbackHandler
|
||||
from app.config import Settings
|
||||
from app.conversation_tracker import PostgresConversationTracker
|
||||
from app.db import create_checkpointer, create_pool, setup_app_tables
|
||||
from app.db import create_checkpointer, create_pool, run_alembic_migrations
|
||||
from app.escalation import NoOpEscalator, WebhookEscalator
|
||||
from app.graph import build_graph
|
||||
from app.intent import LLMIntentClassifier
|
||||
from app.interrupt_manager import InterruptManager
|
||||
from app.llm import create_llm
|
||||
from app.logging_config import configure_logging
|
||||
from app.openapi.review_api import router as openapi_router
|
||||
from app.registry import AgentRegistry
|
||||
from app.replay.api import router as replay_router
|
||||
@@ -31,19 +36,44 @@ from app.ws_handler import dispatch_message
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncGenerator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
AGENTS_YAML = Path(__file__).parent.parent / "agents.yaml"
|
||||
FRONTEND_DIST = Path(__file__).parent.parent.parent / "frontend" / "dist"
|
||||
|
||||
|
||||
async def _interrupt_cleanup_loop(
    interrupt_manager: InterruptManager,
    interval: int = 60,
) -> None:
    """Background task that periodically purges expired interrupts.

    Loops forever until the owning task is cancelled. A failing cleanup
    pass is logged and swallowed so a single error cannot kill the loop;
    ``CancelledError`` is a ``BaseException`` and still propagates.

    Args:
        interrupt_manager: Manager whose expired interrupts are removed.
        interval: Seconds between cleanup passes.
    """
    while True:
        # Sleep first: the initial pass runs one full interval after startup.
        await asyncio.sleep(interval)
        try:
            removed = interrupt_manager.cleanup_expired()
            if removed:
                logger.info("Cleaned up %d expired interrupt(s)", len(removed))
        except Exception:
            logger.exception("Error during interrupt cleanup")
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
settings = Settings()
|
||||
configure_logging(settings.log_format)
|
||||
|
||||
pool = await create_pool(settings)
|
||||
checkpointer = await create_checkpointer(pool)
|
||||
await setup_app_tables(pool)
|
||||
run_alembic_migrations(settings.database_url)
|
||||
|
||||
# Load agents from template or default YAML
|
||||
if settings.template_name:
|
||||
@@ -89,8 +119,16 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
settings.template_name or "(default)",
|
||||
)
|
||||
|
||||
cleanup_task = asyncio.create_task(
|
||||
_interrupt_cleanup_loop(interrupt_manager),
|
||||
)
|
||||
|
||||
yield
|
||||
|
||||
cleanup_task.cancel()
|
||||
with contextlib.suppress(asyncio.CancelledError):
|
||||
await cleanup_task
|
||||
|
||||
await pool.close()
|
||||
|
||||
|
||||
@@ -103,7 +141,35 @@ app.include_router(replay_router)
|
||||
app.include_router(analytics_router)
|
||||
|
||||
|
||||
@app.get("/api/health")
|
||||
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc):  # type: ignore[no-untyped-def]
    """Render HTTPException as the project's standard response envelope."""
    body = envelope(None, success=False, error=exc.detail)
    return JSONResponse(status_code=exc.status_code, content=body)
|
||||
|
||||
|
||||
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request, exc):  # type: ignore[no-untyped-def]
    """Render request-validation failures in the standard envelope (HTTP 422)."""
    body = envelope(None, success=False, error=str(exc))
    return JSONResponse(status_code=422, content=body)
|
||||
|
||||
|
||||
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):  # type: ignore[no-untyped-def]
    """Last-resort handler: log the full traceback, return a generic 500.

    The client only ever sees the fixed envelope message -- stack traces
    stay in the server logs.
    """
    logger.exception("Unhandled exception: %s", exc)
    body = envelope(None, success=False, error="Internal server error")
    return JSONResponse(status_code=500, content=body)
|
||||
|
||||
|
||||
@app.get("/api/v1/health")
def health_check() -> dict:
    """Health probe for load balancers and monitoring; reports app version."""
    return dict(status="ok", version=_VERSION)
|
||||
|
||||
@@ -8,13 +8,14 @@ classifier and an LLM-backed classifier with heuristic fallback.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Protocol
|
||||
|
||||
import structlog
|
||||
|
||||
from app.openapi.models import ClassificationResult, EndpointInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = structlog.get_logger()
|
||||
|
||||
_WRITE_METHODS = frozenset({"POST", "PUT", "PATCH", "DELETE"})
|
||||
_INTERRUPT_METHODS = frozenset({"POST", "PUT", "PATCH", "DELETE"})
|
||||
|
||||
@@ -6,10 +6,11 @@ Each stage updates the job status and calls the on_progress callback.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
from dataclasses import replace
|
||||
|
||||
import structlog
|
||||
|
||||
from app.openapi.classifier import ClassifierProtocol, HeuristicClassifier
|
||||
from app.openapi.fetcher import fetch_spec
|
||||
from app.openapi.models import ImportJob
|
||||
@@ -17,7 +18,7 @@ from app.openapi.parser import parse_endpoints
|
||||
from app.openapi.ssrf import DEFAULT_POLICY, SSRFPolicy
|
||||
from app.openapi.validator import validate_spec
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = structlog.get_logger()
|
||||
|
||||
ProgressCallback = Callable[[str, ImportJob], None] | None
|
||||
|
||||
|
||||
@@ -10,11 +10,11 @@ Exposes endpoints for:
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from typing import Literal
|
||||
|
||||
import structlog
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
@@ -23,10 +23,10 @@ from app.openapi.generator import generate_agent_yaml, generate_tool_code
|
||||
from app.openapi.importer import ImportOrchestrator
|
||||
from app.openapi.models import ClassificationResult, ImportJob
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = structlog.get_logger()
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/api/openapi",
|
||||
prefix="/api/v1/openapi",
|
||||
tags=["openapi"],
|
||||
dependencies=[Depends(require_admin_api_key)],
|
||||
)
|
||||
|
||||
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
|
||||
from psycopg_pool import AsyncConnectionPool
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/api",
|
||||
prefix="/api/v1",
|
||||
tags=["replay"],
|
||||
dependencies=[Depends(require_admin_api_key)],
|
||||
)
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import structlog
|
||||
|
||||
from app.replay.models import ReplayStep, StepType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = structlog.get_logger()
|
||||
|
||||
_EMPTY_TIMESTAMP = "1970-01-01T00:00:00Z"
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from collections import defaultdict
|
||||
@@ -21,7 +20,9 @@ if TYPE_CHECKING:
|
||||
from app.session_manager import SessionManager
|
||||
from app.ws_context import WebSocketContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
MAX_MESSAGE_SIZE = 32_768 # 32 KB
|
||||
MAX_CONTENT_LENGTH = 10_000 # characters
|
||||
|
||||
Reference in New Issue
Block a user