smart-support/backend/app/openapi/review_api.py
Yaojia Wang f0699436c5 refactor: engineering improvements -- API versioning, structured logging, Alembic, error standardization, test coverage
- API versioning: all REST endpoints prefixed with /api/v1/
- Structured logging: replaced stdlib logging with structlog (console/JSON modes; a configuration sketch follows this summary)
- Alembic migrations: versioned DB schema with initial migration
- Error standardization: global exception handlers for consistent envelope format
- Interrupt cleanup: asyncio background task for expired interrupt removal
- Integration tests: +30 tests (analytics, replay, openapi, error, session APIs)
- Frontend tests: +57 tests (all components, pages, useWebSocket hook)
- Backend: 557 tests, 89.75% coverage | Frontend: 80 tests, 16 test files
2026-04-06 23:19:29 +02:00
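
The "Structured logging" bullet implies a one-time structlog configuration at
application startup; review_api.py below only calls structlog.get_logger(). A
minimal sketch of such a setup, where the setup_logging name and the json_logs
toggle are illustrative rather than taken from this repo:

import logging

import structlog

def setup_logging(json_logs: bool = False) -> None:
    """Configure structlog once at startup (console or JSON rendering)."""
    shared = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
    ]
    if json_logs:
        # JSONRenderer needs exceptions pre-formatted into strings.
        processors = shared + [
            structlog.processors.format_exc_info,
            structlog.processors.JSONRenderer(),
        ]
    else:
        # ConsoleRenderer pretty-prints exceptions on its own.
        processors = shared + [structlog.dev.ConsoleRenderer()]
    structlog.configure(
        processors=processors,
        wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
        cache_logger_on_first_use=True,
    )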

"""FastAPI router for OpenAPI import review workflow.
Exposes endpoints for:
- Starting an import job (triggers background pipeline)
- Querying job status
- Reviewing and editing classifications
- Approving a job to trigger tool generation
"""

from __future__ import annotations

import asyncio
import re
import uuid
from typing import Literal

import structlog
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from pydantic import BaseModel, field_validator

from app.auth import require_admin_api_key
from app.openapi.generator import generate_agent_yaml, generate_tool_code
from app.openapi.importer import ImportOrchestrator
from app.openapi.models import ClassificationResult, ImportJob

logger = structlog.get_logger()

router = APIRouter(
    prefix="/api/v1/openapi",
    tags=["openapi"],
    dependencies=[Depends(require_admin_api_key)],
)
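
# Assuming the application factory mounts this router unchanged via
# app.include_router(router), every route below is served under the
# /api/v1/openapi prefix and gated by the admin API key dependency.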

# In-memory store: job_id -> job dict; writers below take the async lock
# so multi-step read-modify-write updates stay consistent.
_job_store: dict[str, dict] = {}
_store_lock = asyncio.Lock()

# Shared orchestrator instance
_orchestrator = ImportOrchestrator()


# --- Request / Response schemas ---


class ImportRequest(BaseModel):
    url: str

    @field_validator("url")
    @classmethod
    def url_must_be_valid(cls, value: str) -> str:
        stripped = value.strip()
        if not stripped:
            raise ValueError("url must not be empty")
        if not stripped.startswith(("http://", "https://")):
            raise ValueError("url must start with http:// or https://")
        return stripped
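
# For example (illustrative value): ImportRequest(url=" https://example.com/openapi.json ")
# validates and strips to "https://example.com/openapi.json", while an
# "ftp://..." URL is rejected with a pydantic ValidationError.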


class JobResponse(BaseModel):
    job_id: str
    status: str
    spec_url: str
    total_endpoints: int = 0
    classified_count: int = 0
    error_message: str | None = None


class ClassificationResponse(BaseModel):
    index: int
    access_type: str
    needs_interrupt: bool
    agent_group: str
    confidence: float
    customer_params: list[str]
    endpoint: dict


class UpdateClassificationRequest(BaseModel):
    access_type: Literal["read", "write"]
    needs_interrupt: bool
    agent_group: str

    @field_validator("agent_group")
    @classmethod
    def agent_group_must_be_safe(cls, value: str) -> str:
        if not value.strip() or not re.fullmatch(r"[a-zA-Z0-9_\-]+", value):
            raise ValueError(
                "agent_group must be non-empty and contain only "
                "alphanumeric characters, underscores, or hyphens"
            )
        return value


# --- Helpers ---


def _job_to_response(job: dict) -> dict:
    return {
        "job_id": job["job_id"],
        "status": job["status"],
        "spec_url": job["spec_url"],
        "total_endpoints": job.get("total_endpoints", 0),
        "classified_count": job.get("classified_count", 0),
        "error_message": job.get("error_message"),
    }


def _classification_to_response(idx: int, clf: ClassificationResult) -> dict:
    ep = clf.endpoint
    return {
        "index": idx,
        "access_type": clf.access_type,
        "needs_interrupt": clf.needs_interrupt,
        "agent_group": clf.agent_group,
        "confidence": clf.confidence,
        "customer_params": list(clf.customer_params),
        "endpoint": {
            "path": ep.path,
            "method": ep.method,
            "operation_id": ep.operation_id,
            "summary": ep.summary,
            "description": ep.description,
        },
    }
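
# Illustrative output shape (all values made up):
#   {"index": 0, "access_type": "read", "needs_interrupt": False,
#    "agent_group": "billing", "confidence": 0.92,
#    "customer_params": ["customer_id"],
#    "endpoint": {"path": "/invoices/{id}", "method": "GET", ...}}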


async def _run_import(job_id: str, url: str) -> None:
    """Run the import pipeline as a background task."""

    def on_progress(stage: str, result_job: ImportJob) -> None:
        # Sync progress callback: it runs on the event loop and never
        # awaits, so this single assignment is safe without the lock.
        if job_id in _job_store:
            _job_store[job_id] = {
                **_job_store[job_id],
                "status": result_job.status,
                "total_endpoints": result_job.total_endpoints,
                "classified_count": result_job.classified_count,
                "error_message": result_job.error_message,
            }

    try:
        result = await _orchestrator.start_import(
            url=url, job_id=job_id, on_progress=on_progress,
        )
        async with _store_lock:
            if job_id in _job_store:
                _job_store[job_id] = {
                    **_job_store[job_id],
                    "status": result.status,
                    "total_endpoints": result.total_endpoints,
                    "classified_count": result.classified_count,
                    "error_message": result.error_message,
                    # Persist classifier output so the review endpoints can
                    # serve it (assumes ImportJob exposes `classifications`).
                    "classifications": list(result.classifications),
                }
    except Exception:
        # structlog loggers take key-value pairs, not %-style arguments
        logger.exception("background import failed", job_id=job_id)
        async with _store_lock:
            if job_id in _job_store:
                _job_store[job_id] = {
                    **_job_store[job_id],
                    "status": "failed",
                    "error_message": "Import failed. Please check the URL and try again.",
                }


# --- Endpoints ---


@router.post("/import", status_code=202)
async def start_import(
    request: ImportRequest,
    background_tasks: BackgroundTasks,
) -> dict:
    """Start an OpenAPI import job for the given spec URL."""
    job_id = str(uuid.uuid4())
    job: dict = {
        "job_id": job_id,
        "status": "pending",
        "spec_url": request.url,
        "total_endpoints": 0,
        "classified_count": 0,
        "error_message": None,
        "classifications": [],
    }
    async with _store_lock:
        _job_store[job_id] = job
    background_tasks.add_task(_run_import, job_id, request.url)
    return _job_to_response(job)
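
# Illustrative exchange (IDs and URL are placeholders):
#   POST /api/v1/openapi/import  {"url": "https://example.com/openapi.json"}
#   -> 202 {"job_id": "42b7...", "status": "pending",
#           "spec_url": "https://example.com/openapi.json",
#           "total_endpoints": 0, "classified_count": 0, "error_message": null}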
@router.get("/jobs/{job_id}")
async def get_job(job_id: str) -> dict:
"""Get the status of an import job."""
job = _job_store.get(job_id)
if job is None:
raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
return _job_to_response(job)
@router.get("/jobs/{job_id}/classifications")
async def get_classifications(job_id: str) -> list:
"""Get all classifications for an import job."""
job = _job_store.get(job_id)
if job is None:
raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
classifications: list[ClassificationResult] = job.get("classifications", [])
return [
_classification_to_response(i, clf)
for i, clf in enumerate(classifications)
]


@router.put("/jobs/{job_id}/classifications/{idx}")
async def update_classification(
    job_id: str,
    idx: int,
    request: UpdateClassificationRequest,
) -> dict:
    """Update a specific classification by index."""
    async with _store_lock:
        job = _job_store.get(job_id)
        if job is None:
            raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
        classifications: list[ClassificationResult] = job.get("classifications", [])
        if idx < 0 or idx >= len(classifications):
            raise HTTPException(
                status_code=404,
                detail=f"Classification index {idx} out of range",
            )
        original = classifications[idx]
        # Preserve the fields a reviewer cannot edit; overwrite the rest.
        updated = ClassificationResult(
            endpoint=original.endpoint,
            access_type=request.access_type,
            customer_params=original.customer_params,
            agent_group=request.agent_group,
            confidence=original.confidence,
            needs_interrupt=request.needs_interrupt,
        )
        new_classifications = list(classifications)
        new_classifications[idx] = updated
        _job_store[job_id] = {**job, "classifications": new_classifications}
    return _classification_to_response(idx, updated)
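
# Illustrative request (values are placeholders):
#   PUT /api/v1/openapi/jobs/{job_id}/classifications/3
#   {"access_type": "write", "needs_interrupt": true, "agent_group": "billing"}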
@router.post("/jobs/{job_id}/approve")
async def approve_job(job_id: str) -> dict:
"""Approve a job's classifications and trigger tool generation.
Generates Python tool code for each classified endpoint and
produces an agent YAML configuration snippet.
"""
job = _job_store.get(job_id)
if job is None:
raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
classifications: list[ClassificationResult] = job.get("classifications", [])
if not classifications:
raise HTTPException(
status_code=400,
detail="No classifications to approve. Import must complete first.",
)
base_url = job["spec_url"].rsplit("/", 1)[0]
generated_tools = []
for clf in classifications:
tool = generate_tool_code(clf, base_url)
generated_tools.append({
"function_name": tool.function_name,
"agent_group": clf.agent_group,
"code": tool.code,
})
agent_yaml = generate_agent_yaml(tuple(classifications), base_url)
updated_job = {
**job,
"status": "approved",
"generated_tools": generated_tools,
"agent_yaml": agent_yaml,
}
_job_store[job_id] = updated_job
response = _job_to_response(updated_job)
response["generated_tools_count"] = len(generated_tools)
return response
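

# Illustrative end-to-end exercise with FastAPI's TestClient. The app
# factory import and the admin-key header name are assumptions, not
# taken from this file:
#
#   from fastapi.testclient import TestClient
#   from app.main import create_app  # hypothetical factory
#
#   client = TestClient(create_app())
#   headers = {"X-Admin-Key": "test-key"}  # header name assumed
#   job = client.post(
#       "/api/v1/openapi/import",
#       json={"url": "https://example.com/openapi.json"},
#       headers=headers,
#   ).json()
#   progress = client.get(
#       f"/api/v1/openapi/jobs/{job['job_id']}", headers=headers,
#   ).json()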