"""FastAPI router for OpenAPI import review workflow. Exposes endpoints for: - Starting an import job (triggers background pipeline) - Querying job status - Reviewing and editing classifications - Approving a job to trigger tool generation """ from __future__ import annotations import asyncio import logging import re import uuid from typing import Literal from fastapi import APIRouter, BackgroundTasks, HTTPException from pydantic import BaseModel, field_validator from app.openapi.importer import ImportOrchestrator from app.openapi.models import ClassificationResult, ImportJob logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/openapi", tags=["openapi"]) # In-memory store: job_id -> job dict, guarded by async lock _job_store: dict[str, dict] = {} _store_lock = asyncio.Lock() # Shared orchestrator instance _orchestrator = ImportOrchestrator() # --- Request / Response schemas --- class ImportRequest(BaseModel): url: str @field_validator("url") @classmethod def url_must_be_valid(cls, value: str) -> str: stripped = value.strip() if not stripped: raise ValueError("url must not be empty") if not stripped.startswith(("http://", "https://")): raise ValueError("url must start with http:// or https://") return stripped class JobResponse(BaseModel): job_id: str status: str spec_url: str total_endpoints: int = 0 classified_count: int = 0 error_message: str | None = None class ClassificationResponse(BaseModel): index: int access_type: str needs_interrupt: bool agent_group: str confidence: float customer_params: list[str] endpoint: dict class UpdateClassificationRequest(BaseModel): access_type: Literal["read", "write"] needs_interrupt: bool agent_group: str @field_validator("agent_group") @classmethod def agent_group_must_be_safe(cls, value: str) -> str: if not value.strip() or not re.fullmatch(r"[a-zA-Z0-9_\-]+", value): raise ValueError( "agent_group must be non-empty and contain only " "alphanumeric characters, underscores, or hyphens" ) return value # --- 
# --- Helpers ---


def _job_to_response(job: dict) -> dict:
    """Build the public job summary from a stored job dict.

    Only the summary fields are copied; the stored ``classifications`` list
    is not included in the result.

    Args:
        job: A job dict as kept in ``_job_store``.

    Returns:
        A dict with ``job_id``, ``status``, ``spec_url``, ``total_endpoints``,
        ``classified_count`` and ``error_message``.
    """
    return {
        "job_id": job["job_id"],
        "status": job["status"],
        "spec_url": job["spec_url"],
        "total_endpoints": job.get("total_endpoints", 0),
        "classified_count": job.get("classified_count", 0),
        "error_message": job.get("error_message"),
    }


def _classification_to_response(idx: int, clf: ClassificationResult) -> dict:
    """Serialize one classification, tagged with its list position.

    Args:
        idx: Position of ``clf`` in the job's classification list.
        clf: The classification to serialize.

    Returns:
        A plain dict with the classification fields and a nested ``endpoint``
        dict.
    """
    ep = clf.endpoint
    return {
        "index": idx,
        "access_type": clf.access_type,
        "needs_interrupt": clf.needs_interrupt,
        "agent_group": clf.agent_group,
        "confidence": clf.confidence,
        # Copy so the response does not alias the stored collection.
        "customer_params": list(clf.customer_params),
        "endpoint": {
            "path": ep.path,
            "method": ep.method,
            "operation_id": ep.operation_id,
            "summary": ep.summary,
            "description": ep.description,
        },
    }


def _get_job_or_404(job_id: str) -> dict:
    """Return the stored job dict, or raise HTTP 404 if the id is unknown."""
    job = _job_store.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
    return job


def _patch_job(job_id: str, changes: dict) -> None:
    """Replace the stored job with a copy that has ``changes`` applied.

    Does nothing when the job id is not in the store.
    """
    current = _job_store.get(job_id)
    if current is None:
        return
    _job_store[job_id] = {**current, **changes}


def _import_job_fields(import_job: ImportJob) -> dict:
    """Extract the progress fields that are mirrored into the job store."""
    return {
        "status": import_job.status,
        "total_endpoints": import_job.total_endpoints,
        "classified_count": import_job.classified_count,
        "error_message": import_job.error_message,
    }


async def _run_import(job_id: str, url: str) -> None:
    """Run the import pipeline as a background task.

    Progress reported by the orchestrator and the final result are mirrored
    into ``_job_store``. Any exception is logged and recorded on the job as a
    ``"failed"`` status; this function does not re-raise.

    Args:
        job_id: Id of the job entry to update.
        url: Spec URL passed to the orchestrator.
    """

    def on_progress(stage: str, result_job: ImportJob) -> None:
        # `stage` is part of the callback signature but is not used here.
        _patch_job(job_id, _import_job_fields(result_job))

    # The whole body stays inside the try block: this runs as a background
    # task, so a failure while recording the result must also end up on the
    # job rather than escaping unobserved.
    try:
        result = await _orchestrator.start_import(
            url=url,
            job_id=job_id,
            on_progress=on_progress,
        )
        final_fields = _import_job_fields(result)

        # Persist the classifications so the review endpoints have data to
        # serve. Previously nothing wrote this key, so the classification
        # list stayed empty forever and every update returned 404.
        # NOTE(review): assumes ImportJob exposes its results as a list/tuple
        # attribute named `classifications` -- confirm against
        # app.openapi.models. If the attribute is absent, nothing changes.
        found = getattr(result, "classifications", None)
        if isinstance(found, (list, tuple)):
            final_fields["classifications"] = list(found)

        _patch_job(job_id, final_fields)
    except Exception:
        logger.exception("Background import failed for job %s", job_id)
        _patch_job(
            job_id,
            {
                "status": "failed",
                "error_message": "Import failed. Please check the URL and try again.",
            },
        )


# --- Endpoints ---


@router.post("/import", status_code=202)
async def start_import(
    request: ImportRequest,
    background_tasks: BackgroundTasks,
) -> dict:
    """Start an OpenAPI import job for the given spec URL.

    Registers a ``"pending"`` job under a fresh UUID, schedules the import as
    a background task, and returns the job summary.
    """
    job_id = str(uuid.uuid4())
    job: dict = {
        "job_id": job_id,
        "status": "pending",
        "spec_url": request.url,
        "total_endpoints": 0,
        "classified_count": 0,
        "error_message": None,
        "classifications": [],
    }
    _job_store[job_id] = job
    background_tasks.add_task(_run_import, job_id, request.url)
    return _job_to_response(job)


@router.get("/jobs/{job_id}")
async def get_job(job_id: str) -> dict:
    """Get the status of an import job.

    Raises:
        HTTPException: 404 if the job id is unknown.
    """
    return _job_to_response(_get_job_or_404(job_id))


@router.get("/jobs/{job_id}/classifications")
async def get_classifications(job_id: str) -> list:
    """Get all classifications for an import job.

    Raises:
        HTTPException: 404 if the job id is unknown.
    """
    job = _get_job_or_404(job_id)
    classifications: list[ClassificationResult] = job.get("classifications", [])
    return [
        _classification_to_response(i, clf)
        for i, clf in enumerate(classifications)
    ]


@router.put("/jobs/{job_id}/classifications/{idx}")
async def update_classification(
    job_id: str,
    idx: int,
    request: UpdateClassificationRequest,
) -> dict:
    """Update a specific classification by index.

    Only ``access_type``, ``needs_interrupt`` and ``agent_group`` come from
    the request; the endpoint, customer params and confidence are carried
    over from the existing classification.

    Raises:
        HTTPException: 404 if the job id is unknown or ``idx`` is out of
            range.
    """
    job = _get_job_or_404(job_id)
    classifications: list[ClassificationResult] = job.get("classifications", [])
    if idx < 0 or idx >= len(classifications):
        raise HTTPException(
            status_code=404,
            detail=f"Classification index {idx} out of range",
        )

    original = classifications[idx]
    updated = ClassificationResult(
        endpoint=original.endpoint,
        access_type=request.access_type,
        customer_params=original.customer_params,
        agent_group=request.agent_group,
        confidence=original.confidence,
        needs_interrupt=request.needs_interrupt,
    )

    # Build a new list and a new job dict instead of mutating the stored
    # objects in place.
    new_classifications = list(classifications)
    new_classifications[idx] = updated
    _job_store[job_id] = {**job, "classifications": new_classifications}
    return _classification_to_response(idx, updated)


@router.post("/jobs/{job_id}/approve")
async def approve_job(job_id: str) -> dict:
    """Approve a job's classifications.

    Sets the stored status to ``"approved"`` and returns the job summary.
    Only the status is changed here; no tool-generation call is made in this
    function.

    NOTE(review): the current status is not checked, so a ``"pending"`` or
    ``"failed"`` job can be approved, and an import still running in the
    background may later overwrite the ``"approved"`` status. Consider
    returning 409 for non-reviewable states once the set of orchestrator
    statuses is confirmed.

    Raises:
        HTTPException: 404 if the job id is unknown.
    """
    job = _get_job_or_404(job_id)
    updated_job = {**job, "status": "approved"}
    _job_store[job_id] = updated_job
    return _job_to_response(updated_job)