Files
smart-support/backend/app/openapi/importer.py
Yaojia Wang f0699436c5 refactor: engineering improvements -- API versioning, structured logging, Alembic, error standardization, test coverage
- API versioning: all REST endpoints prefixed with /api/v1/
- Structured logging: replaced stdlib logging with structlog (console/JSON modes)
- Alembic migrations: versioned DB schema with initial migration
- Error standardization: global exception handlers for consistent envelope format
- Interrupt cleanup: asyncio background task for expired interrupt removal
- Integration tests: +30 tests (analytics, replay, openapi, error, session APIs)
- Frontend tests: +57 tests (all components, pages, useWebSocket hook)
- Backend: 557 tests, 89.75% coverage | Frontend: 80 tests, 16 test files
2026-04-06 23:19:29 +02:00

112 lines
3.5 KiB
Python

"""Import orchestrator for OpenAPI auto-discovery pipeline.
Orchestrates: fetch -> validate -> parse -> classify
Each stage updates the job status and calls the on_progress callback.
"""
from __future__ import annotations
from collections.abc import Callable
from dataclasses import replace
import structlog
from app.openapi.classifier import ClassifierProtocol, HeuristicClassifier
from app.openapi.fetcher import fetch_spec
from app.openapi.models import ImportJob
from app.openapi.parser import parse_endpoints
from app.openapi.ssrf import DEFAULT_POLICY, SSRFPolicy
from app.openapi.validator import validate_spec
logger = structlog.get_logger()
ProgressCallback = Callable[[str, ImportJob], None] | None
class ImportOrchestrator:
    """Orchestrates the full OpenAPI import pipeline.

    Stages:
      1. fetching    -- download and parse spec from URL
      2. validating  -- check spec structure
      3. parsing     -- extract endpoint definitions
      4. classifying -- classify endpoints for agent routing
      5. done / failed
    """

    def __init__(
        self,
        classifier: ClassifierProtocol | None = None,
        policy: SSRFPolicy = DEFAULT_POLICY,
    ) -> None:
        # Fall back to the built-in heuristic classifier when none is injected.
        self._classifier = classifier or HeuristicClassifier()
        # SSRF policy is forwarded to fetch_spec for every download.
        self._policy = policy

    async def start_import(
        self,
        url: str,
        job_id: str,
        on_progress: ProgressCallback,
    ) -> ImportJob:
        """Run the full import pipeline for a spec URL.

        Returns an ImportJob reflecting final status (done or failed);
        failures are captured in the job rather than raised.
        on_progress is called with (stage_name, current_job) at each stage.
        Passing None for on_progress is safe.
        """
        job = ImportJob(
            job_id=job_id,
            status="pending",
            spec_url=url,
        )
        try:
            # Stage 1: fetch (download is subject to self._policy)
            job = _update(job, status="fetching")
            _notify(on_progress, "fetching", job)
            spec_dict = await fetch_spec(url, self._policy)
            # Stage 2: validate -- collect all structural errors at once
            job = _update(job, status="validating")
            _notify(on_progress, "validating", job)
            errors = validate_spec(spec_dict)
            if errors:
                raise ValueError(f"Invalid spec: {'; '.join(errors)}")
            # Stage 3: parse
            job = _update(job, status="parsing")
            _notify(on_progress, "parsing", job)
            endpoints = parse_endpoints(spec_dict)
            # Stage 4: classify
            job = _update(job, status="classifying", total_endpoints=len(endpoints))
            _notify(on_progress, "classifying", job)
            classifications = await self._classifier.classify(endpoints)
            # Done
            job = _update(
                job,
                status="done",
                total_endpoints=len(endpoints),
                classified_count=len(classifications),
            )
            _notify(on_progress, "done", job)
            return job
        except Exception as exc:
            # FIX: bind job_id as a structured key instead of %-style
            # positional formatting. structlog's default config does not
            # apply %-interpolation (the old call could log a literal "%s"),
            # and a structured key survives JSON rendering for log search.
            logger.exception("Import pipeline failed", job_id=job_id)
            job = _update(job, status="failed", error_message=str(exc))
            _notify(on_progress, "failed", job)
            return job
def _update(job: ImportJob, **kwargs: object) -> ImportJob:
"""Return a new ImportJob with updated fields (immutable update)."""
return replace(job, **kwargs)
def _notify(callback: ProgressCallback, stage: str, job: ImportJob) -> None:
"""Call the progress callback if provided."""
if callback is not None:
callback(stage, job)