Files
smart-support/backend/app/openapi/importer.py
Yaojia Wang f0699436c5 refactor: engineering improvements -- API versioning, structured logging, Alembic, error standardization, test coverage
- API versioning: all REST endpoints prefixed with /api/v1/
- Structured logging: replaced stdlib logging with structlog (console/JSON modes)
- Alembic migrations: versioned DB schema with initial migration
- Error standardization: global exception handlers for consistent envelope format
- Interrupt cleanup: asyncio background task for expired interrupt removal
- Integration tests: +30 tests (analytics, replay, openapi, error, session APIs)
- Frontend tests: +57 tests (all components, pages, useWebSocket hook)
- Backend: 557 tests, 89.75% coverage | Frontend: 80 tests, 16 test files
2026-04-06 23:19:29 +02:00

112 lines
3.5 KiB
Python

"""Import orchestrator for OpenAPI auto-discovery pipeline.
Orchestrates: fetch -> validate -> parse -> classify
Each stage updates the job status and calls the on_progress callback.
"""
from __future__ import annotations
from collections.abc import Callable
from dataclasses import replace
import structlog
from app.openapi.classifier import ClassifierProtocol, HeuristicClassifier
from app.openapi.fetcher import fetch_spec
from app.openapi.models import ImportJob
from app.openapi.parser import parse_endpoints
from app.openapi.ssrf import DEFAULT_POLICY, SSRFPolicy
from app.openapi.validator import validate_spec
logger = structlog.get_logger()
ProgressCallback = Callable[[str, ImportJob], None] | None
class ImportOrchestrator:
    """Orchestrates the full OpenAPI import pipeline.

    Stages:
      1. fetching    -- download and parse spec from URL
      2. validating  -- check spec structure
      3. parsing     -- extract endpoint definitions
      4. classifying -- classify endpoints for agent routing
      5. done / failed
    """

    def __init__(
        self,
        classifier: ClassifierProtocol | None = None,
        policy: SSRFPolicy = DEFAULT_POLICY,
    ) -> None:
        # Fall back to the built-in heuristic classifier when none is injected.
        self._classifier = classifier or HeuristicClassifier()
        # SSRF policy is forwarded to fetch_spec for every download.
        self._policy = policy

    async def start_import(
        self,
        url: str,
        job_id: str,
        on_progress: ProgressCallback,
    ) -> ImportJob:
        """Run the full import pipeline for a spec URL.

        Returns an ImportJob reflecting final status (done or failed);
        failures are captured in the job rather than raised.
        on_progress is called with (stage_name, current_job) at each stage.
        Passing None for on_progress is safe.
        """
        job = ImportJob(
            job_id=job_id,
            status="pending",
            spec_url=url,
        )
        try:
            # Stage 1: fetch (download is subject to self._policy)
            job = _update(job, status="fetching")
            _notify(on_progress, "fetching", job)
            spec_dict = await fetch_spec(url, self._policy)
            # Stage 2: validate -- collect all structural errors at once
            job = _update(job, status="validating")
            _notify(on_progress, "validating", job)
            errors = validate_spec(spec_dict)
            if errors:
                raise ValueError(f"Invalid spec: {'; '.join(errors)}")
            # Stage 3: parse
            job = _update(job, status="parsing")
            _notify(on_progress, "parsing", job)
            endpoints = parse_endpoints(spec_dict)
            # Stage 4: classify
            job = _update(job, status="classifying", total_endpoints=len(endpoints))
            _notify(on_progress, "classifying", job)
            classifications = await self._classifier.classify(endpoints)
            # Done
            job = _update(
                job,
                status="done",
                total_endpoints=len(endpoints),
                classified_count=len(classifications),
            )
            _notify(on_progress, "done", job)
            return job
        except Exception as exc:
            # FIX: bind job_id as a structured key instead of %-style
            # positional formatting. structlog's default config does not
            # apply %-interpolation (the old call could log a literal "%s"),
            # and a structured key survives JSON rendering for log search.
            logger.exception("Import pipeline failed", job_id=job_id)
            job = _update(job, status="failed", error_message=str(exc))
            _notify(on_progress, "failed", job)
            return job
def _update(job: ImportJob, **kwargs: object) -> ImportJob:
"""Return a new ImportJob with updated fields (immutable update)."""
return replace(job, **kwargs)
def _notify(callback: ProgressCallback, stage: str, job: ImportJob) -> None:
"""Call the progress callback if provided."""
if callback is not None:
callback(stage, job)