feat: complete phase 3 -- OpenAPI auto-discovery, SSRF protection, tool generation
- SSRF protection: private IP blocking, DNS rebinding defense, redirect validation - OpenAPI fetcher with SSRF guard, JSON/YAML auto-detection, 10MB limit - Structural spec validator (3.0.x/3.1.x) - Endpoint parser with $ref resolution, auto-generated operation IDs - Heuristic + LLM endpoint classifier with Protocol interface - Review API at /api/openapi (import, job status, classification CRUD, approve) - @tool code generator + Agent YAML generator - Import orchestrator (fetch -> validate -> parse -> classify pipeline) - 125 new tests, 322 total passing, 93.23% coverage
This commit is contained in:
93
backend/app/openapi/fetcher.py
Normal file
93
backend/app/openapi/fetcher.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""OpenAPI spec fetcher with SSRF protection.
|
||||
|
||||
Fetches OpenAPI spec documents from remote URLs, validates them against
|
||||
SSRF policy, and parses JSON or YAML format automatically.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import yaml
|
||||
|
||||
from app.openapi.ssrf import DEFAULT_POLICY, SSRFPolicy
|
||||
|
||||
# Upper bound on the UTF-8 encoded size of a fetched spec body; larger
# responses are rejected by fetch_spec with a ValueError.
_MAX_SIZE_BYTES = 10 * 1024 * 1024 # 10MB
|
||||
|
||||
|
||||
async def fetch_spec(url: str, policy: SSRFPolicy = DEFAULT_POLICY) -> dict:
    """Fetch an OpenAPI spec from a URL and return it as a dict.

    The request is made through ``safe_fetch`` using the given SSRF policy.
    The body is parsed as JSON or YAML: the format is chosen from the
    ``content-type`` header or the URL extension, and when neither gives a
    hint JSON is tried first, then YAML. Bodies whose UTF-8 encoding exceeds
    10MB are rejected.

    Args:
        url: Location of the spec document.
        policy: SSRF policy passed to ``safe_fetch``.

    Returns:
        The parsed spec document.

    Raises:
        SSRFError: If the URL is blocked by SSRF policy.
        ValueError: If the response is too large or cannot be parsed.
    """
    # Resolved at call time rather than at module import time.
    from app.openapi.ssrf import safe_fetch

    response = await safe_fetch(url, policy=policy)
    # Non-success statuses propagate as whatever raise_for_status() raises.
    response.raise_for_status()

    content = response.text
    # Encode once and reuse the result: the body may be well over the limit,
    # so re-encoding it just to format the error message is wasted work.
    # NOTE(review): the limit is applied after the body text is available
    # here; whether safe_fetch caps the download itself is not visible.
    size_bytes = len(content.encode("utf-8"))
    if size_bytes > _MAX_SIZE_BYTES:
        raise ValueError(
            f"Response too large: {size_bytes} bytes "
            f"(max {_MAX_SIZE_BYTES} bytes)"
        )

    content_type = response.headers.get("content-type", "")
    return _parse_content(content, content_type, url)
|
||||
|
||||
|
||||
def _parse_content(content: str, content_type: str, url: str) -> dict:
    """Turn a fetched body into a dict, picking the parser from the hints.

    YAML hints (content-type or URL extension) are consulted first, then
    JSON hints. With no hint at all, JSON is attempted and YAML is used as
    the fallback when JSON parsing raises ValueError.
    """
    # Order matters: the YAML check runs before the JSON check.
    hinted_parsers = (
        (_is_yaml_format, _parse_yaml),
        (_is_json_format, _parse_json),
    )
    for hint_matches, parse in hinted_parsers:
        if hint_matches(content_type, url):
            return parse(content)

    # No usable hint: JSON first, YAML second.
    try:
        return _parse_json(content)
    except ValueError:
        return _parse_yaml(content)
|
||||
|
||||
|
||||
def _is_yaml_format(content_type: str, url: str) -> bool:
|
||||
"""Check if the content is YAML format."""
|
||||
yaml_types = {"application/x-yaml", "text/yaml", "application/yaml"}
|
||||
if any(t in content_type for t in yaml_types):
|
||||
return True
|
||||
lower_url = url.lower().split("?")[0]
|
||||
return lower_url.endswith(".yaml") or lower_url.endswith(".yml")
|
||||
|
||||
|
||||
def _is_json_format(content_type: str, url: str) -> bool:
|
||||
"""Check if the content is JSON format."""
|
||||
if "application/json" in content_type:
|
||||
return True
|
||||
lower_url = url.lower().split("?")[0]
|
||||
return lower_url.endswith(".json")
|
||||
|
||||
|
||||
def _parse_json(content: str) -> dict:
|
||||
"""Parse content as JSON, raising ValueError on failure."""
|
||||
try:
|
||||
result = json.loads(content)
|
||||
except json.JSONDecodeError as exc:
|
||||
raise ValueError(f"Invalid JSON: {exc}") from exc
|
||||
if not isinstance(result, dict):
|
||||
raise ValueError(f"Expected a JSON object, got {type(result).__name__}")
|
||||
return result
|
||||
|
||||
|
||||
def _parse_yaml(content: str) -> dict:
    """Parse content as YAML, raising ValueError on failure.

    Uses ``yaml.safe_load``.

    Args:
        content: YAML text.

    Returns:
        The decoded top-level YAML mapping.

    Raises:
        ValueError: If the text is not valid YAML, is nested too deeply for
            the loader, or its top-level value is not a mapping (an empty
            document loads as ``None`` and is rejected the same way).
    """
    try:
        result = yaml.safe_load(content)
    except yaml.YAMLError as exc:
        raise ValueError(f"Invalid YAML: {exc}") from exc
    except RecursionError as exc:
        # Pathologically nested input can exhaust the interpreter's
        # recursion limit while loading; report it as a parse failure
        # instead of letting RecursionError escape to the caller.
        raise ValueError("Invalid YAML: document is nested too deeply") from exc
    if not isinstance(result, dict):
        raise ValueError(f"Expected a YAML mapping, got {type(result).__name__}")
    return result
|
||||
Reference in New Issue
Block a user