"""OpenAPI spec fetcher with SSRF protection. Fetches OpenAPI spec documents from remote URLs, validates them against SSRF policy, and parses JSON or YAML format automatically. """ from __future__ import annotations import json import yaml from app.openapi.ssrf import DEFAULT_POLICY, SSRFPolicy _MAX_SIZE_BYTES = 10 * 1024 * 1024 # 10MB async def fetch_spec(url: str, policy: SSRFPolicy = DEFAULT_POLICY) -> dict: """Fetch an OpenAPI spec from a URL and return as a dict. Auto-detects JSON or YAML format from content-type header or URL extension. Enforces a 10MB size limit. Raises: SSRFError: If the URL is blocked by SSRF policy. ValueError: If the response is too large or cannot be parsed. """ from app.openapi.ssrf import safe_fetch response = await safe_fetch(url, policy=policy) response.raise_for_status() content = response.text if len(content.encode("utf-8")) > _MAX_SIZE_BYTES: raise ValueError( f"Response too large: {len(content.encode('utf-8'))} bytes " f"(max {_MAX_SIZE_BYTES} bytes)" ) content_type = response.headers.get("content-type", "") return _parse_content(content, content_type, url) def _parse_content(content: str, content_type: str, url: str) -> dict: """Parse content as JSON or YAML based on content-type or URL extension.""" if _is_yaml_format(content_type, url): return _parse_yaml(content) if _is_json_format(content_type, url): return _parse_json(content) # Fall back: try JSON first, then YAML try: return _parse_json(content) except ValueError: return _parse_yaml(content) def _is_yaml_format(content_type: str, url: str) -> bool: """Check if the content is YAML format.""" yaml_types = {"application/x-yaml", "text/yaml", "application/yaml"} if any(t in content_type for t in yaml_types): return True lower_url = url.lower().split("?")[0] return lower_url.endswith(".yaml") or lower_url.endswith(".yml") def _is_json_format(content_type: str, url: str) -> bool: """Check if the content is JSON format.""" if "application/json" in content_type: return True lower_url = url.lower().split("?")[0] return lower_url.endswith(".json") def _parse_json(content: str) -> dict: """Parse content as JSON, raising ValueError on failure.""" try: result = json.loads(content) except json.JSONDecodeError as exc: raise ValueError(f"Invalid JSON: {exc}") from exc if not isinstance(result, dict): raise ValueError(f"Expected a JSON object, got {type(result).__name__}") return result def _parse_yaml(content: str) -> dict: """Parse content as YAML, raising ValueError on failure.""" try: result = yaml.safe_load(content) except yaml.YAMLError as exc: raise ValueError(f"Invalid YAML: {exc}") from exc if not isinstance(result, dict): raise ValueError(f"Expected a YAML mapping, got {type(result).__name__}") return result