fix: address critical security and code review findings in Phase 3

- Wire ImportOrchestrator into review_api start_import via BackgroundTasks
- Sanitize docstrings in generated tool code to prevent code injection
- Add Literal["read", "write"] validation for access_type
- Add regex validation for agent_group
- Validate URL scheme (http/https only) in ImportRequest
- Validate LLM output fields (clamp confidence, validate access_type)
- Use dataclasses.replace instead of manual reconstruction in importer
- Expand SSRF blocked networks (Carrier-Grade NAT, IPv4-mapped IPv6, etc.)
- Make _BLOCKED_NETWORKS an immutable tuple
- Use yaml.safe_dump instead of yaml.dump
- Fix _to_snake_case for empty strings and Python keywords
2026-03-31 00:28:28 +02:00
parent a54eb224e0
commit a2f750269d
6 changed files with 128 additions and 28 deletions
--- a/backend/app/openapi/classifier.py
+++ b/backend/app/openapi/classifier.py
@@ -150,13 +150,18 @@ def _parse_llm_response(

    results: list[ClassificationResult] = []
    for ep, item in zip(endpoints, items, strict=True):
+        raw_access = item.get("access_type", "read")
+        access_type = raw_access if raw_access in {"read", "write"} else "read"
+        confidence = max(0.0, min(1.0, float(item.get("confidence", 0.8))))
+        raw_group = str(item.get("agent_group", "support"))
+        agent_group = raw_group if raw_group.strip() else "support"
        results.append(
            ClassificationResult(
                endpoint=ep,
-                access_type=item.get("access_type", "read"),
+                access_type=access_type,
                customer_params=tuple(item.get("customer_params", [])),
-                agent_group=item.get("agent_group", "support"),
-                confidence=float(item.get("confidence", 0.8)),
+                agent_group=agent_group,
+                confidence=confidence,
                needs_interrupt=bool(item.get("needs_interrupt", False)),
            )
        )