fix: address critical security and code review findings in Phase 3
- Wire ImportOrchestrator into review_api start_import via BackgroundTasks
- Sanitize docstrings in generated tool code to prevent code injection
- Add Literal["read", "write"] validation for access_type
- Add regex validation for agent_group
- Validate URL scheme (http/https only) in ImportRequest
- Validate LLM output fields (clamp confidence, validate access_type)
- Use dataclasses.replace instead of manual reconstruction in importer
- Expand SSRF blocked networks (Carrier-Grade NAT, IPv4-mapped IPv6, etc.)
- Make _BLOCKED_NETWORKS immutable tuple
- Use yaml.safe_dump instead of yaml.dump
- Fix _to_snake_case for empty strings and Python keywords
This commit is contained in:
@@ -6,6 +6,7 @@ YAML agent configurations from classification results.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import keyword
|
||||
import re
|
||||
|
||||
import yaml
|
||||
@@ -24,7 +25,7 @@ def generate_tool_code(classification: ClassificationResult, base_url: str) -> G
|
||||
func_name = _to_snake_case(ep.operation_id)
|
||||
params = _collect_params(ep)
|
||||
sig = _build_signature(params, ep.request_body_schema)
|
||||
docstring = ep.summary or ep.description or ep.operation_id
|
||||
docstring = _sanitize_docstring(ep.summary or ep.description or ep.operation_id)
|
||||
interrupt_comment = _interrupt_comment(classification)
|
||||
http_call = _build_http_call(ep, base_url, params)
|
||||
|
||||
@@ -59,7 +60,7 @@ def generate_agent_yaml(
|
||||
Groups tools by agent_group, creating one agent entry per group.
|
||||
"""
|
||||
if not classifications:
|
||||
return yaml.dump({"agents": []})
|
||||
return yaml.safe_dump({"agents": []})
|
||||
|
||||
groups: dict[str, dict] = {}
|
||||
for clf in classifications:
|
||||
@@ -75,7 +76,7 @@ def generate_agent_yaml(
|
||||
}
|
||||
groups[group]["tools"].append(func_name)
|
||||
|
||||
return yaml.dump({"agents": list(groups.values())}, sort_keys=False)
|
||||
return yaml.safe_dump({"agents": list(groups.values())}, sort_keys=False)
|
||||
|
||||
|
||||
# --- Private helpers ---
|
||||
@@ -149,8 +150,15 @@ def _schema_type_to_python(schema_type: str) -> str:
|
||||
return mapping.get(schema_type, "str")
|
||||
|
||||
|
||||
def _sanitize_docstring(text: str) -> str:
    """Escape *text* so it can be embedded between triple double-quotes.

    The result is meant to be placed verbatim inside a generated
    ``\"\"\"...\"\"\"`` docstring literal.

    Args:
        text: Untrusted free-form text (e.g. an endpoint summary).

    Returns:
        A single-line string in which backslashes and every double quote are
        backslash-escaped and line breaks are replaced by spaces.
    """
    return (
        # Backslashes first, so the escapes added below are not doubled.
        text.replace("\\", "\\\\")
        # Escape every quote, not only runs of three: a value ending in one
        # or two quotes would otherwise fuse with the closing delimiter and
        # terminate the literal early (or leave a dangling quote).
        .replace('"', '\\"')
        # Flatten all newline flavours; a bare carriage return in source code
        # is treated as a line break by Python's tokenizer.
        .replace("\r\n", " ")
        .replace("\n", " ")
        .replace("\r", " ")
    )
|
||||
|
||||
|
||||
def _to_snake_case(name: str) -> str:
    """Convert an operationId to a valid lower-case Python identifier.

    Every run of characters other than ASCII letters and digits collapses
    into a single underscore, leading and trailing underscores are stripped,
    and the result is lower-cased. Case boundaries are not split, so
    ``"getUser"`` becomes ``"getuser"``.

    Args:
        name: The raw operationId; may be empty or contain arbitrary
            characters.

    Returns:
        A non-empty identifier: ``"unnamed_tool"`` when nothing usable
        remains, prefixed with ``tool_`` when it would start with a digit,
        and suffixed with ``_tool`` when it would be a Python keyword.
    """
    # Replace non-alphanumeric with underscore
    clean = re.sub(r"[^a-zA-Z0-9]+", "_", name).strip("_")
    result = clean.lower() or "unnamed_tool"
    # An identifier cannot begin with a digit (e.g. operationId "2fa-verify").
    if result[0].isdigit():
        result = f"tool_{result}"
    # Reserved words such as "import" or "class" cannot be function names.
    if keyword.iskeyword(result):
        result = f"{result}_tool"
    return result
|
||||
|
||||
Reference in New Issue
Block a user