fix: address critical security and code review findings in Phase 3

- Wire ImportOrchestrator into review_api start_import via BackgroundTasks
- Sanitize docstrings in generated tool code to prevent code injection
- Add Literal["read", "write"] validation for access_type
- Add regex validation for agent_group
- Validate URL scheme (http/https only) in ImportRequest
- Validate LLM output fields (clamp confidence, validate access_type)
- Use dataclasses.replace instead of manual reconstruction in importer
- Expand SSRF blocked networks (Carrier-Grade NAT, IPv4-mapped IPv6, etc.)
- Make _BLOCKED_NETWORKS immutable tuple
- Use yaml.safe_dump instead of yaml.dump
- Fix _to_snake_case for empty strings and Python keywords
This commit is contained in:
Yaojia Wang
2026-03-31 00:28:28 +02:00
parent a54eb224e0
commit a2f750269d
6 changed files with 128 additions and 28 deletions

View File

@@ -8,6 +8,7 @@ from __future__ import annotations
 import logging
 from collections.abc import Callable
+from dataclasses import replace
 from app.openapi.classifier import ClassifierProtocol, HeuristicClassifier
 from app.openapi.fetcher import fetch_spec
@@ -100,14 +101,7 @@ class ImportOrchestrator:
 def _update(job: ImportJob, **kwargs: object) -> ImportJob:
 """Return a new ImportJob with updated fields (immutable update)."""
-return ImportJob(
-job_id=job.job_id,
-status=kwargs.get("status", job.status), # type: ignore[arg-type]
-spec_url=job.spec_url,
-total_endpoints=kwargs.get("total_endpoints", job.total_endpoints), # type: ignore[arg-type]
-classified_count=kwargs.get("classified_count", job.classified_count), # type: ignore[arg-type]
-error_message=kwargs.get("error_message", job.error_message), # type: ignore[arg-type]
-)
+return replace(job, **kwargs)
 def _notify(callback: ProgressCallback, stage: str, job: ImportJob) -> None: