fix: address security findings in Phase 4 analytics and replay

- Fix CRITICAL: use parameterized INTERVAL arithmetic (%(days)s * INTERVAL '1 day')
  instead of string interpolation inside a SQL literal
- Use asyncio.gather() for parallel query execution in get_analytics()
- Add range bounds (min 1 day, max 365 days) to prevent DoS via full-table scans
- Add thread_id validation (alphanumeric, max 128 chars) in replay API
- Sanitize error messages so they do not reflect user input
This commit is contained in:
Yaojia Wang
2026-03-31 13:38:09 +02:00
parent 33db5aeb10
commit ef6e5ac2be
3 changed files with 41 additions and 21 deletions

View File

@@ -17,6 +17,7 @@ router = APIRouter(prefix="/api/analytics", tags=["analytics"])
_RANGE_PATTERN = re.compile(r"^(\d+)d$")
_DEFAULT_RANGE = "7d"
_MAX_RANGE_DAYS = 365
async def _get_pool(request: Request) -> AsyncConnectionPool:
@@ -34,9 +35,15 @@ def _parse_range(range_str: str) -> int:
if not match:
raise HTTPException(
status_code=400,
detail=f"Invalid range format '{range_str}'. Expected format: '<N>d' e.g. '7d', '30d'.",
detail="Invalid range format. Expected: '<N>d' e.g. '7d', '30d'.",
)
return int(match.group(1))
days = int(match.group(1))
if days < 1 or days > _MAX_RANGE_DAYS:
raise HTTPException(
status_code=400,
detail=f"Range must be between 1 and {_MAX_RANGE_DAYS} days.",
)
return days
@router.get("")