fix: address security findings in Phase 4 analytics and replay

- Fix CRITICAL: use parameterized INTERVAL arithmetic (%(days)s * INTERVAL '1 day')
  instead of string interpolation inside SQL literal
- Use asyncio.gather() for parallel query execution in get_analytics()
- Add range upper bound (max 365 days) to prevent DoS via full-table scans
- Add thread_id validation (alphanumeric, max 128 chars) in replay API
- Sanitize error messages to not reflect user input
This commit is contained in:
Yaojia Wang
2026-03-31 13:38:09 +02:00
parent 33db5aeb10
commit ef6e5ac2be
3 changed files with 41 additions and 21 deletions

View File

@@ -2,10 +2,13 @@
from __future__ import annotations
import re
from typing import TYPE_CHECKING, Annotated, Any
from fastapi import APIRouter, HTTPException, Query, Request
_THREAD_ID_PATTERN = re.compile(r"^[a-zA-Z0-9\-_]{1,128}$")
if TYPE_CHECKING:
from psycopg_pool import AsyncConnectionPool
@@ -64,13 +67,16 @@ async def get_replay(
"""Return paginated replay steps for a conversation thread."""
from app.replay.transformer import transform_checkpoints
if not _THREAD_ID_PATTERN.match(thread_id):
raise HTTPException(status_code=400, detail="Invalid thread_id format")
pool = await get_pool(request)
async with pool.connection() as conn:
cursor = await conn.execute(_GET_CHECKPOINTS_SQL, {"thread_id": thread_id})
rows = await cursor.fetchall()
if not rows:
raise HTTPException(status_code=404, detail=f"Thread '{thread_id}' not found")
raise HTTPException(status_code=404, detail="Thread not found")
all_steps = transform_checkpoints([dict(row) for row in rows])
total_steps = len(all_steps)