diff --git a/backend/app/main.py b/backend/app/main.py
index 41022d5..9a70380 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -93,7 +93,9 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
     await pool.close()
 
 
-app = FastAPI(title="Smart Support", version="0.5.0", lifespan=lifespan)
+_VERSION = "0.5.0"
+
+app = FastAPI(title="Smart Support", version=_VERSION, lifespan=lifespan)
 
 app.include_router(openapi_router)
 app.include_router(replay_router)
@@ -103,7 +105,7 @@ app.include_router(analytics_router)
 @app.get("/api/health")
 def health_check() -> dict:
     """Health check endpoint for load balancers and monitoring."""
-    return {"status": "ok", "version": "0.5.0"}
+    return {"status": "ok", "version": _VERSION}
 
 
 @app.websocket("/ws")
diff --git a/backend/app/ws_handler.py b/backend/app/ws_handler.py
index 6dfc55d..8c6f383 100644
--- a/backend/app/ws_handler.py
+++ b/backend/app/ws_handler.py
@@ -33,9 +33,17 @@ THREAD_ID_PATTERN = re.compile(r"^[a-zA-Z0-9\-_]{1,128}$")
 # Rate limiting: max 10 messages per 10-second window, per thread
 _RATE_LIMIT_MAX = 10
 _RATE_LIMIT_WINDOW = 10.0
+_MAX_TRACKED_THREADS = 10_000
 _thread_timestamps: dict[str, list[float]] = defaultdict(list)
 
 
+def _evict_stale_threads(cutoff: float) -> None:
+    """Remove thread entries with no recent timestamps to prevent memory leak."""
+    stale = [tid for tid, ts in _thread_timestamps.items() if not ts or ts[-1] < cutoff]
+    for tid in stale:
+        del _thread_timestamps[tid]
+
+
 async def handle_user_message(
     ws: WebSocket,
     graph: CompiledStateGraph,
@@ -245,15 +253,16 @@ async def dispatch_message(
             await _send_json(ws, {"type": "error", "message": "Message content too long"})
             return
 
-        # Rate limiting check
+        # Rate limiting check (per-thread, with bounded memory)
         now = time.time()
-        timestamps = _thread_timestamps[thread_id]
         cutoff = now - _RATE_LIMIT_WINDOW
-        _thread_timestamps[thread_id] = [t for t in timestamps if t >= cutoff]
-        if len(_thread_timestamps[thread_id]) >= _RATE_LIMIT_MAX:
+        if len(_thread_timestamps) > _MAX_TRACKED_THREADS:
+            _evict_stale_threads(cutoff)
+        recent = [t for t in _thread_timestamps[thread_id] if t >= cutoff]
+        if len(recent) >= _RATE_LIMIT_MAX:
             await _send_json(ws, {"type": "error", "message": "Rate limit exceeded"})
             return
-        _thread_timestamps[thread_id].append(now)
+        _thread_timestamps[thread_id] = [*recent, now]
 
         await handle_user_message(
             ws, graph, session_manager, callback_handler, thread_id, content,
diff --git a/docker-compose.yml b/docker-compose.yml
index 79f5e68..4172462 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,8 +5,7 @@ services:
       POSTGRES_DB: smart_support
       POSTGRES_USER: smart_support
       POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-dev_password}
-    ports:
-      - "5432:5432"
+    # ports: ["5432:5432"]  # Uncomment for local dev DB access only
     volumes:
       - pgdata:/var/lib/postgresql/data
     healthcheck:
@@ -14,6 +13,8 @@ services:
       interval: 5s
       timeout: 3s
       retries: 5
+    networks:
+      - app_network
 
   backend:
     build:
diff --git a/frontend/nginx.conf b/frontend/nginx.conf
index 43584bf..b85fa04 100644
--- a/frontend/nginx.conf
+++ b/frontend/nginx.conf
@@ -1,8 +1,14 @@
 server {
     listen 80;
+    server_tokens off;
     root /usr/share/nginx/html;
     index index.html;
 
+    add_header X-Frame-Options "SAMEORIGIN" always;
+    add_header X-Content-Type-Options "nosniff" always;
+    add_header X-XSS-Protection "1; mode=block" always;
+    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+
     location /api/ {
         proxy_pass http://backend:8000;
         proxy_set_header Host $host;