# smart-support/backend/app/analytics/queries.py

"""Analytics query functions -- all async, take pool + range_days."""

from __future__ import annotations

from typing import TYPE_CHECKING

from app.analytics.models import AgentUsage, AnalyticsResult, InterruptStats

if TYPE_CHECKING:
    from psycopg_pool import AsyncConnectionPool

# Fraction of conversations created in the last ``days`` days whose
# resolution_type is 'resolved'; evaluates to 0.0 when the window is empty.
#
# The day count is bound as a real query parameter through make_interval().
# A placeholder inside a quoted literal (INTERVAL '%(days)s days') is not
# substituted under psycopg 3's server-side parameter binding, so the
# interval has to be built from the bound integer instead.
_RESOLUTION_RATE_SQL = """
SELECT
    CASE WHEN COUNT(*) = 0 THEN 0.0
         ELSE COUNT(*) FILTER (WHERE resolution_type = 'resolved')::float / COUNT(*)
    END AS rate
FROM conversations
WHERE created_at >= NOW() - make_interval(days => %(days)s)
"""

# Fraction of conversations created in the last ``days`` days whose
# resolution_type is 'escalated'; evaluates to 0.0 when the window is empty.
#
# The day count is bound as a real query parameter through make_interval().
# A placeholder inside a quoted literal (INTERVAL '%(days)s days') is not
# substituted under psycopg 3's server-side parameter binding, so the
# interval has to be built from the bound integer instead.
_ESCALATION_RATE_SQL = """
SELECT
    CASE WHEN COUNT(*) = 0 THEN 0.0
         ELSE COUNT(*) FILTER (WHERE resolution_type = 'escalated')::float / COUNT(*)
    END AS rate
FROM conversations
WHERE created_at >= NOW() - make_interval(days => %(days)s)
"""

# Number of conversations created in the last ``days`` days.
#
# The day count is bound as a real query parameter through make_interval().
# A placeholder inside a quoted literal (INTERVAL '%(days)s days') is not
# substituted under psycopg 3's server-side parameter binding, so the
# interval has to be built from the bound integer instead.
_TOTAL_CONVERSATIONS_SQL = """
SELECT COUNT(*) AS total
FROM conversations
WHERE created_at >= NOW() - make_interval(days => %(days)s)
"""

# Mean turn_count over conversations created in the last ``days`` days;
# COALESCE turns the NULL average of an empty window into 0.0.
#
# The day count is bound as a real query parameter through make_interval().
# A placeholder inside a quoted literal (INTERVAL '%(days)s days') is not
# substituted under psycopg 3's server-side parameter binding, so the
# interval has to be built from the bound integer instead.
_AVG_TURNS_SQL = """
SELECT COALESCE(AVG(turn_count), 0.0) AS avg_turns
FROM conversations
WHERE created_at >= NOW() - make_interval(days => %(days)s)
"""

# Mean total_cost_usd over conversations created in the last ``days`` days;
# COALESCE turns the NULL average of an empty window into 0.0.
#
# The day count is bound as a real query parameter through make_interval().
# A placeholder inside a quoted literal (INTERVAL '%(days)s days') is not
# substituted under psycopg 3's server-side parameter binding, so the
# interval has to be built from the bound integer instead.
_COST_PER_CONVERSATION_SQL = """
SELECT COALESCE(AVG(total_cost_usd), 0.0) AS avg_cost
FROM conversations
WHERE created_at >= NOW() - make_interval(days => %(days)s)
"""

# Per-agent usage for conversations created in the last ``days`` days.
# The inner query expands each conversation's agents_used array into one row
# per agent (conversations with a NULL array are skipped); the outer query
# counts rows per agent and expresses each count as a percentage of all
# expanded rows, rounded to two decimals, most-used agent first.
#
# The day count is bound as a real query parameter through make_interval().
# A placeholder inside a quoted literal (INTERVAL '%(days)s days') is not
# substituted under psycopg 3's server-side parameter binding, so the
# interval has to be built from the bound integer instead.
_AGENT_USAGE_SQL = """
SELECT
    agent,
    COUNT(*) AS count,
    ROUND(COUNT(*) * 100.0 / NULLIF(SUM(COUNT(*)) OVER (), 0), 2) AS percentage
FROM (
    SELECT UNNEST(agents_used) AS agent
    FROM conversations
    WHERE created_at >= NOW() - make_interval(days => %(days)s)
      AND agents_used IS NOT NULL
) sub
GROUP BY agent
ORDER BY count DESC
"""

# Interrupt counters over analytics_events created in the last ``days`` days:
#   total    - every event with event_type = 'interrupt'
#   approved - interrupts with success = TRUE
#   rejected - interrupts with success = FALSE and no error_message
#   expired  - interrupts whose error_message is exactly 'expired'
#
# The day count is bound as a real query parameter through make_interval().
# A placeholder inside a quoted literal (INTERVAL '%(days)s days') is not
# substituted under psycopg 3's server-side parameter binding, so the
# interval has to be built from the bound integer instead.
_INTERRUPT_STATS_SQL = """
SELECT
    COUNT(*) FILTER (WHERE event_type = 'interrupt') AS total,
    COUNT(*) FILTER (WHERE event_type = 'interrupt' AND success = TRUE) AS approved,
    COUNT(*) FILTER (WHERE event_type = 'interrupt' AND success = FALSE
                     AND error_message IS NULL) AS rejected,
    COUNT(*) FILTER (WHERE event_type = 'interrupt' AND error_message = 'expired') AS expired
FROM analytics_events
WHERE created_at >= NOW() - make_interval(days => %(days)s)
"""


async def resolution_rate(pool: AsyncConnectionPool, range_days: int) -> float:
    """Compute the share of conversations marked resolved in the window.

    Args:
        pool: Connection pool used to borrow a connection for the query.
        range_days: Window size in days, bound to the query's ``days`` parameter.

    Returns:
        The ``rate`` column as a float, or 0.0 when no row comes back or the
        column is NULL.
    """
    params = {"days": range_days}
    async with pool.connection() as db:
        result = await db.execute(_RESOLUTION_RATE_SQL, params)
        record = await result.fetchone()
    # A missing row and a NULL/zero rate both collapse to 0.0.
    rate = record.get("rate") if record else None
    return float(rate or 0.0)


async def escalation_rate(pool: AsyncConnectionPool, range_days: int) -> float:
    """Compute the share of conversations marked escalated in the window.

    Args:
        pool: Connection pool used to borrow a connection for the query.
        range_days: Window size in days, bound to the query's ``days`` parameter.

    Returns:
        The ``rate`` column as a float, or 0.0 when no row comes back or the
        column is NULL.
    """
    params = {"days": range_days}
    async with pool.connection() as db:
        result = await db.execute(_ESCALATION_RATE_SQL, params)
        record = await result.fetchone()
    # A missing row and a NULL/zero rate both collapse to 0.0.
    rate = record.get("rate") if record else None
    return float(rate or 0.0)


async def _total_conversations(pool: AsyncConnectionPool, range_days: int) -> int:
    """Count the conversations that fall inside the window.

    Args:
        pool: Connection pool used to borrow a connection for the query.
        range_days: Window size in days, bound to the query's ``days`` parameter.

    Returns:
        The ``total`` column as an int, or 0 when no row comes back or the
        column is NULL.
    """
    params = {"days": range_days}
    async with pool.connection() as db:
        result = await db.execute(_TOTAL_CONVERSATIONS_SQL, params)
        record = await result.fetchone()
    # A missing row and a NULL/zero count both collapse to 0.
    total = record.get("total") if record else None
    return int(total or 0)


async def _avg_turns(pool: AsyncConnectionPool, range_days: int) -> float:
    """Compute the mean number of turns per conversation in the window.

    Args:
        pool: Connection pool used to borrow a connection for the query.
        range_days: Window size in days, bound to the query's ``days`` parameter.

    Returns:
        The ``avg_turns`` column as a float, or 0.0 when no row comes back or
        the column is NULL.
    """
    params = {"days": range_days}
    async with pool.connection() as db:
        result = await db.execute(_AVG_TURNS_SQL, params)
        record = await result.fetchone()
    # A missing row and a NULL/zero average both collapse to 0.0.
    mean_turns = record.get("avg_turns") if record else None
    return float(mean_turns or 0.0)


async def cost_per_conversation(pool: AsyncConnectionPool, range_days: int) -> float:
    """Compute the mean cost of a conversation in the window.

    Args:
        pool: Connection pool used to borrow a connection for the query.
        range_days: Window size in days, bound to the query's ``days`` parameter.

    Returns:
        The ``avg_cost`` column as a float, or 0.0 when no row comes back or
        the column is NULL.
    """
    params = {"days": range_days}
    async with pool.connection() as db:
        result = await db.execute(_COST_PER_CONVERSATION_SQL, params)
        record = await result.fetchone()
    # A missing row and a NULL/zero average both collapse to 0.0.
    mean_cost = record.get("avg_cost") if record else None
    return float(mean_cost or 0.0)


async def agent_usage(pool: AsyncConnectionPool, range_days: int) -> tuple[AgentUsage, ...]:
    """Collect usage figures for each agent seen in the window.

    Args:
        pool: Connection pool used to borrow a connection for the query.
        range_days: Window size in days, bound to the query's ``days`` parameter.

    Returns:
        One ``AgentUsage`` per result row, in the order the query returns
        them; an empty tuple when the query yields nothing.
    """

    def _as_usage(record) -> AgentUsage:
        # Rows are read by column name; count/percentage are coerced to
        # plain int/float before being handed to the model.
        return AgentUsage(
            agent=record["agent"],
            count=int(record["count"]),
            percentage=float(record["percentage"]),
        )

    async with pool.connection() as db:
        result = await db.execute(_AGENT_USAGE_SQL, {"days": range_days})
        records = await result.fetchall()
    return tuple(map(_as_usage, records)) if records else ()


async def interrupt_stats(pool: AsyncConnectionPool, range_days: int) -> InterruptStats:
    """Collect interrupt outcome counters for the window.

    Args:
        pool: Connection pool used to borrow a connection for the query.
        range_days: Window size in days, bound to the query's ``days`` parameter.

    Returns:
        An ``InterruptStats`` built from the ``total``, ``approved``,
        ``rejected`` and ``expired`` columns (NULL counts become 0), or a
        default-constructed ``InterruptStats`` when no row comes back.
    """
    async with pool.connection() as db:
        result = await db.execute(_INTERRUPT_STATS_SQL, {"days": range_days})
        record = await result.fetchone()
    if not record:
        return InterruptStats()
    # Each counter is read by column name and normalised to a plain int.
    counters = {
        column: int(record.get(column) or 0)
        for column in ("total", "approved", "rejected", "expired")
    }
    return InterruptStats(**counters)


async def get_analytics(pool: AsyncConnectionPool, range_days: int) -> AnalyticsResult:
    """Run every analytics query and bundle the results.

    Args:
        pool: Connection pool handed to each individual query function.
        range_days: Window size in days; also rendered as ``"<range_days>d"``
            in the result's ``range`` field.

    Returns:
        An ``AnalyticsResult`` populated with all metrics for the window.
    """
    # The queries are awaited strictly one after another, in this order.
    resolved_share = await resolution_rate(pool, range_days)
    escalated_share = await escalation_rate(pool, range_days)
    mean_cost = await cost_per_conversation(pool, range_days)
    per_agent = await agent_usage(pool, range_days)
    interrupts = await interrupt_stats(pool, range_days)
    conversation_count = await _total_conversations(pool, range_days)
    mean_turns = await _avg_turns(pool, range_days)

    return AnalyticsResult(
        range=f"{range_days}d",
        total_conversations=conversation_count,
        resolution_rate=resolved_share,
        escalation_rate=escalated_share,
        avg_turns_per_conversation=mean_turns,
        avg_cost_per_conversation_usd=mean_cost,
        agent_usage=per_agent,
        interrupt_stats=interrupts,
    )