smart-support/backend/app/callbacks.py
Yaojia Wang 33488fd634 feat: complete phase 1 -- core framework with chat loop, agents, and React UI
Backend:
- FastAPI WebSocket /ws endpoint with streaming via LangGraph astream
- LangGraph Supervisor connecting 3 mock agents (order_lookup, order_actions, fallback)
- YAML Agent Registry with Pydantic validation and immutable configs (sketched after this list)
- PostgresSaver checkpoint persistence via langgraph-checkpoint-postgres
- Session TTL with 30-min sliding window and interrupt extension
- LLM provider abstraction (Anthropic/OpenAI/Google)
- Token usage + cost tracking callback handler
- Input validation: message size cap, thread_id format, content length
- Security: no hardcoded defaults, startup API key validation, no input reflection
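
For illustration, a minimal sketch of the registry pattern described in the YAML Agent Registry bullet above: YAML entries validated into frozen Pydantic models. The file name, field names, and both classes here are hypothetical, not the committed implementation.

# registry_sketch.py -- hypothetical illustration of the YAML Agent Registry idea
from pathlib import Path

import yaml
from pydantic import BaseModel, ConfigDict


class AgentConfig(BaseModel):
    """One agent entry; frozen=True makes the config immutable after load."""

    model_config = ConfigDict(frozen=True)

    name: str
    description: str
    model: str = "claude-sonnet-4-6"


def load_registry(path: Path) -> dict[str, AgentConfig]:
    """Parse an agents YAML file and validate every entry; raises on bad fields."""
    raw = yaml.safe_load(path.read_text())
    return {entry["name"]: AgentConfig(**entry) for entry in raw["agents"]}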

Frontend:
- React 19 + TypeScript + Vite chat UI
- WebSocket hook with reconnect + exponential backoff
- Streaming token display with agent attribution
- Interrupt approval/reject UI for write operations
- Collapsible tool call viewer

Testing:
- 87 unit tests, 87% coverage, exceeding the 80% requirement (a representative test is sketched after this list)
- Ruff lint + format clean
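
A representative unit test for the token-tracking handler shown below; the test name and structure are illustrative, and the import path app.callbacks is assumed from the file location.

from langchain_core.outputs import LLMResult

from app.callbacks import TokenUsageCallbackHandler


def test_token_usage_accumulates_and_prices() -> None:
    handler = TokenUsageCallbackHandler(model_name="gpt-4o-mini")
    result = LLMResult(
        generations=[],
        llm_output={"token_usage": {"prompt_tokens": 1000, "completion_tokens": 500}},
    )

    handler.on_llm_end(result)
    usage = handler.get_usage()

    assert usage.total_tokens == 1500
    # 1000 * 0.00015 / 1000 + 500 * 0.0006 / 1000 = 0.00015 + 0.0003
    assert usage.total_cost_usd == 0.00045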

Infrastructure:
- Docker Compose (PostgreSQL 16 + backend)
- pyproject.toml with full dependency management
2026-03-30 00:54:21 +02:00

61 lines · 2.0 KiB · Python

"""Token usage tracking callback handler."""
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from langchain_core.callbacks import BaseCallbackHandler
if TYPE_CHECKING:
from langchain_core.outputs import LLMResult
COST_PER_1K_TOKENS: dict[str, dict[str, float]] = {
"claude-sonnet-4-6": {"prompt": 0.003, "completion": 0.015},
"claude-haiku-4-5-20251001": {"prompt": 0.0008, "completion": 0.004},
"gpt-4o": {"prompt": 0.0025, "completion": 0.01},
"gpt-4o-mini": {"prompt": 0.00015, "completion": 0.0006},
}
DEFAULT_COST = {"prompt": 0.003, "completion": 0.015}
@dataclass(frozen=True)
class TokenUsage:
prompt_tokens: int
completion_tokens: int
total_tokens: int
total_cost_usd: float
class TokenUsageCallbackHandler(BaseCallbackHandler):
"""Accumulates token usage and cost across LLM invocations."""
def __init__(self, model_name: str = "") -> None:
self._model_name = model_name
self._prompt_tokens = 0
self._completion_tokens = 0
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
if response.llm_output and "token_usage" in response.llm_output:
usage = response.llm_output["token_usage"]
self._prompt_tokens += usage.get("prompt_tokens", 0)
self._completion_tokens += usage.get("completion_tokens", 0)
def get_usage(self) -> TokenUsage:
costs = COST_PER_1K_TOKENS.get(self._model_name, DEFAULT_COST)
cost = (
self._prompt_tokens * costs["prompt"] / 1000
+ self._completion_tokens * costs["completion"] / 1000
)
return TokenUsage(
prompt_tokens=self._prompt_tokens,
completion_tokens=self._completion_tokens,
total_tokens=self._prompt_tokens + self._completion_tokens,
total_cost_usd=round(cost, 6),
)
def reset(self) -> None:
self._prompt_tokens = 0
self._completion_tokens = 0
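
A minimal usage sketch for the handler above (assumes langchain-openai is installed, OPENAI_API_KEY is set, and the module imports as app.callbacks):

from langchain_openai import ChatOpenAI

from app.callbacks import TokenUsageCallbackHandler

handler = TokenUsageCallbackHandler(model_name="gpt-4o-mini")
llm = ChatOpenAI(model="gpt-4o-mini")

# Callbacks passed via config fire on_llm_end when the response completes.
llm.invoke("Hello!", config={"callbacks": [handler]})

print(handler.get_usage())  # TokenUsage(prompt_tokens=..., total_cost_usd=...)

Calling reset() between sessions lets one handler instance be reused per conversation.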