feat: add t-SNE stock clustering and similarity search (TDD)

2 new endpoints:
- POST /portfolio/cluster - t-SNE + KMeans clustering by return
  similarity. Maps stocks to 2D coordinates with cluster labels.
- POST /portfolio/similar - find most/least similar stocks by
  return correlation against a target symbol.

Implementation:
- sklearn TSNE (method=exact) + KMeans with auto n_clusters
- Jitter handling for identical returns edge case
- 33 new tests (17 service unit + 16 route integration)
- All 503 tests passing
This commit is contained in:
Yaojia Wang
2026-03-19 22:53:27 +01:00
parent 9ee3ec9b4e
commit 4915f1bae4
4 changed files with 759 additions and 1 deletions

View File

@@ -52,3 +52,45 @@ async def portfolio_risk_parity(request: PortfolioOptimizeRequest):
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc))
return ApiResponse(data=result)
# Request payload accepted by the stock clustering endpoint.
class ClusterRequest(BaseModel):
    # Symbols to cluster; the list must hold between 3 and 50 entries.
    symbols: list[str] = Field(
        ...,
        min_length=3,
        max_length=50,
    )
    # Defaults to 180 and is bounded to [30, 3650].
    # NOTE(review): presumably the lookback window in days -- confirm
    # against the service implementation.
    days: int = Field(
        default=180,
        ge=30,
        le=3650,
    )
    # Optional cluster count in [2, 20]; None when the caller omits it.
    n_clusters: int | None = Field(
        default=None,
        ge=2,
        le=20,
    )
# Request payload accepted by the similar-stocks endpoint.
class SimilarRequest(BaseModel):
    # Target symbol: a non-empty string of at most 20 characters.
    symbol: str = Field(
        ...,
        min_length=1,
        max_length=20,
    )
    # Candidate symbols; the list must hold between 2 and 50 entries.
    universe: list[str] = Field(
        ...,
        min_length=2,
        max_length=50,
    )
    # Defaults to 180 and is bounded to [30, 3650].
    # NOTE(review): presumably the lookback window in days -- confirm
    # against the service implementation.
    days: int = Field(
        default=180,
        ge=30,
        le=3650,
    )
    # Defaults to 5 and is bounded to [1, 20].
    # NOTE(review): presumably how many matches to return -- confirm.
    top_n: int = Field(
        default=5,
        ge=1,
        le=20,
    )
@router.post("/cluster", response_model=ApiResponse)
@safe
async def portfolio_cluster(request: ClusterRequest):
    """Cluster stocks by return similarity using t-SNE + KMeans.

    Passes ``request.symbols``, ``request.days`` and ``request.n_clusters``
    to ``portfolio_service.cluster_stocks`` and wraps whatever it returns
    in an ``ApiResponse``.

    Raises:
        HTTPException: status 400 when the service raises ``ValueError``;
            the error text is used as the response detail.
    """
    try:
        result = await portfolio_service.cluster_stocks(
            request.symbols, days=request.days, n_clusters=request.n_clusters
        )
    except ValueError as exc:
        # Chain explicitly so the traceback reports the service error as the
        # direct cause of the 400, not as an incidental error during handling.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return ApiResponse(data=result)
@router.post("/similar", response_model=ApiResponse)
@safe
async def portfolio_similar(request: SimilarRequest):
    """Find stocks most/least similar to a target by return correlation.

    Passes ``request.symbol`` and ``request.universe`` positionally, plus
    ``days`` and ``top_n`` as keywords, to
    ``portfolio_service.find_similar_stocks`` and wraps whatever it returns
    in an ``ApiResponse``.

    Raises:
        HTTPException: status 400 when the service raises ``ValueError``;
            the error text is used as the response detail.
    """
    try:
        result = await portfolio_service.find_similar_stocks(
            request.symbol,
            request.universe,
            days=request.days,
            top_n=request.top_n,
        )
    except ValueError as exc:
        # Chain explicitly so the traceback reports the service error as the
        # direct cause of the 400, not as an incidental error during handling.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return ApiResponse(data=result)