feat: add t-SNE stock clustering and similarity search (TDD)
2 new endpoints:
- POST /portfolio/cluster - t-SNE + KMeans clustering by return similarity. Maps stocks to 2D coordinates with cluster labels.
- POST /portfolio/similar - find most/least similar stocks by return correlation against a target symbol.

Implementation:
- sklearn TSNE (method=exact) + KMeans with auto n_clusters
- Jitter handling for identical returns edge case
- 33 new tests (17 service unit + 16 route integration)
- All 503 tests passing
This commit is contained in:
@@ -223,3 +223,271 @@ async def test_portfolio_risk_parity_default_days(mock_fn, client):
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
mock_fn.assert_called_once_with(["AAPL"], days=365)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /api/v1/portfolio/cluster
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Canned service response used as the mocked return value in the /cluster
# route tests: six symbols, four labelled cluster 0 and two labelled cluster 1.
_CLUSTER_RESULT = {
    "symbols": ["AAPL", "MSFT", "GOOGL", "AMZN", "JPM", "BAC"],
    # One point per symbol, expanded from compact (symbol, x, y, cluster) rows.
    "coordinates": [
        {"symbol": ticker, "x": x_pos, "y": y_pos, "cluster": label}
        for ticker, x_pos, y_pos, label in (
            ("AAPL", 12.5, -3.2, 0),
            ("MSFT", 11.8, -2.9, 0),
            ("GOOGL", 10.1, -1.5, 0),
            ("AMZN", 9.5, -0.8, 0),
            ("JPM", -5.1, 8.3, 1),
            ("BAC", -4.9, 7.9, 1),
        )
    ],
    # Cluster ids are string keys here, while the per-point labels are ints.
    "clusters": {
        "0": ["AAPL", "MSFT", "GOOGL", "AMZN"],
        "1": ["JPM", "BAC"],
    },
    "method": "t-SNE + KMeans",
    "n_clusters": 2,
    "days": 180,
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.cluster_stocks", new_callable=AsyncMock)
async def test_portfolio_cluster_happy_path(mock_fn, client):
    """A valid /cluster request yields 200 and exposes the service result."""
    tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "JPM", "BAC"]
    mock_fn.return_value = _CLUSTER_RESULT

    response = await client.post(
        "/api/v1/portfolio/cluster", json={"symbols": tickers, "days": 180}
    )

    assert response.status_code == 200
    body = response.json()
    assert body["success"] is True
    result = body["data"]
    assert result["method"] == "t-SNE + KMeans"
    assert "coordinates" in result
    assert "clusters" in result
    # The request carries no n_clusters, so the service must receive None.
    mock_fn.assert_called_once_with(tickers, days=180, n_clusters=None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.cluster_stocks", new_callable=AsyncMock)
async def test_portfolio_cluster_with_custom_n_clusters(mock_fn, client):
    """An explicit n_clusters in the request body reaches the service call."""
    tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "JPM", "BAC"]
    payload = {"symbols": tickers, "days": 180, "n_clusters": 3}
    mock_fn.return_value = _CLUSTER_RESULT

    response = await client.post("/api/v1/portfolio/cluster", json=payload)

    assert response.status_code == 200
    mock_fn.assert_called_once_with(tickers, days=180, n_clusters=3)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_cluster_too_few_symbols_returns_422(client):
    """A /cluster request with fewer than 3 symbols is rejected with 422."""
    # Two symbols: one short of the minimum this test is named for.
    payload = {"symbols": ["AAPL", "MSFT"], "days": 180}

    response = await client.post("/api/v1/portfolio/cluster", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_cluster_missing_symbols_returns_422(client):
    """Omitting the symbols field from a /cluster request gives 422."""
    payload = {"days": 180}

    response = await client.post("/api/v1/portfolio/cluster", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_cluster_too_many_symbols_returns_422(client):
    """A /cluster request listing more than 50 symbols is rejected with 422."""
    # 51 generated tickers (SYM0..SYM50): one past the limit under test.
    payload = {"symbols": [f"SYM{idx}" for idx in range(51)], "days": 180}

    response = await client.post("/api/v1/portfolio/cluster", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_cluster_days_below_minimum_returns_422(client):
    """A /cluster request with days below 30 is rejected with 422."""
    payload = {"symbols": ["AAPL", "MSFT", "GOOGL"], "days": 10}

    response = await client.post("/api/v1/portfolio/cluster", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_cluster_n_clusters_below_minimum_returns_422(client):
    """A /cluster request with n_clusters below 2 is rejected with 422."""
    payload = {
        "symbols": ["AAPL", "MSFT", "GOOGL"],
        "days": 180,
        "n_clusters": 1,
    }

    response = await client.post("/api/v1/portfolio/cluster", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.cluster_stocks", new_callable=AsyncMock)
async def test_portfolio_cluster_value_error_returns_400(mock_fn, client):
    """A ValueError raised by the cluster service surfaces as HTTP 400."""
    mock_fn.side_effect = ValueError("at least 3 symbols required")
    # The payload itself is valid, so the failure can only come from the service.
    payload = {"symbols": ["AAPL", "MSFT", "GOOGL"], "days": 180}

    response = await client.post("/api/v1/portfolio/cluster", json=payload)

    assert response.status_code == 400
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.cluster_stocks", new_callable=AsyncMock)
async def test_portfolio_cluster_upstream_error_returns_502(mock_fn, client):
    """A non-ValueError exception from the cluster service surfaces as 502."""
    mock_fn.side_effect = RuntimeError("upstream failure")
    payload = {"symbols": ["AAPL", "MSFT", "GOOGL"], "days": 180}

    response = await client.post("/api/v1/portfolio/cluster", json=payload)

    assert response.status_code == 502
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.cluster_stocks", new_callable=AsyncMock)
async def test_portfolio_cluster_default_days(mock_fn, client):
    """With days omitted, the service is called with the default days=180."""
    tickers = ["AAPL", "MSFT", "GOOGL"]
    mock_fn.return_value = _CLUSTER_RESULT

    # Only symbols are sent; days and n_clusters are left to their defaults.
    response = await client.post(
        "/api/v1/portfolio/cluster", json={"symbols": tickers}
    )

    assert response.status_code == 200
    mock_fn.assert_called_once_with(tickers, days=180, n_clusters=None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /api/v1/portfolio/similar
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Canned service response used as the mocked return value in the /similar
# route tests: two most-similar and two least-similar peers for AAPL.
_SIMILAR_RESULT = {
    "symbol": "AAPL",
    "most_similar": [
        {"symbol": peer, "correlation": corr}
        for peer, corr in (("MSFT", 0.85), ("GOOGL", 0.78))
    ],
    "least_similar": [
        {"symbol": peer, "correlation": corr}
        for peer, corr in (("JPM", 0.32), ("BAC", 0.28))
    ],
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.find_similar_stocks", new_callable=AsyncMock)
async def test_portfolio_similar_happy_path(mock_fn, client):
    """A valid /similar request yields 200 with both similarity lists."""
    target = "AAPL"
    candidates = ["MSFT", "GOOGL", "AMZN", "JPM", "BAC"]
    payload = {"symbol": target, "universe": candidates, "days": 180, "top_n": 2}
    mock_fn.return_value = _SIMILAR_RESULT

    response = await client.post("/api/v1/portfolio/similar", json=payload)

    assert response.status_code == 200
    body = response.json()
    assert body["success"] is True
    result = body["data"]
    assert result["symbol"] == "AAPL"
    assert "most_similar" in result
    assert "least_similar" in result
    # Symbol and universe are expected positionally; days and top_n by keyword.
    mock_fn.assert_called_once_with(target, candidates, days=180, top_n=2)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_similar_missing_symbol_returns_422(client):
    """Omitting the symbol field from a /similar request gives 422."""
    payload = {"universe": ["MSFT", "GOOGL"], "days": 180}

    response = await client.post("/api/v1/portfolio/similar", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_similar_missing_universe_returns_422(client):
    """Omitting the universe field from a /similar request gives 422."""
    payload = {"symbol": "AAPL", "days": 180}

    response = await client.post("/api/v1/portfolio/similar", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_similar_universe_too_small_returns_422(client):
    """A /similar request whose universe has fewer than 2 entries gives 422."""
    # A single-entry universe: one short of the minimum under test.
    payload = {"symbol": "AAPL", "universe": ["MSFT"], "days": 180}

    response = await client.post("/api/v1/portfolio/similar", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_portfolio_similar_top_n_below_minimum_returns_422(client):
    """A /similar request with top_n below 1 is rejected with 422."""
    payload = {
        "symbol": "AAPL",
        "universe": ["MSFT", "GOOGL"],
        "days": 180,
        "top_n": 0,
    }

    response = await client.post("/api/v1/portfolio/similar", json=payload)

    assert response.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.find_similar_stocks", new_callable=AsyncMock)
async def test_portfolio_similar_value_error_returns_400(mock_fn, client):
    """A ValueError raised by the similarity service surfaces as HTTP 400."""
    mock_fn.side_effect = ValueError("AAPL not found in price data")
    # The payload itself is valid, so the failure can only come from the service.
    payload = {"symbol": "AAPL", "universe": ["MSFT", "GOOGL"], "days": 180}

    response = await client.post("/api/v1/portfolio/similar", json=payload)

    assert response.status_code == 400
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.find_similar_stocks", new_callable=AsyncMock)
async def test_portfolio_similar_upstream_error_returns_502(mock_fn, client):
    """A non-ValueError exception from the similarity service surfaces as 502."""
    mock_fn.side_effect = RuntimeError("upstream failure")
    payload = {"symbol": "AAPL", "universe": ["MSFT", "GOOGL"], "days": 180}

    response = await client.post("/api/v1/portfolio/similar", json=payload)

    assert response.status_code == 502
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@patch("routes_portfolio.portfolio_service.find_similar_stocks", new_callable=AsyncMock)
async def test_portfolio_similar_default_top_n(mock_fn, client):
    """With top_n and days omitted, the service gets top_n=5 and days=180."""
    target = "AAPL"
    candidates = ["MSFT", "GOOGL", "AMZN"]
    mock_fn.return_value = _SIMILAR_RESULT

    # Only the required fields are sent; days and top_n are left to defaults.
    response = await client.post(
        "/api/v1/portfolio/similar",
        json={"symbol": target, "universe": candidates},
    )

    assert response.status_code == 200
    mock_fn.assert_called_once_with(target, candidates, days=180, top_n=5)
|
||||
|
||||
Reference in New Issue
Block a user