feat: add backtesting engine with 4 strategies (TDD)

Strategies:
- POST /backtest/sma-crossover - SMA crossover (short/long window)
- POST /backtest/rsi - RSI oversold/overbought signals
- POST /backtest/buy-and-hold - passive benchmark
- POST /backtest/momentum - multi-symbol momentum rotation

Returns: total_return, annualized_return, sharpe_ratio, max_drawdown,
win_rate, total_trades, equity_curve (last 20 points)

Implementation: pure pandas/numpy, no external backtesting libs.
Shared _compute_metrics helper across all strategies.
79 new tests (46 service unit + 33 route integration).
All 391 tests passing.
This commit is contained in:
Yaojia Wang
2026-03-19 22:35:00 +01:00
parent 42ba359c48
commit 5c7a0ee4c0
5 changed files with 1585 additions and 0 deletions

View File

@@ -0,0 +1,627 @@
"""Unit tests for backtest_service - written FIRST (TDD RED phase)."""
import numpy as np
import pandas as pd
import pytest
import backtest_service
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_equity(values: list[float]) -> pd.Series:
"""Build a simple equity-curve Series from a list of values."""
return pd.Series(values, dtype=float)
def _rising_prices(n: int = 100, start: float = 100.0, step: float = 1.0) -> pd.Series:
"""Linearly rising price series."""
return pd.Series([start + i * step for i in range(n)], dtype=float)
def _flat_prices(n: int = 100, price: float = 100.0) -> pd.Series:
"""Flat price series - no movement."""
return pd.Series([price] * n, dtype=float)
def _oscillating_prices(n: int = 200, period: int = 40) -> pd.Series:
"""Sinusoidal price series to generate crossover signals."""
t = np.arange(n)
prices = 100 + 20 * np.sin(2 * np.pi * t / period)
return pd.Series(prices, dtype=float)
# ---------------------------------------------------------------------------
# _compute_metrics tests
# ---------------------------------------------------------------------------
class TestComputeMetrics:
def test_total_return_positive(self):
equity = _make_equity([10000, 11000, 12000])
result = backtest_service._compute_metrics(equity, trades=1)
assert result["total_return"] == pytest.approx(0.2, abs=1e-6)
def test_total_return_negative(self):
equity = _make_equity([10000, 9000, 8000])
result = backtest_service._compute_metrics(equity, trades=1)
assert result["total_return"] == pytest.approx(-0.2, abs=1e-6)
def test_total_return_zero_on_flat(self):
equity = _make_equity([10000, 10000, 10000])
result = backtest_service._compute_metrics(equity, trades=0)
assert result["total_return"] == pytest.approx(0.0, abs=1e-6)
def test_annualized_return_shape(self):
# 252 daily bars => 1 trading year; 10000 -> 11000 = +10% annualized
values = [10000 * (1.0 + 0.1 / 252) ** i for i in range(253)]
equity = _make_equity(values)
result = backtest_service._compute_metrics(equity, trades=5)
# Should be close to 10% annualized
assert result["annualized_return"] == pytest.approx(0.1, abs=0.01)
def test_sharpe_ratio_positive_drift(self):
# Steadily rising equity with small daily increments -> positive Sharpe
values = [10000 + i * 10 for i in range(252)]
equity = _make_equity(values)
result = backtest_service._compute_metrics(equity, trades=5)
assert result["sharpe_ratio"] > 0
def test_sharpe_ratio_none_on_single_point(self):
equity = _make_equity([10000])
result = backtest_service._compute_metrics(equity, trades=0)
assert result["sharpe_ratio"] is None
def test_sharpe_ratio_none_on_zero_std(self):
# Perfectly flat equity => std = 0, Sharpe undefined
equity = _make_equity([10000] * 50)
result = backtest_service._compute_metrics(equity, trades=0)
assert result["sharpe_ratio"] is None
def test_max_drawdown_known_value(self):
# Peak 12000, trough 8000 => drawdown = (8000-12000)/12000 = -1/3
equity = _make_equity([10000, 12000, 8000, 9000])
result = backtest_service._compute_metrics(equity, trades=2)
assert result["max_drawdown"] == pytest.approx(-1 / 3, abs=1e-6)
def test_max_drawdown_zero_on_monotone_rise(self):
equity = _make_equity([10000, 11000, 12000, 13000])
result = backtest_service._compute_metrics(equity, trades=1)
assert result["max_drawdown"] == pytest.approx(0.0, abs=1e-6)
def test_total_trades_propagated(self):
equity = _make_equity([10000, 11000])
result = backtest_service._compute_metrics(equity, trades=7)
assert result["total_trades"] == 7
def test_win_rate_zero_trades(self):
equity = _make_equity([10000, 10000])
result = backtest_service._compute_metrics(equity, trades=0)
assert result["win_rate"] is None
def test_equity_curve_last_20_points(self):
values = list(range(100, 160)) # 60 points
equity = _make_equity(values)
result = backtest_service._compute_metrics(equity, trades=10)
assert len(result["equity_curve"]) == 20
assert result["equity_curve"][-1] == pytest.approx(159.0, abs=1e-6)
def test_equity_curve_shorter_than_20(self):
values = [10000, 11000, 12000]
equity = _make_equity(values)
result = backtest_service._compute_metrics(equity, trades=1)
assert len(result["equity_curve"]) == 3
def test_result_keys_present(self):
equity = _make_equity([10000, 11000])
result = backtest_service._compute_metrics(equity, trades=1)
expected_keys = {
"total_return",
"annualized_return",
"sharpe_ratio",
"max_drawdown",
"win_rate",
"total_trades",
"equity_curve",
}
assert expected_keys.issubset(result.keys())
# ---------------------------------------------------------------------------
# _compute_sma_signals tests
# ---------------------------------------------------------------------------
class TestComputeSmaSignals:
def test_returns_series_with_position_column(self):
prices = _oscillating_prices(200, period=40)
positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
assert isinstance(positions, pd.Series)
assert len(positions) == len(prices)
def test_positions_are_zero_or_one(self):
prices = _oscillating_prices(200, period=40)
positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
unique_vals = set(positions.dropna().unique())
assert unique_vals.issubset({0, 1})
def test_no_position_before_long_window(self):
prices = _oscillating_prices(200, period=40)
positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
# Before long_window-1 data points, positions should be 0
assert (positions.iloc[: 19] == 0).all()
def test_generates_at_least_one_signal_on_oscillating(self):
prices = _oscillating_prices(300, period=60)
positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
# Should flip between 0 and 1 at least once on oscillating data
changes = positions.diff().abs().sum()
assert changes > 0
def test_flat_prices_produce_no_signals(self):
prices = _flat_prices(100)
positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
# After warm-up both SMAs equal price; short never strictly above long
assert (positions == 0).all()
# ---------------------------------------------------------------------------
# _compute_rsi tests
# ---------------------------------------------------------------------------
class TestComputeRsi:
def test_rsi_length(self):
prices = _rising_prices(50)
rsi = backtest_service._compute_rsi(prices, period=14)
assert len(rsi) == len(prices)
def test_rsi_range(self):
prices = _oscillating_prices(100, period=20)
rsi = backtest_service._compute_rsi(prices, period=14)
valid = rsi.dropna()
assert (valid >= 0).all()
assert (valid <= 100).all()
def test_rsi_rising_prices_high(self):
# Monotonically rising prices => RSI should be high (>= 70)
prices = _rising_prices(80, step=1.0)
rsi = backtest_service._compute_rsi(prices, period=14)
# After warm-up period, RSI should be very high
assert rsi.iloc[-1] >= 70
def test_rsi_falling_prices_low(self):
# Monotonically falling prices => RSI should be low (<= 30)
prices = pd.Series([100 - i * 0.8 for i in range(80)], dtype=float)
rsi = backtest_service._compute_rsi(prices, period=14)
assert rsi.iloc[-1] <= 30
# ---------------------------------------------------------------------------
# _compute_rsi_signals tests
# ---------------------------------------------------------------------------
class TestComputeRsiSignals:
def test_returns_series(self):
prices = _oscillating_prices(200, period=40)
positions = backtest_service._compute_rsi_signals(
prices, period=14, oversold=30, overbought=70
)
assert isinstance(positions, pd.Series)
assert len(positions) == len(prices)
def test_positions_are_zero_or_one(self):
prices = _oscillating_prices(200, period=40)
positions = backtest_service._compute_rsi_signals(
prices, period=14, oversold=30, overbought=70
)
unique_vals = set(positions.dropna().unique())
assert unique_vals.issubset({0, 1})
# ---------------------------------------------------------------------------
# backtest_sma_crossover tests (async integration of service layer)
# ---------------------------------------------------------------------------
class TestBacktestSmaCrossover:
@pytest.fixture
def mock_hist(self, monkeypatch):
"""Patch fetch_historical to return a synthetic OBBject-like result."""
prices = _oscillating_prices(300, period=60).tolist()
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult()
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
return FakeResult()
@pytest.mark.asyncio
async def test_returns_all_required_keys(self, mock_hist):
result = await backtest_service.backtest_sma_crossover(
"AAPL", short_window=5, long_window=20, days=365, initial_capital=10000
)
required = {
"total_return",
"annualized_return",
"sharpe_ratio",
"max_drawdown",
"win_rate",
"total_trades",
"equity_curve",
}
assert required.issubset(result.keys())
@pytest.mark.asyncio
async def test_equity_curve_max_20_points(self, mock_hist):
result = await backtest_service.backtest_sma_crossover(
"AAPL", short_window=5, long_window=20, days=365, initial_capital=10000
)
assert len(result["equity_curve"]) <= 20
@pytest.mark.asyncio
async def test_raises_value_error_on_no_data(self, monkeypatch):
async def fake_fetch(symbol, days, **kwargs):
return None
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
with pytest.raises(ValueError, match="No historical data"):
await backtest_service.backtest_sma_crossover(
"AAPL", short_window=5, long_window=20, days=365, initial_capital=10000
)
@pytest.mark.asyncio
async def test_initial_capital_reflected_in_equity(self, mock_hist):
result = await backtest_service.backtest_sma_crossover(
"AAPL", short_window=5, long_window=20, days=365, initial_capital=50000
)
# equity_curve values should be in range related to 50000 initial capital
assert result["equity_curve"][0] > 0
# ---------------------------------------------------------------------------
# backtest_rsi tests
# ---------------------------------------------------------------------------
class TestBacktestRsi:
@pytest.fixture
def mock_hist(self, monkeypatch):
prices = _oscillating_prices(300, period=60).tolist()
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult()
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
@pytest.mark.asyncio
async def test_returns_all_required_keys(self, mock_hist):
result = await backtest_service.backtest_rsi(
"AAPL", period=14, oversold=30, overbought=70, days=365, initial_capital=10000
)
required = {
"total_return",
"annualized_return",
"sharpe_ratio",
"max_drawdown",
"win_rate",
"total_trades",
"equity_curve",
}
assert required.issubset(result.keys())
@pytest.mark.asyncio
async def test_equity_curve_max_20_points(self, mock_hist):
result = await backtest_service.backtest_rsi(
"AAPL", period=14, oversold=30, overbought=70, days=365, initial_capital=10000
)
assert len(result["equity_curve"]) <= 20
@pytest.mark.asyncio
async def test_raises_value_error_on_no_data(self, monkeypatch):
async def fake_fetch(symbol, days, **kwargs):
return None
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
with pytest.raises(ValueError, match="No historical data"):
await backtest_service.backtest_rsi(
"AAPL", period=14, oversold=30, overbought=70, days=365, initial_capital=10000
)
# ---------------------------------------------------------------------------
# backtest_buy_and_hold tests
# ---------------------------------------------------------------------------
class TestBacktestBuyAndHold:
@pytest.fixture
def mock_hist_rising(self, monkeypatch):
prices = _rising_prices(252, start=100.0, step=1.0).tolist()
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult()
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
@pytest.mark.asyncio
async def test_returns_all_required_keys(self, mock_hist_rising):
result = await backtest_service.backtest_buy_and_hold(
"AAPL", days=365, initial_capital=10000
)
required = {
"total_return",
"annualized_return",
"sharpe_ratio",
"max_drawdown",
"win_rate",
"total_trades",
"equity_curve",
}
assert required.issubset(result.keys())
@pytest.mark.asyncio
async def test_total_trades_always_one(self, mock_hist_rising):
result = await backtest_service.backtest_buy_and_hold(
"AAPL", days=365, initial_capital=10000
)
assert result["total_trades"] == 1
@pytest.mark.asyncio
async def test_rising_prices_positive_return(self, mock_hist_rising):
result = await backtest_service.backtest_buy_and_hold(
"AAPL", days=365, initial_capital=10000
)
assert result["total_return"] > 0
@pytest.mark.asyncio
async def test_known_return_value(self, monkeypatch):
# 100 -> 200: 100% total return
prices = [100.0, 200.0]
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult()
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
result = await backtest_service.backtest_buy_and_hold(
"AAPL", days=365, initial_capital=10000
)
assert result["total_return"] == pytest.approx(1.0, abs=1e-6)
assert result["equity_curve"][-1] == pytest.approx(20000.0, abs=1e-6)
@pytest.mark.asyncio
async def test_raises_value_error_on_no_data(self, monkeypatch):
async def fake_fetch(symbol, days, **kwargs):
return None
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
with pytest.raises(ValueError, match="No historical data"):
await backtest_service.backtest_buy_and_hold("AAPL", days=365, initial_capital=10000)
@pytest.mark.asyncio
async def test_flat_prices_zero_return(self, monkeypatch):
prices = _flat_prices(50).tolist()
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult()
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
result = await backtest_service.backtest_buy_and_hold(
"AAPL", days=365, initial_capital=10000
)
assert result["total_return"] == pytest.approx(0.0, abs=1e-6)
# ---------------------------------------------------------------------------
# backtest_momentum tests
# ---------------------------------------------------------------------------
class TestBacktestMomentum:
@pytest.fixture
def mock_multi_hist(self, monkeypatch):
"""Three symbols with different return profiles."""
aapl_prices = _rising_prices(200, start=100.0, step=2.0).tolist()
msft_prices = _rising_prices(200, start=100.0, step=0.5).tolist()
googl_prices = _flat_prices(200, price=150.0).tolist()
price_map = {
"AAPL": aapl_prices,
"MSFT": msft_prices,
"GOOGL": googl_prices,
}
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
def __init__(self, prices):
self.results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult(price_map[symbol])
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
@pytest.mark.asyncio
async def test_returns_all_required_keys(self, mock_multi_hist):
result = await backtest_service.backtest_momentum(
symbols=["AAPL", "MSFT", "GOOGL"],
lookback=20,
top_n=2,
rebalance_days=30,
days=365,
initial_capital=10000,
)
required = {
"total_return",
"annualized_return",
"sharpe_ratio",
"max_drawdown",
"win_rate",
"total_trades",
"equity_curve",
"allocation_history",
}
assert required.issubset(result.keys())
@pytest.mark.asyncio
async def test_allocation_history_is_list(self, mock_multi_hist):
result = await backtest_service.backtest_momentum(
symbols=["AAPL", "MSFT", "GOOGL"],
lookback=20,
top_n=2,
rebalance_days=30,
days=365,
initial_capital=10000,
)
assert isinstance(result["allocation_history"], list)
@pytest.mark.asyncio
async def test_top_n_respected_in_allocations(self, mock_multi_hist):
result = await backtest_service.backtest_momentum(
symbols=["AAPL", "MSFT", "GOOGL"],
lookback=20,
top_n=2,
rebalance_days=30,
days=365,
initial_capital=10000,
)
for entry in result["allocation_history"]:
assert len(entry["symbols"]) <= 2
@pytest.mark.asyncio
async def test_raises_value_error_on_no_data(self, monkeypatch):
async def fake_fetch(symbol, days, **kwargs):
return None
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
with pytest.raises(ValueError, match="No price data"):
await backtest_service.backtest_momentum(
symbols=["AAPL", "MSFT"],
lookback=20,
top_n=1,
rebalance_days=30,
days=365,
initial_capital=10000,
)
@pytest.mark.asyncio
async def test_equity_curve_max_20_points(self, mock_multi_hist):
result = await backtest_service.backtest_momentum(
symbols=["AAPL", "MSFT", "GOOGL"],
lookback=20,
top_n=2,
rebalance_days=30,
days=365,
initial_capital=10000,
)
assert len(result["equity_curve"]) <= 20
# ---------------------------------------------------------------------------
# Edge case: insufficient data
# ---------------------------------------------------------------------------
class TestEdgeCases:
@pytest.mark.asyncio
async def test_sma_crossover_insufficient_bars_raises(self, monkeypatch):
"""Fewer bars than long_window should raise ValueError."""
prices = [100.0, 101.0, 102.0] # Only 3 bars
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult()
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
with pytest.raises(ValueError, match="Insufficient data"):
await backtest_service.backtest_sma_crossover(
"AAPL", short_window=5, long_window=20, days=365, initial_capital=10000
)
@pytest.mark.asyncio
async def test_rsi_insufficient_bars_raises(self, monkeypatch):
prices = [100.0, 101.0]
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult()
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
with pytest.raises(ValueError, match="Insufficient data"):
await backtest_service.backtest_rsi(
"AAPL", period=14, oversold=30, overbought=70, days=365, initial_capital=10000
)
@pytest.mark.asyncio
async def test_buy_and_hold_single_bar_raises(self, monkeypatch):
prices = [100.0]
class FakeBar:
def __init__(self, close):
self.close = close
class FakeResult:
results = [FakeBar(p) for p in prices]
async def fake_fetch(symbol, days, **kwargs):
return FakeResult()
monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
with pytest.raises(ValueError, match="Insufficient data"):
await backtest_service.backtest_buy_and_hold(
"AAPL", days=365, initial_capital=10000
)