feat: add backtesting engine with 4 strategies (TDD)

Strategies:
- POST /backtest/sma-crossover - SMA crossover (short/long window)
- POST /backtest/rsi - RSI oversold/overbought signals
- POST /backtest/buy-and-hold - passive benchmark
- POST /backtest/momentum - multi-symbol momentum rotation

Returns: total_return, annualized_return, sharpe_ratio, max_drawdown, win_rate, total_trades, equity_curve (last 20 points)

Implementation: pure pandas/numpy, no external backtesting libs. Shared _compute_metrics helper across all strategies.

79 new tests (46 service unit + 33 route integration). All 391 tests passing.
2026-03-19 22:35:00 +01:00
parent 42ba359c48
commit 5c7a0ee4c0
5 changed files with 1585 additions and 0 deletions
--- a/tests/test_backtest_service.py
+++ b/tests/test_backtest_service.py
@@ -0,0 +1,627 @@
+"""Unit tests for backtest_service - written FIRST (TDD RED phase)."""
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import backtest_service
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_equity(values: list[float]) -> pd.Series:
+    """Wrap a plain list of portfolio values in a float-typed pandas Series."""
+    return pd.Series(np.asarray(values, dtype=float))
+
+
+def _rising_prices(n: int = 100, start: float = 100.0, step: float = 1.0) -> pd.Series:
+    """Build an n-bar price ramp that begins at start and climbs by step per bar."""
+    return pd.Series(start + np.arange(n) * step, dtype=float)
+
+
+def _flat_prices(n: int = 100, price: float = 100.0) -> pd.Series:
+    """Build an n-bar series in which every bar sits at the same price."""
+    return pd.Series([price for _ in range(n)], dtype=float)
+
+
+def _oscillating_prices(n: int = 200, period: int = 40) -> pd.Series:
+    """Return n bars of a sine wave centred on 100 with amplitude 20."""
+    # One full cycle spans `period` bars, so prices swing between 80 and 120.
+    phase = 2 * np.pi * np.arange(n) / period
+    return pd.Series(100 + 20 * np.sin(phase), dtype=float)
+
+
+# ---------------------------------------------------------------------------
+# _compute_metrics tests
+# ---------------------------------------------------------------------------
+
+
+class TestComputeMetrics:  # exercises _compute_metrics on small hand-built equity curves
+    def test_total_return_positive(self):
+        equity = _make_equity([10000, 11000, 12000])  # 10000 -> 12000 is +20%
+        result = backtest_service._compute_metrics(equity, trades=1)
+        assert result["total_return"] == pytest.approx(0.2, abs=1e-6)
+
+    def test_total_return_negative(self):
+        equity = _make_equity([10000, 9000, 8000])  # 10000 -> 8000 is -20%
+        result = backtest_service._compute_metrics(equity, trades=1)
+        assert result["total_return"] == pytest.approx(-0.2, abs=1e-6)
+
+    def test_total_return_zero_on_flat(self):
+        equity = _make_equity([10000, 10000, 10000])
+        result = backtest_service._compute_metrics(equity, trades=0)
+        assert result["total_return"] == pytest.approx(0.0, abs=1e-6)
+
+    def test_annualized_return_shape(self):
+        # 253 points = 252 daily steps compounding at 0.1/252: 10000 -> ~11051 (about +10.5%)
+        values = [10000 * (1.0 + 0.1 / 252) ** i for i in range(253)]
+        equity = _make_equity(values)
+        result = backtest_service._compute_metrics(equity, trades=5)
+        # Near 10% if a year is 252 bars (presumed; confirm in _compute_metrics); 0.01 absorbs compounding
+        assert result["annualized_return"] == pytest.approx(0.1, abs=0.01)
+
+    def test_sharpe_ratio_positive_drift(self):
+        # Steadily rising equity with small daily increments -> positive Sharpe
+        values = [10000 + i * 10 for i in range(252)]
+        equity = _make_equity(values)
+        result = backtest_service._compute_metrics(equity, trades=5)
+        assert result["sharpe_ratio"] > 0
+
+    def test_sharpe_ratio_none_on_single_point(self):
+        equity = _make_equity([10000])  # one point => no period returns to measure
+        result = backtest_service._compute_metrics(equity, trades=0)
+        assert result["sharpe_ratio"] is None
+
+    def test_sharpe_ratio_none_on_zero_std(self):
+        # Perfectly flat equity => std = 0, Sharpe undefined
+        equity = _make_equity([10000] * 50)
+        result = backtest_service._compute_metrics(equity, trades=0)
+        assert result["sharpe_ratio"] is None
+
+    def test_max_drawdown_known_value(self):
+        # Peak 12000, trough 8000 => drawdown = (8000-12000)/12000 = -1/3
+        equity = _make_equity([10000, 12000, 8000, 9000])
+        result = backtest_service._compute_metrics(equity, trades=2)
+        assert result["max_drawdown"] == pytest.approx(-1 / 3, abs=1e-6)  # reported as a negative fraction
+
+    def test_max_drawdown_zero_on_monotone_rise(self):
+        equity = _make_equity([10000, 11000, 12000, 13000])  # never dips below a prior peak
+        result = backtest_service._compute_metrics(equity, trades=1)
+        assert result["max_drawdown"] == pytest.approx(0.0, abs=1e-6)
+
+    def test_total_trades_propagated(self):
+        equity = _make_equity([10000, 11000])
+        result = backtest_service._compute_metrics(equity, trades=7)
+        assert result["total_trades"] == 7  # the trades argument is echoed back unchanged
+
+    def test_win_rate_zero_trades(self):
+        equity = _make_equity([10000, 10000])
+        result = backtest_service._compute_metrics(equity, trades=0)
+        assert result["win_rate"] is None  # no trades => win rate reported as None
+
+    def test_equity_curve_last_20_points(self):
+        values = list(range(100, 160))  # 60 points
+        equity = _make_equity(values)
+        result = backtest_service._compute_metrics(equity, trades=10)
+        assert len(result["equity_curve"]) == 20  # NOTE(review): first kept point (presumably 140) is not checked
+        assert result["equity_curve"][-1] == pytest.approx(159.0, abs=1e-6)  # 159 is the final input value
+
+    def test_equity_curve_shorter_than_20(self):
+        values = [10000, 11000, 12000]
+        equity = _make_equity(values)
+        result = backtest_service._compute_metrics(equity, trades=1)
+        assert len(result["equity_curve"]) == 3  # short curves come back whole, without padding
+
+    def test_result_keys_present(self):
+        equity = _make_equity([10000, 11000])
+        result = backtest_service._compute_metrics(equity, trades=1)
+        expected_keys = {
+            "total_return",
+            "annualized_return",
+            "sharpe_ratio",
+            "max_drawdown",
+            "win_rate",
+            "total_trades",
+            "equity_curve",
+        }
+        assert expected_keys.issubset(result.keys())  # subset check: extra keys are tolerated
+
+
+# ---------------------------------------------------------------------------
+# _compute_sma_signals tests
+# ---------------------------------------------------------------------------
+
+
+class TestComputeSmaSignals:  # checks the position Series produced by _compute_sma_signals
+    def test_returns_series_with_position_column(self):
+        prices = _oscillating_prices(200, period=40)
+        positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
+        assert isinstance(positions, pd.Series)  # a plain Series is asserted, despite "column" in the name
+        assert len(positions) == len(prices)
+
+    def test_positions_are_zero_or_one(self):
+        prices = _oscillating_prices(200, period=40)
+        positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
+        unique_vals = set(positions.dropna().unique())  # NaN entries, if any, are ignored
+        assert unique_vals.issubset({0, 1})  # also passes when only one of the two values occurs
+
+    def test_no_position_before_long_window(self):
+        prices = _oscillating_prices(200, period=40)
+        positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
+        # The first long_window - 1 = 19 bars (indices 0..18) must all be flat (position 0)
+        assert (positions.iloc[: 19] == 0).all()
+
+    def test_generates_at_least_one_signal_on_oscillating(self):
+        prices = _oscillating_prices(300, period=60)
+        positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
+        # Should flip between 0 and 1 at least once on oscillating data
+        changes = positions.diff().abs().sum()  # each 0<->1 flip contributes 1 to the sum
+        assert changes > 0
+
+    def test_flat_prices_produce_no_signals(self):
+        prices = _flat_prices(100)
+        positions = backtest_service._compute_sma_signals(prices, short_window=5, long_window=20)
+        # After warm-up both SMAs equal price; short never strictly above long
+        assert (positions == 0).all()
+
+
+# ---------------------------------------------------------------------------
+# _compute_rsi tests
+# ---------------------------------------------------------------------------
+
+
+class TestComputeRsi:  # checks _compute_rsi output length, bounds, and response to trends
+    def test_rsi_length(self):
+        prices = _rising_prices(50)
+        rsi = backtest_service._compute_rsi(prices, period=14)
+        assert len(rsi) == len(prices)  # one RSI slot per input bar
+
+    def test_rsi_range(self):
+        prices = _oscillating_prices(100, period=20)
+        rsi = backtest_service._compute_rsi(prices, period=14)
+        valid = rsi.dropna()  # NaN warm-up entries, if any, are excluded from the bounds check
+        assert not valid.empty  # an all-NaN result must fail here instead of passing vacuously
+        assert valid.between(0, 100).all()  # between() is inclusive on both ends by default
+
+    def test_rsi_rising_prices_high(self):
+        """Monotonically rising prices should leave the final RSI high (>= 70)."""
+        prices = _rising_prices(80, step=1.0)
+        rsi = backtest_service._compute_rsi(prices, period=14)
+        # Only the last bar is inspected; a NaN there fails the comparison
+        assert rsi.iloc[-1] >= 70
+
+    def test_rsi_falling_prices_low(self):
+        """Monotonically falling prices should leave the final RSI low (<= 30)."""
+        prices = pd.Series([100 - i * 0.8 for i in range(80)], dtype=float)
+        rsi = backtest_service._compute_rsi(prices, period=14)
+        assert rsi.iloc[-1] <= 30
+
+
+# ---------------------------------------------------------------------------
+# _compute_rsi_signals tests
+# ---------------------------------------------------------------------------
+
+
+class TestComputeRsiSignals:  # type/shape checks only; signal timing is not asserted here
+    def test_returns_series(self):
+        prices = _oscillating_prices(200, period=40)
+        positions = backtest_service._compute_rsi_signals(
+            prices, period=14, oversold=30, overbought=70
+        )
+        assert isinstance(positions, pd.Series)
+        assert len(positions) == len(prices)  # one position per input bar
+
+    def test_positions_are_zero_or_one(self):
+        prices = _oscillating_prices(200, period=40)
+        positions = backtest_service._compute_rsi_signals(
+            prices, period=14, oversold=30, overbought=70
+        )
+        unique_vals = set(positions.dropna().unique())  # NaN entries, if any, are ignored
+        assert unique_vals.issubset({0, 1})  # also passes when only one of the two values occurs
+
+
+# ---------------------------------------------------------------------------
+# backtest_sma_crossover tests (async integration of service layer)
+# ---------------------------------------------------------------------------
+
+
+class TestBacktestSmaCrossover:  # drives backtest_sma_crossover against a stubbed data fetch
+    @pytest.fixture
+    def mock_hist(self, monkeypatch):
+        """Patch fetch_historical to return a synthetic OBBject-like result."""
+        prices = _oscillating_prices(300, period=60).tolist()  # 300 bars = 5 full sine cycles
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # stand-in fetch result carrying its bars in .results
+            results = [FakeBar(p) for p in prices]  # class attribute shared by all instances
+
+        async def fake_fetch(symbol, days, **kwargs):  # arguments are accepted but ignored
+            return FakeResult()
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        return FakeResult()  # return value is not read by the tests in this class
+
+    @pytest.mark.asyncio
+    async def test_returns_all_required_keys(self, mock_hist):
+        result = await backtest_service.backtest_sma_crossover(
+            "AAPL", short_window=5, long_window=20, days=365, initial_capital=10000
+        )
+        required = {
+            "total_return",
+            "annualized_return",
+            "sharpe_ratio",
+            "max_drawdown",
+            "win_rate",
+            "total_trades",
+            "equity_curve",
+        }
+        assert required.issubset(result.keys())  # subset check: extra keys are tolerated
+
+    @pytest.mark.asyncio
+    async def test_equity_curve_max_20_points(self, mock_hist):
+        result = await backtest_service.backtest_sma_crossover(
+            "AAPL", short_window=5, long_window=20, days=365, initial_capital=10000
+        )
+        assert len(result["equity_curve"]) <= 20  # upper bound only; an empty curve would also pass
+
+    @pytest.mark.asyncio
+    async def test_raises_value_error_on_no_data(self, monkeypatch):
+        async def fake_fetch(symbol, days, **kwargs):
+            return None  # simulate a fetch that produced nothing
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        with pytest.raises(ValueError, match="No historical data"):  # match is a regex search on the message
+            await backtest_service.backtest_sma_crossover(
+                "AAPL", short_window=5, long_window=20, days=365, initial_capital=10000
+            )
+
+    @pytest.mark.asyncio
+    async def test_initial_capital_reflected_in_equity(self, mock_hist):
+        result = await backtest_service.backtest_sma_crossover(
+            "AAPL", short_window=5, long_window=20, days=365, initial_capital=50000
+        )
+        # NOTE(review): only positivity is checked; nothing here ties the curve to the 50000 stake
+        assert result["equity_curve"][0] > 0
+
+
+# ---------------------------------------------------------------------------
+# backtest_rsi tests
+# ---------------------------------------------------------------------------
+
+
+class TestBacktestRsi:  # drives backtest_rsi against a stubbed data fetch
+    @pytest.fixture
+    def mock_hist(self, monkeypatch):
+        prices = _oscillating_prices(300, period=60).tolist()  # 300 bars = 5 full sine cycles
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # stand-in fetch result carrying its bars in .results
+            results = [FakeBar(p) for p in prices]  # class attribute shared by all instances
+
+        async def fake_fetch(symbol, days, **kwargs):  # arguments are accepted but ignored
+            return FakeResult()
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)  # undone by pytest after the test
+
+    @pytest.mark.asyncio
+    async def test_returns_all_required_keys(self, mock_hist):
+        result = await backtest_service.backtest_rsi(
+            "AAPL", period=14, oversold=30, overbought=70, days=365, initial_capital=10000
+        )
+        required = {
+            "total_return",
+            "annualized_return",
+            "sharpe_ratio",
+            "max_drawdown",
+            "win_rate",
+            "total_trades",
+            "equity_curve",
+        }
+        assert required.issubset(result.keys())  # subset check: extra keys are tolerated
+
+    @pytest.mark.asyncio
+    async def test_equity_curve_max_20_points(self, mock_hist):
+        result = await backtest_service.backtest_rsi(
+            "AAPL", period=14, oversold=30, overbought=70, days=365, initial_capital=10000
+        )
+        assert len(result["equity_curve"]) <= 20  # upper bound only; an empty curve would also pass
+
+    @pytest.mark.asyncio
+    async def test_raises_value_error_on_no_data(self, monkeypatch):
+        async def fake_fetch(symbol, days, **kwargs):
+            return None  # simulate a fetch that produced nothing
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        with pytest.raises(ValueError, match="No historical data"):  # match is a regex search on the message
+            await backtest_service.backtest_rsi(
+                "AAPL", period=14, oversold=30, overbought=70, days=365, initial_capital=10000
+            )
+
+
+# ---------------------------------------------------------------------------
+# backtest_buy_and_hold tests
+# ---------------------------------------------------------------------------
+
+
+class TestBacktestBuyAndHold:  # drives backtest_buy_and_hold against stubbed price series
+    @pytest.fixture
+    def mock_hist_rising(self, monkeypatch):
+        prices = _rising_prices(252, start=100.0, step=1.0).tolist()  # ramp from 100 up to 351
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # stand-in fetch result carrying its bars in .results
+            results = [FakeBar(p) for p in prices]  # class attribute shared by all instances
+
+        async def fake_fetch(symbol, days, **kwargs):  # arguments are accepted but ignored
+            return FakeResult()
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)  # undone by pytest after the test
+
+    @pytest.mark.asyncio
+    async def test_returns_all_required_keys(self, mock_hist_rising):
+        result = await backtest_service.backtest_buy_and_hold(
+            "AAPL", days=365, initial_capital=10000
+        )
+        required = {
+            "total_return",
+            "annualized_return",
+            "sharpe_ratio",
+            "max_drawdown",
+            "win_rate",
+            "total_trades",
+            "equity_curve",
+        }
+        assert required.issubset(result.keys())  # subset check: extra keys are tolerated
+
+    @pytest.mark.asyncio
+    async def test_total_trades_always_one(self, mock_hist_rising):
+        result = await backtest_service.backtest_buy_and_hold(
+            "AAPL", days=365, initial_capital=10000
+        )
+        assert result["total_trades"] == 1  # the strategy is expected to report a single trade
+
+    @pytest.mark.asyncio
+    async def test_rising_prices_positive_return(self, mock_hist_rising):
+        result = await backtest_service.backtest_buy_and_hold(
+            "AAPL", days=365, initial_capital=10000
+        )
+        assert result["total_return"] > 0  # sign only; the magnitude is pinned in test_known_return_value
+
+    @pytest.mark.asyncio
+    async def test_known_return_value(self, monkeypatch):
+        # 100 -> 200: 100% total return
+        prices = [100.0, 200.0]
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # stand-in fetch result carrying its bars in .results
+            results = [FakeBar(p) for p in prices]
+
+        async def fake_fetch(symbol, days, **kwargs):  # arguments are accepted but ignored
+            return FakeResult()
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        result = await backtest_service.backtest_buy_and_hold(
+            "AAPL", days=365, initial_capital=10000
+        )
+        assert result["total_return"] == pytest.approx(1.0, abs=1e-6)
+        assert result["equity_curve"][-1] == pytest.approx(20000.0, abs=1e-6)  # 10000 stake doubled with price
+
+    @pytest.mark.asyncio
+    async def test_raises_value_error_on_no_data(self, monkeypatch):
+        async def fake_fetch(symbol, days, **kwargs):
+            return None  # simulate a fetch that produced nothing
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        with pytest.raises(ValueError, match="No historical data"):  # match is a regex search on the message
+            await backtest_service.backtest_buy_and_hold("AAPL", days=365, initial_capital=10000)
+
+    @pytest.mark.asyncio
+    async def test_flat_prices_zero_return(self, monkeypatch):
+        prices = _flat_prices(50).tolist()  # 50 bars, all at 100.0
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # stand-in fetch result carrying its bars in .results
+            results = [FakeBar(p) for p in prices]
+
+        async def fake_fetch(symbol, days, **kwargs):  # arguments are accepted but ignored
+            return FakeResult()
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        result = await backtest_service.backtest_buy_and_hold(
+            "AAPL", days=365, initial_capital=10000
+        )
+        assert result["total_return"] == pytest.approx(0.0, abs=1e-6)
+
+
+# ---------------------------------------------------------------------------
+# backtest_momentum tests
+# ---------------------------------------------------------------------------
+
+
+class TestBacktestMomentum:  # drives backtest_momentum against per-symbol stubbed price series
+    @pytest.fixture
+    def mock_multi_hist(self, monkeypatch):
+        """Three symbols with different return profiles."""
+        aapl_prices = _rising_prices(200, start=100.0, step=2.0).tolist()  # steep ramp: 100 -> 498
+        msft_prices = _rising_prices(200, start=100.0, step=0.5).tolist()  # shallow ramp: 100 -> 199.5
+        googl_prices = _flat_prices(200, price=150.0).tolist()  # no movement at all
+
+        price_map = {
+            "AAPL": aapl_prices,
+            "MSFT": msft_prices,
+            "GOOGL": googl_prices,
+        }
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # per-instance bars here, because each symbol has its own series
+            def __init__(self, prices):
+                self.results = [FakeBar(p) for p in prices]
+
+        async def fake_fetch(symbol, days, **kwargs):
+            return FakeResult(price_map[symbol])  # raises KeyError for a symbol outside price_map
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)  # undone by pytest after the test
+
+    @pytest.mark.asyncio
+    async def test_returns_all_required_keys(self, mock_multi_hist):
+        result = await backtest_service.backtest_momentum(
+            symbols=["AAPL", "MSFT", "GOOGL"],
+            lookback=20,
+            top_n=2,
+            rebalance_days=30,
+            days=365,
+            initial_capital=10000,
+        )
+        required = {
+            "total_return",
+            "annualized_return",
+            "sharpe_ratio",
+            "max_drawdown",
+            "win_rate",
+            "total_trades",
+            "equity_curve",
+            "allocation_history",  # momentum-only key, not required of the single-symbol strategies
+        }
+        assert required.issubset(result.keys())  # subset check: extra keys are tolerated
+
+    @pytest.mark.asyncio
+    async def test_allocation_history_is_list(self, mock_multi_hist):
+        result = await backtest_service.backtest_momentum(
+            symbols=["AAPL", "MSFT", "GOOGL"],
+            lookback=20,
+            top_n=2,
+            rebalance_days=30,
+            days=365,
+            initial_capital=10000,
+        )
+        assert isinstance(result["allocation_history"], list)  # type only; an empty list would also pass
+
+    @pytest.mark.asyncio
+    async def test_top_n_respected_in_allocations(self, mock_multi_hist):
+        result = await backtest_service.backtest_momentum(
+            symbols=["AAPL", "MSFT", "GOOGL"],
+            lookback=20,
+            top_n=2,
+            rebalance_days=30,
+            days=365,
+            initial_capital=10000,
+        )
+        for entry in result["allocation_history"]:  # loop body never runs if the history is empty
+            assert len(entry["symbols"]) <= 2  # 2 is the top_n passed above
+
+    @pytest.mark.asyncio
+    async def test_raises_value_error_on_no_data(self, monkeypatch):
+        async def fake_fetch(symbol, days, **kwargs):
+            return None  # simulate every symbol's fetch producing nothing
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        with pytest.raises(ValueError, match="No price data"):  # wording differs from "No historical data" used elsewhere
+            await backtest_service.backtest_momentum(
+                symbols=["AAPL", "MSFT"],
+                lookback=20,
+                top_n=1,
+                rebalance_days=30,
+                days=365,
+                initial_capital=10000,
+            )
+
+    @pytest.mark.asyncio
+    async def test_equity_curve_max_20_points(self, mock_multi_hist):
+        result = await backtest_service.backtest_momentum(
+            symbols=["AAPL", "MSFT", "GOOGL"],
+            lookback=20,
+            top_n=2,
+            rebalance_days=30,
+            days=365,
+            initial_capital=10000,
+        )
+        assert len(result["equity_curve"]) <= 20  # upper bound only; an empty curve would also pass
+
+
+# ---------------------------------------------------------------------------
+# Edge case: insufficient data
+# ---------------------------------------------------------------------------
+
+
+class TestEdgeCases:  # too-short price histories must be rejected with "Insufficient data"
+    @pytest.mark.asyncio
+    async def test_sma_crossover_insufficient_bars_raises(self, monkeypatch):
+        """Fewer bars than long_window should raise ValueError."""
+        prices = [100.0, 101.0, 102.0]  # Only 3 bars
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # stand-in fetch result carrying its bars in .results
+            results = [FakeBar(p) for p in prices]
+
+        async def fake_fetch(symbol, days, **kwargs):  # arguments are accepted but ignored
+            return FakeResult()
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        with pytest.raises(ValueError, match="Insufficient data"):  # match is a regex search on the message
+            await backtest_service.backtest_sma_crossover(
+                "AAPL", short_window=5, long_window=20, days=365, initial_capital=10000
+            )
+
+    @pytest.mark.asyncio
+    async def test_rsi_insufficient_bars_raises(self, monkeypatch):
+        prices = [100.0, 101.0]  # 2 bars, far fewer than period=14 below
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # stand-in fetch result carrying its bars in .results
+            results = [FakeBar(p) for p in prices]
+
+        async def fake_fetch(symbol, days, **kwargs):  # arguments are accepted but ignored
+            return FakeResult()
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        with pytest.raises(ValueError, match="Insufficient data"):  # match is a regex search on the message
+            await backtest_service.backtest_rsi(
+                "AAPL", period=14, oversold=30, overbought=70, days=365, initial_capital=10000
+            )
+
+    @pytest.mark.asyncio
+    async def test_buy_and_hold_single_bar_raises(self, monkeypatch):
+        prices = [100.0]  # one bar; two bars are accepted (see test_known_return_value)
+
+        class FakeBar:  # minimal bar object: exposes only .close
+            def __init__(self, close):
+                self.close = close
+
+        class FakeResult:  # stand-in fetch result carrying its bars in .results
+            results = [FakeBar(p) for p in prices]
+
+        async def fake_fetch(symbol, days, **kwargs):  # arguments are accepted but ignored
+            return FakeResult()
+
+        monkeypatch.setattr(backtest_service, "fetch_historical", fake_fetch)
+        with pytest.raises(ValueError, match="Insufficient data"):  # match is a regex search on the message
+            await backtest_service.backtest_buy_and_hold(
+                "AAPL", days=365, initial_capital=10000
+            )