Files
invoice-master-poc-v2/tests/services/test_gating_validator.py
Yaojia Wang ad5ed46b4c WIP
2026-02-11 23:40:38 +01:00

541 lines
21 KiB
Python

"""
Unit tests for gating validation service.
Tests the quality gate validation logic for model deployment:
- Gate 1: mAP regression validation
- Gate 2: detection rate validation
- Overall status computation
- Full validation workflow with mocked dependencies
"""
import pytest
from unittest.mock import MagicMock, Mock, patch
from uuid import UUID, uuid4
from backend.web.services.gating_validator import (
GATE1_PASS_THRESHOLD,
GATE1_REVIEW_THRESHOLD,
GATE2_PASS_THRESHOLD,
classify_gate1,
classify_gate2,
compute_overall_status,
run_gating_validation,
)
from backend.data.admin_models import GatingResult
class TestClassifyGate1:
    """Gate 1 classification: an mAP drop is mapped to pass / review / reject."""

    def test_pass_below_threshold(self):
        """Drops strictly under 0.01 are classified as pass."""
        # The last value is a negative drop, i.e. the new model improved.
        for drop in (0.009, 0.005, 0.0, -0.01):
            assert classify_gate1(drop) == "pass"

    def test_pass_boundary(self):
        """A drop equal to GATE1_PASS_THRESHOLD is review, not pass."""
        # The pass bound is exclusive, so the threshold value itself
        # belongs to the review band.
        status = classify_gate1(GATE1_PASS_THRESHOLD)
        assert status == "review"

    def test_review_in_range(self):
        """Drops inside [0.01, 0.03) are classified as review."""
        for drop in (0.01, 0.015, 0.02, 0.029):
            assert classify_gate1(drop) == "review"

    def test_review_boundary(self):
        """A drop equal to GATE1_REVIEW_THRESHOLD is reject, not review."""
        # The review bound is exclusive as well, so the threshold value
        # itself is already a reject.
        status = classify_gate1(GATE1_REVIEW_THRESHOLD)
        assert status == "reject"

    def test_reject_above_threshold(self):
        """Drops of 0.03 or more are classified as reject."""
        for drop in (0.03, 0.05, 0.10, 1.0):
            assert classify_gate1(drop) == "reject"
class TestClassifyGate2:
    """Gate 2 classification: a detection rate is mapped to pass / review."""

    def test_pass_above_threshold(self):
        """Detection rates of 0.80 or more are classified as pass."""
        for rate in (0.80, 0.85, 0.99, 1.0):
            assert classify_gate2(rate) == "pass"

    def test_pass_boundary(self):
        """A rate equal to GATE2_PASS_THRESHOLD still counts as pass."""
        status = classify_gate2(GATE2_PASS_THRESHOLD)
        assert status == "pass"

    def test_review_below_threshold(self):
        """Detection rates under 0.80 are classified as review."""
        for rate in (0.79, 0.75, 0.50, 0.0):
            assert classify_gate2(rate) == "review"
class TestComputeOverallStatus:
    """Combining the two gate statuses into one overall status.

    The cases below pin the precedence reject > review > pass.
    """

    def test_both_pass(self):
        """pass + pass gives pass."""
        overall = compute_overall_status("pass", "pass")
        assert overall == "pass"

    def test_gate1_reject_gate2_pass(self):
        """A reject on gate 1 alone forces an overall reject."""
        overall = compute_overall_status("reject", "pass")
        assert overall == "reject"

    def test_gate1_pass_gate2_reject(self):
        """A reject on gate 2 alone forces an overall reject."""
        overall = compute_overall_status("pass", "reject")
        assert overall == "reject"

    def test_both_reject(self):
        """reject + reject gives reject."""
        overall = compute_overall_status("reject", "reject")
        assert overall == "reject"

    def test_gate1_review_gate2_pass(self):
        """A review on gate 1 with no reject gives an overall review."""
        overall = compute_overall_status("review", "pass")
        assert overall == "review"

    def test_gate1_pass_gate2_review(self):
        """A review on gate 2 with no reject gives an overall review."""
        overall = compute_overall_status("pass", "review")
        assert overall == "review"

    def test_both_review(self):
        """review + review gives review."""
        overall = compute_overall_status("review", "review")
        assert overall == "review"

    def test_gate1_reject_gate2_review(self):
        """Reject on gate 1 outranks review on gate 2."""
        overall = compute_overall_status("reject", "review")
        assert overall == "reject"

    def test_gate1_review_gate2_reject(self):
        """Reject on gate 2 outranks review on gate 1."""
        overall = compute_overall_status("review", "reject")
        assert overall == "reject"
class TestRunGatingValidation:
    """Test the full gating validation workflow with mocked dependencies.

    Every test patches the repository, the DB session context and the
    status-update helper of the validator module; tests that exercise
    Gate 1 additionally patch the YOLO trainer. The shared wiring lives in
    ``_run`` so each test only states its inputs and expectations.
    """

    # Dotted path of the module under test; its collaborators are patched
    # on this module, i.e. where they are looked up.
    _MODULE = "backend.web.services.gating_validator"

    @pytest.fixture
    def mock_model_version_id(self):
        """Generate a model version ID for testing."""
        return uuid4()

    @pytest.fixture
    def mock_base_model_version_id(self):
        """Generate a base model version ID for testing."""
        return uuid4()

    @pytest.fixture
    def mock_task_id(self):
        """Generate a task ID for testing."""
        return uuid4()

    @pytest.fixture
    def mock_base_model(self):
        """Create a mock base model with ``metrics_mAP`` preset to 0.85."""
        model = Mock()
        model.metrics_mAP = 0.85
        return model

    @pytest.fixture
    def mock_new_model(self):
        """Create a mock new model with ``metrics_mAP`` preset to 0.82."""
        model = Mock()
        model.metrics_mAP = 0.82
        return model

    @staticmethod
    def _repo_lookup(base_id, base_model, new_model):
        """Build a stand-in for ``ModelVersionRepository.get``.

        The returned callable yields ``base_model`` when asked for
        ``base_id`` (compared as strings, so UUID and str inputs both match)
        and ``new_model`` for any other ID.
        """
        # The parameter is intentionally called ``id``: the code under test
        # may invoke ``get(id=...)`` by keyword, so renaming it could break
        # the call. TODO confirm against the repository signature.
        def lookup(id):  # noqa: A002
            return base_model if str(id) == str(base_id) else new_model

        return lookup

    def _run(
        self,
        *,
        model_version_id,
        base_model_version_id,
        task_id,
        repo_get,
        patch_trainer=True,
        val_metrics=None,
        val_error=None,
    ):
        """Run ``run_gating_validation`` with all collaborators mocked.

        Args:
            model_version_id: ID of the model being validated (UUID or str).
            base_model_version_id: ID of the base model, or None.
            task_id: ID of the task (UUID or str).
            repo_get: Callable installed as the side effect of the mocked
                repository's ``get`` method.
            patch_trainer: When True, ``shared.training.YOLOTrainer`` is
                patched; when False it is left untouched.
            val_metrics: Value returned by the mocked trainer's
                ``validate`` (ignored when ``val_error`` is given).
            val_error: Exception raised by the mocked trainer's
                ``validate`` instead of returning metrics.

        Returns:
            A ``(result, session, update)`` tuple: the value returned by
            ``run_gating_validation``, the mock yielded by the patched
            session context manager, and the mock replacing
            ``_update_model_gating_status``.
        """

        def invoke():
            return run_gating_validation(
                model_version_id=model_version_id,
                new_model_path="/path/to/model.pt",
                base_model_version_id=base_model_version_id,
                data_yaml="/path/to/data.yaml",
                task_id=task_id,
            )

        with patch(f"{self._MODULE}.ModelVersionRepository") as repo_cls, \
                patch(f"{self._MODULE}.get_session_context") as session_ctx, \
                patch(f"{self._MODULE}._update_model_gating_status") as update:
            repo_cls.return_value.get.side_effect = repo_get

            # ``get_session_context()`` is used as a context manager; the
            # object bound by ``with ... as`` is what the tests inspect.
            session = MagicMock()
            session_ctx.return_value.__enter__.return_value = session

            if not patch_trainer:
                result = invoke()
            else:
                # The trainer is patched on its defining module rather than
                # on the validator module — presumably because the validator
                # imports it lazily; confirm against the implementation.
                with patch("shared.training.YOLOTrainer") as trainer_cls:
                    validate = trainer_cls.return_value.validate
                    if val_error is not None:
                        validate.side_effect = val_error
                    else:
                        validate.return_value = val_metrics
                    result = invoke()

        return result, session, update

    def test_gate1_pass_gate2_pass(
        self,
        mock_model_version_id,
        mock_base_model_version_id,
        mock_task_id,
        mock_base_model,
        mock_new_model,
    ):
        """Test Gate 1 review (drop at the 0.01 boundary) with Gate 2 pass.

        NOTE: despite the test name, Gate 1 does not pass here; the expected
        overall status is review. The name is kept so test IDs stay stable.
        """
        # Gate 1: base mAP=0.85, validated mAP50=0.84 -> drop=0.01 (review)
        # Gate 2: new model metrics_mAP=0.82 >= 0.80 (pass)
        mock_base_model.metrics_mAP = 0.85
        mock_new_model.metrics_mAP = 0.82

        result, session, update = self._run(
            model_version_id=mock_model_version_id,
            base_model_version_id=mock_base_model_version_id,
            task_id=mock_task_id,
            repo_get=self._repo_lookup(
                mock_base_model_version_id, mock_base_model, mock_new_model
            ),
            val_metrics={"mAP50": 0.84},
        )

        # Verify
        assert result.gate1_status == "review"  # 0.85 - 0.84 = 0.01
        assert result.gate1_original_mAP == 0.85
        assert result.gate1_new_mAP == 0.84
        assert result.gate1_mAP_drop == pytest.approx(0.01, abs=1e-6)
        assert result.gate2_status == "pass"  # 0.82 >= 0.80
        assert result.gate2_detection_rate == 0.82
        assert result.overall_status == "review"  # Any review -> review

        # Verify DB operations
        session.add.assert_called()
        session.commit.assert_called()
        update.assert_called_once_with(str(mock_model_version_id), "review")

    def test_gate1_reject_due_to_large_drop(
        self,
        mock_model_version_id,
        mock_base_model_version_id,
        mock_task_id,
        mock_base_model,
        mock_new_model,
    ):
        """Test Gate 1 reject when mAP drop >= 0.03."""
        mock_base_model.metrics_mAP = 0.85
        mock_new_model.metrics_mAP = 0.82

        result, _session, update = self._run(
            model_version_id=mock_model_version_id,
            base_model_version_id=mock_base_model_version_id,
            task_id=mock_task_id,
            repo_get=self._repo_lookup(
                mock_base_model_version_id, mock_base_model, mock_new_model
            ),
            val_metrics={"mAP50": 0.81},  # 0.85 - 0.81 = 0.04 (reject)
        )

        assert result.gate1_status == "reject"
        assert result.gate1_mAP_drop == pytest.approx(0.04, abs=1e-6)
        assert result.overall_status == "reject"  # Any reject -> reject
        update.assert_called_once_with(str(mock_model_version_id), "reject")

    def test_gate2_review_due_to_low_detection_rate(
        self,
        mock_model_version_id,
        mock_base_model_version_id,
        mock_task_id,
        mock_base_model,
        mock_new_model,
    ):
        """Test Gate 2 review when detection rate < 0.80."""
        mock_base_model.metrics_mAP = 0.85
        mock_new_model.metrics_mAP = 0.75  # Below 0.80 threshold

        result, _session, update = self._run(
            model_version_id=mock_model_version_id,
            base_model_version_id=mock_base_model_version_id,
            task_id=mock_task_id,
            repo_get=self._repo_lookup(
                mock_base_model_version_id, mock_base_model, mock_new_model
            ),
            val_metrics={"mAP50": 0.845},  # Gate 1: 0.85 - 0.845 = 0.005 (pass)
        )

        assert result.gate1_status == "pass"
        assert result.gate2_status == "review"  # 0.75 < 0.80
        assert result.gate2_detection_rate == 0.75
        assert result.overall_status == "review"
        update.assert_called_once_with(str(mock_model_version_id), "review")

    def test_no_base_model_skips_gate1(
        self,
        mock_model_version_id,
        mock_task_id,
        mock_new_model,
    ):
        """Test Gate 1 passes when no base model is provided."""
        mock_new_model.metrics_mAP = 0.85

        result, _session, update = self._run(
            model_version_id=mock_model_version_id,
            base_model_version_id=None,
            task_id=mock_task_id,
            # Every lookup returns the new model, whatever the call shape.
            repo_get=lambda *args, **kwargs: mock_new_model,
            patch_trainer=False,
        )

        assert result.gate1_status == "pass"  # Skipped
        assert result.gate1_original_mAP is None
        assert result.gate1_new_mAP is None
        assert result.gate1_mAP_drop is None
        assert result.gate2_status == "pass"  # 0.85 >= 0.80
        assert result.overall_status == "pass"
        update.assert_called_once_with(str(mock_model_version_id), "pass")

    def test_base_model_without_metrics_skips_gate1(
        self,
        mock_model_version_id,
        mock_base_model_version_id,
        mock_task_id,
        mock_base_model,
        mock_new_model,
    ):
        """Test Gate 1 passes when base model has no metrics."""
        mock_base_model.metrics_mAP = None
        mock_new_model.metrics_mAP = 0.85

        result, _session, _update = self._run(
            model_version_id=mock_model_version_id,
            base_model_version_id=mock_base_model_version_id,
            task_id=mock_task_id,
            repo_get=self._repo_lookup(
                mock_base_model_version_id, mock_base_model, mock_new_model
            ),
            patch_trainer=False,
        )

        assert result.gate1_status == "pass"  # Skipped due to no base metrics
        assert result.gate2_status == "pass"
        assert result.overall_status == "pass"

    def test_validation_failure_marks_gate1_review(
        self,
        mock_model_version_id,
        mock_base_model_version_id,
        mock_task_id,
        mock_base_model,
        mock_new_model,
    ):
        """Test Gate 1 review when validation raises exception."""
        mock_base_model.metrics_mAP = 0.85
        mock_new_model.metrics_mAP = 0.82

        result, _session, update = self._run(
            model_version_id=mock_model_version_id,
            base_model_version_id=mock_base_model_version_id,
            task_id=mock_task_id,
            repo_get=self._repo_lookup(
                mock_base_model_version_id, mock_base_model, mock_new_model
            ),
            # The mocked trainer raises instead of returning metrics.
            val_error=RuntimeError("Validation failed"),
        )

        assert result.gate1_status == "review"  # Exception -> review
        assert result.gate2_status == "pass"
        assert result.overall_status == "review"
        update.assert_called_once_with(str(mock_model_version_id), "review")

    def test_validation_returns_none_mAP_marks_gate1_review(
        self,
        mock_model_version_id,
        mock_base_model_version_id,
        mock_task_id,
        mock_base_model,
        mock_new_model,
    ):
        """Test Gate 1 review when validation returns None mAP."""
        mock_base_model.metrics_mAP = 0.85
        mock_new_model.metrics_mAP = 0.82

        result, _session, _update = self._run(
            model_version_id=mock_model_version_id,
            base_model_version_id=mock_base_model_version_id,
            task_id=mock_task_id,
            repo_get=self._repo_lookup(
                mock_base_model_version_id, mock_base_model, mock_new_model
            ),
            val_metrics={"mAP50": None},  # No mAP returned
        )

        assert result.gate1_status == "review"  # None mAP -> review
        assert result.gate1_new_mAP is None
        assert result.gate2_status == "pass"
        assert result.overall_status == "review"

    def test_gate2_exception_marks_gate2_review(
        self,
        mock_model_version_id,
        mock_base_model_version_id,
        mock_task_id,
        mock_base_model,
        mock_new_model,
    ):
        """Test Gate 2 review when fetching the new model raises exception."""
        mock_base_model.metrics_mAP = 0.85
        mock_new_model.metrics_mAP = 0.82

        # Base model lookups succeed; looking up the new model raises.
        # ``id`` keeps its name for keyword-call compatibility.
        def get_side_effect(id):  # noqa: A002
            if str(id) == str(mock_base_model_version_id):
                return mock_base_model
            elif str(id) == str(mock_model_version_id):
                raise RuntimeError("Cannot fetch new model")
            return None

        result, _session, _update = self._run(
            model_version_id=mock_model_version_id,
            base_model_version_id=mock_base_model_version_id,
            task_id=mock_task_id,
            repo_get=get_side_effect,
            val_metrics={"mAP50": 0.84},
        )

        assert result.gate1_status == "review"  # 0.85 - 0.84 = 0.01
        assert result.gate2_status == "review"  # Exception -> review
        assert result.overall_status == "review"

    def test_string_uuids_accepted(
        self,
        mock_model_version_id,
        mock_base_model_version_id,
        mock_task_id,
        mock_base_model,
        mock_new_model,
    ):
        """Test that string UUIDs are accepted and converted properly."""
        mock_base_model.metrics_mAP = 0.85
        mock_new_model.metrics_mAP = 0.85

        # Pass string UUIDs
        result, _session, _update = self._run(
            model_version_id=str(mock_model_version_id),
            base_model_version_id=str(mock_base_model_version_id),
            task_id=str(mock_task_id),
            repo_get=self._repo_lookup(
                mock_base_model_version_id, mock_base_model, mock_new_model
            ),
            val_metrics={"mAP50": 0.85},
        )

        # The result carries UUID objects equal to the originals.
        assert result.model_version_id == mock_model_version_id
        assert result.task_id == mock_task_id
        assert result.overall_status == "pass"