# invoice-master-poc-v2/tests/validation/test_vat_validator.py

"""
Tests for VAT Validator

Tests cross-validation of VAT information from multiple sources.
"""

import pytest
from backend.validation.vat_validator import (
    VATValidationResult,
    VATValidator,
    MathCheckResult,
)
from backend.vat.vat_extractor import VATBreakdown, VATSummary
from backend.table.line_items_extractor import LineItem, LineItemsResult


class TestMathCheckResult:
    """Construction checks for the MathCheckResult dataclass."""

    def test_create_math_check_result(self):
        """Build a result with matching VAT figures and read back its fields."""
        fields = dict(
            rate=25.0,
            base_amount=10000.0,
            expected_vat=2500.0,
            actual_vat=2500.0,
            is_valid=True,
            tolerance=0.01,
        )
        check = MathCheckResult(**fields)

        assert check.rate == 25.0
        assert check.is_valid is True

    def test_math_check_with_tolerance(self):
        """Build a result whose actual VAT is 0.01 off, inside a 0.02 tolerance."""
        fields = dict(
            rate=25.0,
            base_amount=10000.0,
            expected_vat=2500.0,
            # 0.01 above the expected value, which is below the 0.02 tolerance.
            actual_vat=2500.01,
            is_valid=True,
            tolerance=0.02,
        )
        check = MathCheckResult(**fields)

        assert check.is_valid is True


class TestVATValidationResult:
    """Construction checks for the VATValidationResult dataclass."""

    def test_create_validation_result(self):
        """Build an all-checks-passed result and read back its fields."""
        fields = dict(
            is_valid=True,
            confidence_score=0.95,
            math_checks=[],
            total_check=True,
            line_items_vs_summary=True,
            amount_consistency=True,
            needs_review=False,
            review_reasons=[],
        )
        outcome = VATValidationResult(**fields)

        assert outcome.is_valid is True
        assert outcome.confidence_score == 0.95
        assert outcome.needs_review is False

    def test_validation_result_with_review_reasons(self):
        """Build a failed result carrying two review reasons."""
        reasons = ["Math check failed", "Total mismatch"]
        fields = dict(
            is_valid=False,
            confidence_score=0.4,
            math_checks=[],
            total_check=False,
            # No line-item comparison recorded for this result.
            line_items_vs_summary=None,
            amount_consistency=False,
            needs_review=True,
            review_reasons=reasons,
        )
        outcome = VATValidationResult(**fields)

        assert outcome.is_valid is False
        assert outcome.needs_review is True
        assert len(outcome.review_reasons) == 2


class TestVATValidator:
    """Behavioural tests for VATValidator.validate."""

    @staticmethod
    def _single_rate_summary(
        vat_amount="2 500,00",
        total_incl_vat="12 500,00",
        base_amount="10 000,00",
        confidence=0.9,
    ):
        """Build a VATSummary with one 25% breakdown and a net total of 10 000,00.

        ``vat_amount`` is used for both the breakdown's VAT amount and the
        summary's ``total_vat``; the remaining arguments are passed through
        to the field of the same name.
        """
        only_row = VATBreakdown(
            rate=25.0,
            base_amount=base_amount,
            vat_amount=vat_amount,
            source="regex",
        )
        return VATSummary(
            breakdowns=[only_row],
            total_excl_vat="10 000,00",
            total_vat=vat_amount,
            total_incl_vat=total_incl_vat,
            confidence=confidence,
        )

    def test_validate_simple_vat(self):
        """A consistent single-rate summary is valid with a passing total check."""
        checker = VATValidator()
        summary = self._single_rate_summary()

        outcome = checker.validate(summary)

        assert outcome.is_valid is True
        assert outcome.confidence_score >= 0.9
        assert outcome.total_check is True

    def test_validate_multiple_vat_rates(self):
        """A two-rate summary is valid and yields one math check per breakdown."""
        checker = VATValidator()
        rate_rows = [
            VATBreakdown(rate=25.0, base_amount="8 000,00", vat_amount="2 000,00", source="regex"),
            VATBreakdown(rate=12.0, base_amount="2 000,00", vat_amount="240,00", source="regex"),
        ]
        summary = VATSummary(
            breakdowns=rate_rows,
            total_excl_vat="10 000,00",
            total_vat="2 240,00",
            total_incl_vat="12 240,00",
            confidence=0.9,
        )

        outcome = checker.validate(summary)

        assert outcome.is_valid is True
        assert len(outcome.math_checks) == 2

    def test_validate_math_check_failure(self):
        """A VAT amount that does not match rate x base is flagged for review."""
        checker = VATValidator()
        # 25% of 10 000,00 is 2 500,00, so 3 000,00 is deliberately wrong.
        summary = self._single_rate_summary(
            vat_amount="3 000,00",
            total_incl_vat="13 000,00",
        )

        outcome = checker.validate(summary)

        assert outcome.is_valid is False
        assert outcome.needs_review is True
        math_reasons = [
            why for why in outcome.review_reasons
            if "Math" in why or "math" in why
        ]
        assert math_reasons

    def test_validate_total_mismatch(self):
        """A gross total that is not net + VAT fails the total check."""
        checker = VATValidator()
        # Gross total should be 12 500,00; 15 000,00 is deliberately wrong.
        summary = self._single_rate_summary(total_incl_vat="15 000,00")

        outcome = checker.validate(summary)

        assert outcome.total_check is False
        assert outcome.needs_review is True

    def test_validate_with_line_items(self):
        """Passing line items produces a non-None line-items-vs-summary result."""
        checker = VATValidator()
        rows = [
            LineItem(row_index=0, description="Item 1", amount="5 000,00", vat_rate="25"),
            LineItem(row_index=1, description="Item 2", amount="5 000,00", vat_rate="25"),
        ]
        table = LineItemsResult(
            items=rows,
            header_row=["Description", "Amount"],
            raw_html="<table>...</table>",
        )
        summary = self._single_rate_summary()

        outcome = checker.validate(summary, line_items=table)

        assert outcome.line_items_vs_summary is not None

    def test_validate_amount_consistency(self):
        """An externally extracted amount equal to the gross total is consistent."""
        checker = VATValidator()
        summary = self._single_rate_summary()
        # Amount field from YOLO extraction, same as total_incl_vat.
        yolo_amount = "12 500,00"

        outcome = checker.validate(summary, existing_amount=yolo_amount)

        assert outcome.amount_consistency is True

    def test_validate_amount_inconsistency(self):
        """An externally extracted amount that differs from the gross total is flagged."""
        checker = VATValidator()
        summary = self._single_rate_summary()
        # Amount field from YOLO extraction, differs from total_incl_vat.
        yolo_amount = "15 000,00"

        outcome = checker.validate(summary, existing_amount=yolo_amount)

        assert outcome.amount_consistency is False
        assert outcome.needs_review is True

    def test_validate_empty_summary(self):
        """A summary with no breakdowns and no totals is invalid with zero confidence."""
        checker = VATValidator()
        blank = VATSummary(
            breakdowns=[],
            total_excl_vat=None,
            total_vat=None,
            total_incl_vat=None,
            confidence=0.0,
        )

        outcome = checker.validate(blank)

        assert outcome.confidence_score == 0.0
        assert outcome.is_valid is False

    def test_validate_without_base_amounts(self):
        """Totals are still checked when the breakdown has no base amount."""
        checker = VATValidator()
        summary = self._single_rate_summary(base_amount=None)

        outcome = checker.validate(summary)

        # The per-rate base is missing, but the summary totals still add up.
        assert outcome.total_check is True

    def test_confidence_score_calculation(self):
        """A consistent summary scores higher than an inconsistent one."""
        checker = VATValidator()

        # Every figure agrees: expected to score high.
        consistent = self._single_rate_summary(confidence=0.95)
        outcome_consistent = checker.validate(consistent)

        # Wrong VAT amount and a gross total that does not match: expected to score lower.
        inconsistent = self._single_rate_summary(
            vat_amount="3 000,00",
            total_incl_vat="12 500,00",
            confidence=0.5,
        )
        outcome_inconsistent = checker.validate(inconsistent)

        assert outcome_consistent.confidence_score > outcome_inconsistent.confidence_score

    def test_tolerance_configuration(self):
        """The ``tolerance`` constructor argument controls the math check result."""
        strict_checker = VATValidator(tolerance=0.001)
        lenient_checker = VATValidator(tolerance=1.0)

        # VAT is 0.50 above 25% of the base: outside 0.001, inside 1.0.
        summary = self._single_rate_summary(
            vat_amount="2 500,50",
            total_incl_vat="12 500,50",
        )

        strict_outcome = strict_checker.validate(summary)
        lenient_outcome = lenient_checker.validate(summary)

        assert strict_outcome.math_checks[0].is_valid is False
        assert lenient_outcome.math_checks[0].is_valid is True