Files
invoice-master-poc-v2/tests/validation/test_vat_validator.py
2026-02-03 21:28:06 +01:00

324 lines
10 KiB
Python

"""
Tests for VAT Validator
Tests cross-validation of VAT information from multiple sources.
"""
import pytest
from backend.validation.vat_validator import (
VATValidationResult,
VATValidator,
MathCheckResult,
)
from backend.vat.vat_extractor import VATBreakdown, VATSummary
from backend.table.line_items_extractor import LineItem, LineItemsResult
class TestMathCheckResult:
    """Construction checks for the MathCheckResult dataclass."""

    def test_create_math_check_result(self) -> None:
        """Values given at construction can be read back from the instance."""
        check = MathCheckResult(
            rate=25.0,
            base_amount=10000.0,
            expected_vat=2500.0,
            actual_vat=2500.0,
            is_valid=True,
            tolerance=0.01,
        )

        assert check.rate == 25.0
        assert check.is_valid is True

    def test_math_check_with_tolerance(self) -> None:
        """A result whose actual VAT differs slightly can still be flagged valid."""
        # actual_vat is 0.01 above expected_vat, inside the 0.02 tolerance.
        check = MathCheckResult(
            rate=25.0,
            base_amount=10000.0,
            expected_vat=2500.0,
            actual_vat=2500.01,
            is_valid=True,
            tolerance=0.02,
        )

        assert check.is_valid is True
class TestVATValidationResult:
    """Construction checks for the VATValidationResult dataclass."""

    def test_create_validation_result(self) -> None:
        """A fully passing result exposes the values it was built with."""
        fields = {
            "is_valid": True,
            "confidence_score": 0.95,
            "math_checks": [],
            "total_check": True,
            "line_items_vs_summary": True,
            "amount_consistency": True,
            "needs_review": False,
            "review_reasons": [],
        }
        outcome = VATValidationResult(**fields)

        assert outcome.is_valid is True
        assert outcome.confidence_score == 0.95
        assert outcome.needs_review is False

    def test_validation_result_with_review_reasons(self) -> None:
        """A failing result keeps its review flag and every review reason."""
        fields = {
            "is_valid": False,
            "confidence_score": 0.4,
            "math_checks": [],
            "total_check": False,
            # No line-item comparison was performed for this result.
            "line_items_vs_summary": None,
            "amount_consistency": False,
            "needs_review": True,
            "review_reasons": ["Math check failed", "Total mismatch"],
        }
        outcome = VATValidationResult(**fields)

        assert outcome.is_valid is False
        assert outcome.needs_review is True
        assert len(outcome.review_reasons) == 2
class TestVATValidator:
    """Tests for VATValidator.validate using hand-built VAT summaries."""

    @staticmethod
    def _single_rate_summary(
        vat_amount="2 500,00",
        total_incl_vat="12 500,00",
        base_amount="10 000,00",
        confidence=0.9,
    ):
        """Build a summary with one 25.0-rate breakdown sourced from "regex".

        ``total_vat`` always mirrors ``vat_amount`` and ``total_excl_vat`` is
        fixed at "10 000,00"; the defaults describe a fully consistent summary.
        """
        breakdown = VATBreakdown(
            rate=25.0,
            base_amount=base_amount,
            vat_amount=vat_amount,
            source="regex",
        )
        return VATSummary(
            breakdowns=[breakdown],
            total_excl_vat="10 000,00",
            total_vat=vat_amount,
            total_incl_vat=total_incl_vat,
            confidence=confidence,
        )

    def test_validate_simple_vat(self) -> None:
        """A consistent single-rate summary validates with high confidence."""
        outcome = VATValidator().validate(self._single_rate_summary())

        assert outcome.is_valid is True
        assert outcome.confidence_score >= 0.9
        assert outcome.total_check is True

    def test_validate_multiple_vat_rates(self) -> None:
        """Two breakdowns at different rates yield one math check each."""
        checker = VATValidator()
        rate_25 = VATBreakdown(
            rate=25.0, base_amount="8 000,00", vat_amount="2 000,00", source="regex"
        )
        rate_12 = VATBreakdown(
            rate=12.0, base_amount="2 000,00", vat_amount="240,00", source="regex"
        )
        summary = VATSummary(
            breakdowns=[rate_25, rate_12],
            total_excl_vat="10 000,00",
            total_vat="2 240,00",
            total_incl_vat="12 240,00",
            confidence=0.9,
        )

        outcome = checker.validate(summary)

        assert outcome.is_valid is True
        assert len(outcome.math_checks) == 2

    def test_validate_math_check_failure(self) -> None:
        """A VAT amount that does not follow from base and rate is rejected."""
        checker = VATValidator()
        # 25 % of 10 000,00 is 2 500,00, so 3 000,00 cannot be right.
        summary = self._single_rate_summary(
            vat_amount="3 000,00", total_incl_vat="13 000,00"
        )

        outcome = checker.validate(summary)

        assert outcome.is_valid is False
        assert outcome.needs_review is True
        assert any(
            marker in reason
            for reason in outcome.review_reasons
            for marker in ("Math", "math")
        )

    def test_validate_total_mismatch(self) -> None:
        """A grand total that is not excl + VAT fails the total check."""
        checker = VATValidator()
        # 10 000,00 + 2 500,00 is 12 500,00, not 15 000,00.
        summary = self._single_rate_summary(total_incl_vat="15 000,00")

        outcome = checker.validate(summary)

        assert outcome.total_check is False
        assert outcome.needs_review is True

    def test_validate_with_line_items(self) -> None:
        """Passing line items produces a line-items-vs-summary verdict."""
        checker = VATValidator()
        rows = [
            LineItem(row_index=0, description="Item 1", amount="5 000,00", vat_rate="25"),
            LineItem(row_index=1, description="Item 2", amount="5 000,00", vat_rate="25"),
        ]
        table = LineItemsResult(
            items=rows,
            header_row=["Description", "Amount"],
            raw_html="<table>...</table>",
        )
        summary = self._single_rate_summary()

        outcome = checker.validate(summary, line_items=table)

        assert outcome.line_items_vs_summary is not None

    def test_validate_amount_consistency(self) -> None:
        """A matching externally extracted amount is reported as consistent."""
        checker = VATValidator()
        summary = self._single_rate_summary()
        # Amount field as delivered by the YOLO extraction.
        yolo_amount = "12 500,00"

        outcome = checker.validate(summary, existing_amount=yolo_amount)

        assert outcome.amount_consistency is True

    def test_validate_amount_inconsistency(self) -> None:
        """A differing externally extracted amount is flagged for review."""
        checker = VATValidator()
        summary = self._single_rate_summary()
        # YOLO extraction disagrees with the summary's 12 500,00 total.
        yolo_amount = "15 000,00"

        outcome = checker.validate(summary, existing_amount=yolo_amount)

        assert outcome.amount_consistency is False
        assert outcome.needs_review is True

    def test_validate_empty_summary(self) -> None:
        """A summary with no breakdowns and no totals is invalid with zero confidence."""
        checker = VATValidator()
        blank = VATSummary(
            breakdowns=[],
            total_excl_vat=None,
            total_vat=None,
            total_incl_vat=None,
            confidence=0.0,
        )

        outcome = checker.validate(blank)

        assert outcome.confidence_score == 0.0
        assert outcome.is_valid is False

    def test_validate_without_base_amounts(self) -> None:
        """Totals are still checked when a breakdown has no base amount."""
        checker = VATValidator()
        summary = self._single_rate_summary(base_amount=None)

        outcome = checker.validate(summary)

        # The per-rate base is missing, but the totals must still be verified.
        assert outcome.total_check is True

    def test_confidence_score_calculation(self) -> None:
        """A consistent summary scores higher than an inconsistent one."""
        checker = VATValidator()

        # Every check should pass for this summary.
        consistent = self._single_rate_summary(confidence=0.95)
        score_when_consistent = checker.validate(consistent).confidence_score

        # Wrong VAT for the rate, and a grand total that does not add up.
        inconsistent = self._single_rate_summary(
            vat_amount="3 000,00",
            total_incl_vat="12 500,00",
            confidence=0.5,
        )
        score_when_inconsistent = checker.validate(inconsistent).confidence_score

        assert score_when_consistent > score_when_inconsistent

    def test_tolerance_configuration(self) -> None:
        """The ``tolerance`` argument decides whether a small deviation passes."""
        strict = VATValidator(tolerance=0.001)
        lenient = VATValidator(tolerance=1.0)
        # VAT is 0.50 above the exact 25 % of the base amount.
        summary = self._single_rate_summary(
            vat_amount="2 500,50", total_incl_vat="12 500,50"
        )

        strict_outcome = strict.validate(summary)
        lenient_outcome = lenient.validate(summary)

        # The deviation exceeds 0.001 but stays under 1.0.
        assert strict_outcome.math_checks[0].is_valid is False
        assert lenient_outcome.math_checks[0].is_valid is True