Update paddle and add support for invoice line items

This commit is contained in:
Yaojia Wang
2026-02-03 21:28:06 +01:00
parent c4e3773df1
commit 35988b1ebf
41 changed files with 6832 additions and 48 deletions

View File

@@ -750,7 +750,7 @@ class TestNormalizerRegistry:
assert "Amount" in registry
assert "InvoiceDate" in registry
assert "InvoiceDueDate" in registry
assert "supplier_org_number" in registry
assert "supplier_organisation_number" in registry
def test_registry_with_enhanced(self):
registry = create_normalizer_registry(use_enhanced=True)

View File

@@ -322,5 +322,180 @@ class TestAmountNormalization:
assert normalized == '11699'
class TestBusinessFeatures:
    """Exercise the line-item, VAT-summary and VAT-validation fields of InferenceResult."""

    def test_inference_result_has_business_fields(self):
        """A default-constructed InferenceResult has every business field set to None."""
        fresh = InferenceResult()

        for attr_name in ('line_items', 'vat_summary', 'vat_validation'):
            assert getattr(fresh, attr_name) is None

    def test_to_json_without_business_features(self):
        """to_json() emits the plain fields and omits the business keys when they are unset."""
        inference = InferenceResult()
        inference.fields = {'InvoiceNumber': '12345'}
        inference.confidence = {'InvoiceNumber': 0.95}

        payload = inference.to_json()

        assert payload['InvoiceNumber'] == '12345'
        for absent_key in ('line_items', 'vat_summary', 'vat_validation'):
            assert absent_key not in payload

    def test_to_json_with_line_items(self):
        """to_json() serializes an attached LineItemsResult under 'line_items'."""
        from backend.table.line_items_extractor import LineItem, LineItemsResult

        inference = InferenceResult()
        inference.fields = {'Amount': '12500.00'}
        product_row = LineItem(
            row_index=0,
            description="Product A",
            quantity="2",
            unit_price="5000,00",
            amount="10000,00",
            vat_rate="25",
            confidence=0.9,
        )
        inference.line_items = LineItemsResult(
            items=[product_row],
            header_row=["Beskrivning", "Antal", "Pris", "Belopp", "Moms"],
            raw_html="<table>...</table>",
        )

        payload = inference.to_json()

        assert 'line_items' in payload
        serialized_items = payload['line_items']['items']
        assert len(serialized_items) == 1
        first_item = serialized_items[0]
        assert first_item['description'] == "Product A"
        # Amount strings are expected to come back exactly as they were supplied.
        assert first_item['amount'] == "10000,00"

    def test_to_json_with_vat_summary(self):
        """to_json() serializes an attached VATSummary under 'vat_summary'."""
        from backend.vat.vat_extractor import VATBreakdown, VATSummary

        inference = InferenceResult()
        standard_rate = VATBreakdown(
            rate=25.0,
            base_amount="10000,00",
            vat_amount="2500,00",
            source="regex",
        )
        inference.vat_summary = VATSummary(
            breakdowns=[standard_rate],
            total_excl_vat="10000,00",
            total_vat="2500,00",
            total_incl_vat="12500,00",
            confidence=0.9,
        )

        payload = inference.to_json()

        assert 'vat_summary' in payload
        summary = payload['vat_summary']
        assert len(summary['breakdowns']) == 1
        assert summary['breakdowns'][0]['rate'] == 25.0
        assert summary['total_incl_vat'] == "12500,00"

    def test_to_json_with_vat_validation(self):
        """to_json() serializes an attached VATValidationResult under 'vat_validation'."""
        from backend.validation.vat_validator import VATValidationResult, MathCheckResult

        inference = InferenceResult()
        passing_check = MathCheckResult(
            rate=25.0,
            base_amount=10000.0,
            expected_vat=2500.0,
            actual_vat=2500.0,
            is_valid=True,
            tolerance=0.5,
        )
        inference.vat_validation = VATValidationResult(
            is_valid=True,
            confidence_score=0.95,
            math_checks=[passing_check],
            total_check=True,
            line_items_vs_summary=True,
            amount_consistency=True,
            needs_review=False,
            review_reasons=[],
        )

        payload = inference.to_json()

        assert 'vat_validation' in payload
        validation = payload['vat_validation']
        assert validation['is_valid'] is True
        assert validation['confidence_score'] == 0.95
        assert len(validation['math_checks']) == 1
class TestBusinessFeaturesAvailable:
    """Check the BUSINESS_FEATURES_AVAILABLE flag exported by backend.pipeline."""

    def test_business_features_available(self):
        """The flag must be exactly True (identity check, not mere truthiness)."""
        from backend.pipeline import BUSINESS_FEATURES_AVAILABLE as features_flag

        assert features_flag is True
class TestExtractBusinessFeaturesErrorHandling:
    """Check how InferencePipeline._extract_business_features reports extractor failures."""

    def test_pipeline_module_has_logger(self):
        """The pipeline module exposes a non-None ``logger`` attribute."""
        from backend.pipeline import pipeline as pipeline_module

        assert hasattr(pipeline_module, 'logger')
        assert pipeline_module.logger is not None

    def test_extract_business_features_logs_errors(self):
        """A raising extractor yields one error entry naming the exception type and message."""
        from backend.pipeline.pipeline import InferencePipeline, InferenceResult

        # __init__ is swapped for a no-op so the instance is created without
        # running the real constructor; collaborators are then attached as mocks.
        with patch.object(InferencePipeline, '__init__', lambda self, **kwargs: None):
            pipe = InferencePipeline()
            for collaborator in ('line_items_extractor', 'vat_extractor', 'vat_validator'):
                setattr(pipe, collaborator, MagicMock())

            # The line-items extractor fails as soon as it is asked to read the PDF.
            pipe.line_items_extractor.extract_from_pdf.side_effect = ValueError("Test error message")

            outcome = InferenceResult()
            pipe._extract_business_features("/fake/path.pdf", outcome, "full text")

            # Exactly one error, carrying both the exception class name and its text.
            assert len(outcome.errors) == 1
            recorded = outcome.errors[0]
            assert "ValueError" in recorded
            assert "Test error message" in recorded

    def test_extract_business_features_handles_numeric_exceptions(self):
        """The exception type name is reported even when str(exc) is only "0"."""
        from backend.pipeline.pipeline import InferencePipeline, InferenceResult

        class NumericException(Exception):
            """Exception whose string form is a bare number (similar to an exit code)."""

            def __str__(self):
                return "0"

        with patch.object(InferencePipeline, '__init__', lambda self, **kwargs: None):
            pipe = InferencePipeline()
            for collaborator in ('line_items_extractor', 'vat_extractor', 'vat_validator'):
                setattr(pipe, collaborator, MagicMock())

            pipe.line_items_extractor.extract_from_pdf.side_effect = NumericException()

            outcome = InferenceResult()
            pipe._extract_business_features("/fake/path.pdf", outcome, "full text")

            # The message text alone ("0") would be uninformative, so the class
            # name has to appear in the recorded error.
            assert len(outcome.errors) == 1
            assert "NumericException" in outcome.errors[0]
if __name__ == '__main__':
    # pytest.main() returns the session exit code instead of exiting itself.
    # Raising SystemExit with it lets a direct run of this file report test
    # failures to the shell / CI through a non-zero exit status.
    raise SystemExit(pytest.main([__file__, '-v']))