Update paddle and add support for invoice line items

2026-02-03 21:28:06 +01:00
parent c4e3773df1
commit 35988b1ebf
41 changed files with 6832 additions and 48 deletions
--- a/tests/inference/test_normalizers.py
+++ b/tests/inference/test_normalizers.py
@@ -750,7 +750,7 @@ class TestNormalizerRegistry:
        assert "Amount" in registry
        assert "InvoiceDate" in registry
        assert "InvoiceDueDate" in registry
-        assert "supplier_org_number" in registry
+        assert "supplier_organisation_number" in registry

    def test_registry_with_enhanced(self):
        registry = create_normalizer_registry(use_enhanced=True)
--- a/tests/inference/test_pipeline.py
+++ b/tests/inference/test_pipeline.py
@@ -322,5 +322,180 @@ class TestAmountNormalization:
        assert normalized == '11699'


+class TestBusinessFeatures:
+    """Tests for business invoice features (line items, VAT, validation)."""
+
+    def test_inference_result_has_business_fields(self):
+        """Test that InferenceResult has business feature fields."""
+        result = InferenceResult()
+        assert result.line_items is None
+        assert result.vat_summary is None
+        assert result.vat_validation is None
+
+    def test_to_json_without_business_features(self):
+        """Test to_json works without business features."""
+        result = InferenceResult()
+        result.fields = {'InvoiceNumber': '12345'}
+        result.confidence = {'InvoiceNumber': 0.95}
+
+        json_result = result.to_json()
+
+        assert json_result['InvoiceNumber'] == '12345'
+        assert 'line_items' not in json_result
+        assert 'vat_summary' not in json_result
+        assert 'vat_validation' not in json_result
+
+    def test_to_json_with_line_items(self):
+        """Test to_json includes line items when present."""
+        from backend.table.line_items_extractor import LineItem, LineItemsResult
+
+        result = InferenceResult()
+        result.fields = {'Amount': '12500.00'}
+        result.line_items = LineItemsResult(
+            items=[
+                LineItem(
+                    row_index=0,
+                    description="Product A",
+                    quantity="2",
+                    unit_price="5000,00",
+                    amount="10000,00",
+                    vat_rate="25",
+                    confidence=0.9
+                )
+            ],
+            header_row=["Beskrivning", "Antal", "Pris", "Belopp", "Moms"],
+            raw_html="<table>...</table>"
+        )
+
+        json_result = result.to_json()
+
+        assert 'line_items' in json_result
+        assert len(json_result['line_items']['items']) == 1
+        assert json_result['line_items']['items'][0]['description'] == "Product A"
+        assert json_result['line_items']['items'][0]['amount'] == "10000,00"
+
+    def test_to_json_with_vat_summary(self):
+        """Test to_json includes VAT summary when present."""
+        from backend.vat.vat_extractor import VATBreakdown, VATSummary
+
+        result = InferenceResult()
+        result.vat_summary = VATSummary(
+            breakdowns=[
+                VATBreakdown(rate=25.0, base_amount="10000,00", vat_amount="2500,00", source="regex")
+            ],
+            total_excl_vat="10000,00",
+            total_vat="2500,00",
+            total_incl_vat="12500,00",
+            confidence=0.9
+        )
+
+        json_result = result.to_json()
+
+        assert 'vat_summary' in json_result
+        assert len(json_result['vat_summary']['breakdowns']) == 1
+        assert json_result['vat_summary']['breakdowns'][0]['rate'] == 25.0
+        assert json_result['vat_summary']['total_incl_vat'] == "12500,00"
+
+    def test_to_json_with_vat_validation(self):
+        """Test to_json includes VAT validation when present."""
+        from backend.validation.vat_validator import VATValidationResult, MathCheckResult
+
+        result = InferenceResult()
+        result.vat_validation = VATValidationResult(
+            is_valid=True,
+            confidence_score=0.95,
+            math_checks=[
+                MathCheckResult(
+                    rate=25.0,
+                    base_amount=10000.0,
+                    expected_vat=2500.0,
+                    actual_vat=2500.0,
+                    is_valid=True,
+                    tolerance=0.5
+                )
+            ],
+            total_check=True,
+            line_items_vs_summary=True,
+            amount_consistency=True,
+            needs_review=False,
+            review_reasons=[]
+        )
+
+        json_result = result.to_json()
+
+        assert 'vat_validation' in json_result
+        assert json_result['vat_validation']['is_valid'] is True
+        assert json_result['vat_validation']['confidence_score'] == 0.95
+        assert len(json_result['vat_validation']['math_checks']) == 1
+
+
+class TestBusinessFeaturesAvailable:
+    """Tests for BUSINESS_FEATURES_AVAILABLE flag."""
+
+    def test_business_features_available(self):
+        """Test that business features are available."""
+        from backend.pipeline import BUSINESS_FEATURES_AVAILABLE
+        assert BUSINESS_FEATURES_AVAILABLE is True
+
+
+class TestExtractBusinessFeaturesErrorHandling:
+    """Tests for _extract_business_features error handling."""
+
+    def test_pipeline_module_has_logger(self):
+        """Test that pipeline module defines logger correctly."""
+        from backend.pipeline import pipeline
+        assert hasattr(pipeline, 'logger')
+        assert pipeline.logger is not None
+
+    def test_extract_business_features_logs_errors(self):
+        """Test that _extract_business_features records the exception type and message in result.errors."""
+        from backend.pipeline.pipeline import InferencePipeline, InferenceResult
+
+        # Create a pipeline with mocked extractors that raise an exception
+        with patch.object(InferencePipeline, '__init__', lambda self, **kwargs: None):
+            pipeline = InferencePipeline()
+            pipeline.line_items_extractor = MagicMock()
+            pipeline.vat_extractor = MagicMock()
+            pipeline.vat_validator = MagicMock()
+
+            # Make line_items_extractor raise an exception
+            test_error = ValueError("Test error message")
+            pipeline.line_items_extractor.extract_from_pdf.side_effect = test_error
+
+            result = InferenceResult()
+
+            # Call the method
+            pipeline._extract_business_features("/fake/path.pdf", result, "full text")
+
+            # Verify error was captured with type info
+            assert len(result.errors) == 1
+            assert "ValueError" in result.errors[0]
+            assert "Test error message" in result.errors[0]
+
+    def test_extract_business_features_handles_numeric_exceptions(self):
+        """Test that _extract_business_features reports the exception type even when str(e) is only a number."""
+        from backend.pipeline.pipeline import InferencePipeline, InferenceResult
+
+        with patch.object(InferencePipeline, '__init__', lambda self, **kwargs: None):
+            pipeline = InferencePipeline()
+            pipeline.line_items_extractor = MagicMock()
+            pipeline.vat_extractor = MagicMock()
+            pipeline.vat_validator = MagicMock()
+
+            # Simulate an exception that might have a numeric value (like exit codes)
+            class NumericException(Exception):
+                def __str__(self):
+                    return "0"
+
+            pipeline.line_items_extractor.extract_from_pdf.side_effect = NumericException()
+
+            result = InferenceResult()
+            pipeline._extract_business_features("/fake/path.pdf", result, "full text")
+
+            # Should include type name even when str(e) is just "0"
+            assert len(result.errors) == 1
+            assert "NumericException" in result.errors[0]
+
+
 if __name__ == '__main__':
    pytest.main([__file__, '-v'])