Update paddle, and support invoice line item

2026-02-03 21:28:06 +01:00
parent c4e3773df1
commit 35988b1ebf
41 changed files with 6832 additions and 48 deletions
--- a/tests/vat/test_vat_extractor.py
+++ b/tests/vat/test_vat_extractor.py
@@ -0,0 +1,264 @@
+"""
+Tests for VAT Extractor
+
+Tests extraction of VAT (Moms) information from Swedish invoice text.
+"""
+
+import pytest
+from backend.vat.vat_extractor import (
+    VATBreakdown,
+    VATSummary,
+    VATExtractor,
+    AmountParser,
+)
+
+
+class TestAmountParser:
+    """Tests for Swedish amount parsing."""
+
+    def test_parse_swedish_format(self):
+        """Test parsing Swedish number format (1 234,56)."""
+        parser = AmountParser()
+        assert parser.parse("1 234,56") == 1234.56
+        assert parser.parse("100,00") == 100.0
+        assert parser.parse("1 000 000,00") == 1000000.0
+
+    def test_parse_with_currency(self):
+        """Test parsing amounts with currency suffix."""
+        parser = AmountParser()
+        assert parser.parse("1 234,56 SEK") == 1234.56
+        assert parser.parse("100,00 kr") == 100.0
+        assert parser.parse("SEK 500,00") == 500.0
+
+    def test_parse_european_format(self):
+        """Test parsing European format (1.234,56)."""
+        parser = AmountParser()
+        assert parser.parse("1.234,56") == 1234.56
+
+    def test_parse_us_format(self):
+        """Test parsing US format (1,234.56)."""
+        parser = AmountParser()
+        assert parser.parse("1,234.56") == 1234.56
+
+    def test_parse_invalid_returns_none(self):
+        """Test that invalid amounts return None."""
+        parser = AmountParser()
+        assert parser.parse("") is None
+        assert parser.parse("abc") is None
+        assert parser.parse("N/A") is None
+
+    def test_parse_negative_amount(self):
+        """Test parsing negative amounts."""
+        parser = AmountParser()
+        assert parser.parse("-100,00") == -100.0
+        assert parser.parse("-1 234,56") == -1234.56
+
+
+class TestVATBreakdown:
+    """Tests for VATBreakdown dataclass."""
+
+    def test_create_breakdown(self):
+        """Test creating a VAT breakdown."""
+        breakdown = VATBreakdown(
+            rate=25.0,
+            base_amount="10 000,00",
+            vat_amount="2 500,00",
+            source="regex",
+        )
+        assert breakdown.rate == 25.0
+        assert breakdown.base_amount == "10 000,00"
+        assert breakdown.vat_amount == "2 500,00"
+        assert breakdown.source == "regex"
+
+    def test_breakdown_with_optional_base(self):
+        """Test breakdown without base amount."""
+        breakdown = VATBreakdown(
+            rate=25.0,
+            base_amount=None,
+            vat_amount="2 500,00",
+            source="regex",
+        )
+        assert breakdown.base_amount is None
+
+
+class TestVATSummary:
+    """Tests for VATSummary dataclass."""
+
+    def test_create_summary(self):
+        """Test creating a VAT summary."""
+        breakdowns = [
+            VATBreakdown(rate=25.0, base_amount="8 000,00", vat_amount="2 000,00", source="regex"),
+            VATBreakdown(rate=12.0, base_amount="2 000,00", vat_amount="240,00", source="regex"),
+        ]
+        summary = VATSummary(
+            breakdowns=breakdowns,
+            total_excl_vat="10 000,00",
+            total_vat="2 240,00",
+            total_incl_vat="12 240,00",
+            confidence=0.95,
+        )
+        assert len(summary.breakdowns) == 2
+        assert summary.total_excl_vat == "10 000,00"
+
+    def test_empty_summary(self):
+        """Test empty VAT summary."""
+        summary = VATSummary(
+            breakdowns=[],
+            total_excl_vat=None,
+            total_vat=None,
+            total_incl_vat=None,
+            confidence=0.0,
+        )
+        assert summary.breakdowns == []
+
+
+class TestVATExtractor:
+    """Tests for VAT extraction from text."""
+
+    def test_extract_single_vat_rate(self):
+        """Test extracting single VAT rate from text."""
+        text = """
+        Summa exkl. moms: 10 000,00
+        Moms 25%: 2 500,00
+        Summa inkl. moms: 12 500,00
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        assert len(summary.breakdowns) == 1
+        assert summary.breakdowns[0].rate == 25.0
+        assert summary.breakdowns[0].vat_amount == "2 500,00"
+
+    def test_extract_multiple_vat_rates(self):
+        """Test extracting multiple VAT rates."""
+        text = """
+        Moms 25%: 2 000,00
+        Moms 12%: 240,00
+        Moms 6%: 60,00
+        Summa moms: 2 300,00
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        assert len(summary.breakdowns) == 3
+        rates = [b.rate for b in summary.breakdowns]
+        assert 25.0 in rates
+        assert 12.0 in rates
+        assert 6.0 in rates
+
+    def test_extract_varav_moms_format(self):
+        """Test extracting 'Varav moms' format."""
+        text = """
+        Totalt: 12 500,00
+        Varav moms 25% 2 500,00
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        assert len(summary.breakdowns) == 1
+        assert summary.breakdowns[0].rate == 25.0
+        assert summary.breakdowns[0].vat_amount == "2 500,00"
+
+    def test_extract_percentage_moms_format(self):
+        """Test extracting '25% moms:' format."""
+        text = """
+        25% moms: 2 500,00
+        12% moms: 240,00
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        assert len(summary.breakdowns) == 2
+
+    def test_extract_totals(self):
+        """Test extracting total amounts."""
+        text = """
+        Summa exkl. moms: 10 000,00
+        Summa moms: 2 500,00
+        Totalt att betala: 12 500,00
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        assert summary.total_excl_vat == "10 000,00"
+        assert summary.total_vat == "2 500,00"
+
+    def test_extract_with_underlag(self):
+        """Test extracting VAT with base amount (Underlag)."""
+        text = """
+        Moms 25%: 2 500,00 (Underlag 10 000,00)
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        assert len(summary.breakdowns) == 1
+        assert summary.breakdowns[0].rate == 25.0
+        assert summary.breakdowns[0].vat_amount == "2 500,00"
+        assert summary.breakdowns[0].base_amount == "10 000,00"
+
+    def test_extract_from_empty_text(self):
+        """Test extraction from empty text."""
+        extractor = VATExtractor()
+        summary = extractor.extract("")
+
+        assert summary.breakdowns == []
+        assert summary.confidence == 0.0
+
+    def test_extract_zero_vat(self):
+        """Test extracting 0% VAT."""
+        text = """
+        Moms 0%: 0,00
+        Summa exkl. moms: 1 000,00
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        rates = [b.rate for b in summary.breakdowns]
+        assert 0.0 in rates
+
+    def test_extract_netto_brutto_format(self):
+        """Test extracting Netto/Brutto format."""
+        text = """
+        Netto: 10 000,00
+        Moms: 2 500,00
+        Brutto: 12 500,00
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        assert summary.total_excl_vat == "10 000,00"
+        # Should detect implicit 25% rate from math
+
+    def test_confidence_calculation(self):
+        """Test confidence score calculation."""
+        extractor = VATExtractor()
+
+        # High confidence - multiple sources agree (including Summa moms)
+        text_high = """
+        Summa exkl. moms: 10 000,00
+        Moms 25%: 2 500,00
+        Summa moms: 2 500,00
+        Summa inkl. moms: 12 500,00
+        """
+        summary_high = extractor.extract(text_high)
+        assert summary_high.confidence >= 0.8
+
+        # Lower confidence - only partial info
+        text_low = """
+        Moms: 2 500,00
+        """
+        summary_low = extractor.extract(text_low)
+        assert summary_low.confidence < summary_high.confidence
+
+    def test_handles_ocr_noise(self):
+        """Test handling OCR noise in text."""
+        text = """
+        Summa exkl moms: 10 000,00
+        Mams 25%: 2 500,00
+        Sum ma inkl. moms: 12 500,00
+        """
+        extractor = VATExtractor()
+        summary = extractor.extract(text)
+
+        # Should still extract some information despite noise
+        assert summary.total_excl_vat is not None or len(summary.breakdowns) > 0