"""
Tests for VAT Extractor

Tests extraction of VAT (Moms) information from Swedish invoice text.
"""

import pytest

from backend.vat.vat_extractor import (
    VATBreakdown,
    VATSummary,
    VATExtractor,
    AmountParser,
)


class TestAmountParser:
    """Tests for Swedish amount parsing."""

    def test_parse_swedish_format(self):
        """Test parsing Swedish number format (1 234,56)."""
        parser = AmountParser()
        assert parser.parse("1 234,56") == 1234.56
        assert parser.parse("100,00") == 100.0
        assert parser.parse("1 000 000,00") == 1000000.0

    def test_parse_with_currency(self):
        """Test parsing amounts with currency suffix."""
        parser = AmountParser()
        assert parser.parse("1 234,56 SEK") == 1234.56
        assert parser.parse("100,00 kr") == 100.0
        # The currency code may also precede the amount.
        assert parser.parse("SEK 500,00") == 500.0

    def test_parse_european_format(self):
        """Test parsing European format (1.234,56)."""
        parser = AmountParser()
        assert parser.parse("1.234,56") == 1234.56

    def test_parse_us_format(self):
        """Test parsing US format (1,234.56)."""
        parser = AmountParser()
        assert parser.parse("1,234.56") == 1234.56

    def test_parse_invalid_returns_none(self):
        """Test that invalid amounts return None."""
        parser = AmountParser()
        assert parser.parse("") is None
        assert parser.parse("abc") is None
        assert parser.parse("N/A") is None

    def test_parse_negative_amount(self):
        """Test parsing negative amounts."""
        parser = AmountParser()
        assert parser.parse("-100,00") == -100.0
        assert parser.parse("-1 234,56") == -1234.56


class TestVATBreakdown:
    """Tests for VATBreakdown dataclass."""

    def test_create_breakdown(self):
        """Test creating a VAT breakdown."""
        breakdown = VATBreakdown(
            rate=25.0,
            base_amount="10 000,00",
            vat_amount="2 500,00",
            source="regex",
        )
        assert breakdown.rate == 25.0
        # Amounts are stored as the strings they were constructed with.
        assert breakdown.base_amount == "10 000,00"
        assert breakdown.vat_amount == "2 500,00"
        assert breakdown.source == "regex"

    def test_breakdown_with_optional_base(self):
        """Test breakdown without base amount."""
        breakdown = VATBreakdown(
            rate=25.0,
            base_amount=None,
            vat_amount="2 500,00",
            source="regex",
        )
        assert breakdown.base_amount is None


class TestVATSummary:
    """Tests for VATSummary dataclass."""

    def test_create_summary(self):
        """Test creating a VAT summary."""
        breakdowns = [
            VATBreakdown(
                rate=25.0,
                base_amount="8 000,00",
                vat_amount="2 000,00",
                source="regex",
            ),
            VATBreakdown(
                rate=12.0,
                base_amount="2 000,00",
                vat_amount="240,00",
                source="regex",
            ),
        ]
        summary = VATSummary(
            breakdowns=breakdowns,
            total_excl_vat="10 000,00",
            total_vat="2 240,00",
            total_incl_vat="12 240,00",
            confidence=0.95,
        )
        assert len(summary.breakdowns) == 2
        assert summary.total_excl_vat == "10 000,00"

    def test_empty_summary(self):
        """Test empty VAT summary."""
        summary = VATSummary(
            breakdowns=[],
            total_excl_vat=None,
            total_vat=None,
            total_incl_vat=None,
            confidence=0.0,
        )
        assert summary.breakdowns == []


class TestVATExtractor:
    """Tests for VAT extraction from text."""

    def test_extract_single_vat_rate(self):
        """Test extracting single VAT rate from text."""
        text = """
        Summa exkl. moms: 10 000,00
        Moms 25%: 2 500,00
        Summa inkl. moms: 12 500,00
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        assert len(summary.breakdowns) == 1
        assert summary.breakdowns[0].rate == 25.0
        assert summary.breakdowns[0].vat_amount == "2 500,00"

    def test_extract_multiple_vat_rates(self):
        """Test extracting multiple VAT rates."""
        text = """
        Moms 25%: 2 000,00
        Moms 12%: 240,00
        Moms 6%: 60,00
        Summa moms: 2 300,00
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        # "Summa moms" is a total, so only the three rate lines count.
        assert len(summary.breakdowns) == 3
        rates = [b.rate for b in summary.breakdowns]
        assert 25.0 in rates
        assert 12.0 in rates
        assert 6.0 in rates

    def test_extract_varav_moms_format(self):
        """Test extracting 'Varav moms' format."""
        text = """
        Totalt: 12 500,00
        Varav moms 25% 2 500,00
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        assert len(summary.breakdowns) == 1
        assert summary.breakdowns[0].rate == 25.0
        assert summary.breakdowns[0].vat_amount == "2 500,00"

    def test_extract_percentage_moms_format(self):
        """Test extracting '25% moms:' format."""
        text = """
        25% moms: 2 500,00
        12% moms: 240,00
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        assert len(summary.breakdowns) == 2

    def test_extract_totals(self):
        """Test extracting total amounts."""
        text = """
        Summa exkl. moms: 10 000,00
        Summa moms: 2 500,00
        Totalt att betala: 12 500,00
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        assert summary.total_excl_vat == "10 000,00"
        assert summary.total_vat == "2 500,00"

    def test_extract_with_underlag(self):
        """Test extracting VAT with base amount (Underlag)."""
        text = """
        Moms 25%: 2 500,00 (Underlag 10 000,00)
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        assert len(summary.breakdowns) == 1
        assert summary.breakdowns[0].rate == 25.0
        assert summary.breakdowns[0].vat_amount == "2 500,00"
        assert summary.breakdowns[0].base_amount == "10 000,00"

    def test_extract_from_empty_text(self):
        """Test extraction from empty text."""
        extractor = VATExtractor()
        summary = extractor.extract("")

        assert summary.breakdowns == []
        assert summary.confidence == 0.0

    def test_extract_zero_vat(self):
        """Test extracting 0% VAT."""
        text = """
        Moms 0%: 0,00
        Summa exkl. moms: 1 000,00
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        rates = [b.rate for b in summary.breakdowns]
        assert 0.0 in rates

    def test_extract_netto_brutto_format(self):
        """Test extracting Netto/Brutto format."""
        text = """
        Netto: 10 000,00
        Moms: 2 500,00
        Brutto: 12 500,00
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        assert summary.total_excl_vat == "10 000,00"
        # Should detect implicit 25% rate from math
        # NOTE(review): that expectation is not asserted anywhere in this
        # test; confirm the extractor infers the rate before adding a check.

    def test_confidence_calculation(self):
        """Test confidence score calculation."""
        extractor = VATExtractor()

        # High confidence - multiple sources agree (including Summa moms)
        text_high = """
        Summa exkl. moms: 10 000,00
        Moms 25%: 2 500,00
        Summa moms: 2 500,00
        Summa inkl. moms: 12 500,00
        """
        summary_high = extractor.extract(text_high)
        assert summary_high.confidence >= 0.8

        # Lower confidence - only partial info
        text_low = """
        Moms: 2 500,00
        """
        summary_low = extractor.extract(text_low)
        # Only the relative ordering is pinned, not an absolute low score.
        assert summary_low.confidence < summary_high.confidence

    def test_handles_ocr_noise(self):
        """Test handling OCR noise in text."""
        text = """
        Summa exkl moms: 10 000,00
        Mams 25%: 2 500,00
        Sum ma inkl. moms: 12 500,00
        """
        extractor = VATExtractor()
        summary = extractor.extract(text)

        # Should still extract some information despite noise
        assert summary.total_excl_vat is not None or len(summary.breakdowns) > 0