Update paddle, and support invoice line item
This commit is contained in:
264
tests/vat/test_vat_extractor.py
Normal file
264
tests/vat/test_vat_extractor.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""
|
||||
Tests for VAT Extractor
|
||||
|
||||
Tests extraction of VAT (Moms) information from Swedish invoice text.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from backend.vat.vat_extractor import (
|
||||
VATBreakdown,
|
||||
VATSummary,
|
||||
VATExtractor,
|
||||
AmountParser,
|
||||
)
|
||||
|
||||
|
||||
class TestAmountParser:
    """Checks ``AmountParser.parse`` against the number formats seen on invoices."""

    def test_parse_swedish_format(self):
        """Space-grouped thousands with a decimal comma, e.g. ``1 234,56``."""
        amount_parser = AmountParser()
        expectations = (
            ("1 234,56", 1234.56),
            ("100,00", 100.0),
            ("1 000 000,00", 1000000.0),
        )
        for raw, expected in expectations:
            assert amount_parser.parse(raw) == expected

    def test_parse_with_currency(self):
        """A currency marker before or after the number is ignored."""
        amount_parser = AmountParser()
        expectations = (
            ("1 234,56 SEK", 1234.56),
            ("100,00 kr", 100.0),
            ("SEK 500,00", 500.0),
        )
        for raw, expected in expectations:
            assert amount_parser.parse(raw) == expected

    def test_parse_european_format(self):
        """Dot-grouped thousands with a decimal comma, e.g. ``1.234,56``."""
        result = AmountParser().parse("1.234,56")
        assert result == 1234.56

    def test_parse_us_format(self):
        """Comma-grouped thousands with a decimal point, e.g. ``1,234.56``."""
        result = AmountParser().parse("1,234.56")
        assert result == 1234.56

    def test_parse_invalid_returns_none(self):
        """Input that holds no number yields ``None``."""
        amount_parser = AmountParser()
        for raw in ("", "abc", "N/A"):
            assert amount_parser.parse(raw) is None

    def test_parse_negative_amount(self):
        """A leading minus sign is carried through to the parsed value."""
        amount_parser = AmountParser()
        expectations = (
            ("-100,00", -100.0),
            ("-1 234,56", -1234.56),
        )
        for raw, expected in expectations:
            assert amount_parser.parse(raw) == expected
|
||||
|
||||
|
||||
class TestVATBreakdown:
    """Checks that ``VATBreakdown`` stores the values it is constructed with."""

    def test_create_breakdown(self):
        """Every field passed to the constructor is readable afterwards."""
        expected_fields = {
            "rate": 25.0,
            "base_amount": "10 000,00",
            "vat_amount": "2 500,00",
            "source": "regex",
        }
        breakdown = VATBreakdown(**expected_fields)
        # Dict order matches the constructor order, so the checks run
        # rate -> base_amount -> vat_amount -> source.
        for field_name, expected in expected_fields.items():
            assert getattr(breakdown, field_name) == expected

    def test_breakdown_with_optional_base(self):
        """``base_amount`` may be ``None`` when no base amount is supplied."""
        without_base = VATBreakdown(
            rate=25.0,
            base_amount=None,
            vat_amount="2 500,00",
            source="regex",
        )
        assert without_base.base_amount is None
|
||||
|
||||
|
||||
class TestVATSummary:
    """Checks that ``VATSummary`` stores the values it is constructed with."""

    def test_create_summary(self):
        """A summary built from two breakdowns exposes both plus the totals."""
        standard_rate = VATBreakdown(
            rate=25.0, base_amount="8 000,00", vat_amount="2 000,00", source="regex"
        )
        reduced_rate = VATBreakdown(
            rate=12.0, base_amount="2 000,00", vat_amount="240,00", source="regex"
        )
        summary = VATSummary(
            breakdowns=[standard_rate, reduced_rate],
            total_excl_vat="10 000,00",
            total_vat="2 240,00",
            total_incl_vat="12 240,00",
            confidence=0.95,
        )
        assert len(summary.breakdowns) == 2
        assert summary.total_excl_vat == "10 000,00"

    def test_empty_summary(self):
        """A summary with no breakdowns and no totals can be constructed."""
        no_totals = dict(total_excl_vat=None, total_vat=None, total_incl_vat=None)
        summary = VATSummary(breakdowns=[], confidence=0.0, **no_totals)
        assert summary.breakdowns == []
|
||||
|
||||
|
||||
class TestVATExtractor:
    """Checks ``VATExtractor.extract`` against Swedish invoice text snippets."""

    def test_extract_single_vat_rate(self):
        """One ``Moms 25%`` line yields a single 25% breakdown."""
        invoice_text = """
        Summa exkl. moms: 10 000,00
        Moms 25%: 2 500,00
        Summa inkl. moms: 12 500,00
        """
        result = VATExtractor().extract(invoice_text)

        assert len(result.breakdowns) == 1
        only_entry = result.breakdowns[0]
        assert only_entry.rate == 25.0
        assert only_entry.vat_amount == "2 500,00"

    def test_extract_multiple_vat_rates(self):
        """Three ``Moms N%`` lines yield three breakdowns, one per rate."""
        invoice_text = """
        Moms 25%: 2 000,00
        Moms 12%: 240,00
        Moms 6%: 60,00
        Summa moms: 2 300,00
        """
        result = VATExtractor().extract(invoice_text)

        assert len(result.breakdowns) == 3
        found_rates = [entry.rate for entry in result.breakdowns]
        for expected_rate in (25.0, 12.0, 6.0):
            assert expected_rate in found_rates

    def test_extract_varav_moms_format(self):
        """The ``Varav moms 25% <amount>`` wording is recognised."""
        invoice_text = """
        Totalt: 12 500,00
        Varav moms 25% 2 500,00
        """
        result = VATExtractor().extract(invoice_text)

        assert len(result.breakdowns) == 1
        only_entry = result.breakdowns[0]
        assert only_entry.rate == 25.0
        assert only_entry.vat_amount == "2 500,00"

    def test_extract_percentage_moms_format(self):
        """The rate-first wording (``25% moms:``) is recognised per line."""
        invoice_text = """
        25% moms: 2 500,00
        12% moms: 240,00
        """
        result = VATExtractor().extract(invoice_text)

        assert len(result.breakdowns) == 2

    def test_extract_totals(self):
        """Totals excluding VAT and total VAT are read from ``Summa`` lines."""
        invoice_text = """
        Summa exkl. moms: 10 000,00
        Summa moms: 2 500,00
        Totalt att betala: 12 500,00
        """
        result = VATExtractor().extract(invoice_text)

        assert result.total_excl_vat == "10 000,00"
        assert result.total_vat == "2 500,00"

    def test_extract_with_underlag(self):
        """A parenthesised ``Underlag`` value becomes the base amount."""
        invoice_text = """
        Moms 25%: 2 500,00 (Underlag 10 000,00)
        """
        result = VATExtractor().extract(invoice_text)

        assert len(result.breakdowns) == 1
        only_entry = result.breakdowns[0]
        assert only_entry.rate == 25.0
        assert only_entry.vat_amount == "2 500,00"
        assert only_entry.base_amount == "10 000,00"

    def test_extract_from_empty_text(self):
        """Empty input gives no breakdowns and zero confidence."""
        result = VATExtractor().extract("")

        assert result.breakdowns == []
        assert result.confidence == 0.0

    def test_extract_zero_vat(self):
        """A ``Moms 0%`` line is reported as a 0% breakdown."""
        invoice_text = """
        Moms 0%: 0,00
        Summa exkl. moms: 1 000,00
        """
        result = VATExtractor().extract(invoice_text)

        found_rates = [entry.rate for entry in result.breakdowns]
        assert 0.0 in found_rates

    def test_extract_netto_brutto_format(self):
        """``Netto`` is read as the total excluding VAT."""
        invoice_text = """
        Netto: 10 000,00
        Moms: 2 500,00
        Brutto: 12 500,00
        """
        result = VATExtractor().extract(invoice_text)

        assert result.total_excl_vat == "10 000,00"
        # NOTE: the rate implied by the amounts (2 500 / 10 000 = 25%) is
        # expected to be detected, but nothing here asserts it.

    def test_confidence_calculation(self):
        """Fuller, self-consistent text scores higher than a lone VAT line."""
        # One extractor instance serves both extractions.
        shared_extractor = VATExtractor()

        # Rich input: totals, a rated VAT line and a matching "Summa moms".
        rich_text = """
        Summa exkl. moms: 10 000,00
        Moms 25%: 2 500,00
        Summa moms: 2 500,00
        Summa inkl. moms: 12 500,00
        """
        rich_result = shared_extractor.extract(rich_text)
        assert rich_result.confidence >= 0.8

        # Sparse input: a single VAT amount without rate or totals.
        sparse_text = """
        Moms: 2 500,00
        """
        sparse_result = shared_extractor.extract(sparse_text)
        assert sparse_result.confidence < rich_result.confidence

    def test_handles_ocr_noise(self):
        """Misspelled and split keywords still yield at least some data."""
        invoice_text = """
        Summa exkl moms: 10 000,00
        Mams 25%: 2 500,00
        Sum ma inkl. moms: 12 500,00
        """
        result = VATExtractor().extract(invoice_text)

        # Fails only when both the total and the breakdown list are missing.
        assert not (result.total_excl_vat is None and len(result.breakdowns) == 0)
|
||||
Reference in New Issue
Block a user