This commit is contained in:
Yaojia Wang
2026-01-22 22:03:24 +01:00
parent 4ea4bc96d4
commit 8fd61ea928
19 changed files with 4069 additions and 226 deletions

View File

@@ -266,6 +266,38 @@ class TestNormalizePaymentLine:
assert is_valid is True
# Bankgiro should be normalized despite spaces
def test_payment_line_with_spaces_in_check_digits(self, extractor):
    """Payment line whose check digits are written as '#41 #' instead of '#41#'.

    The normalized output must be reported valid and must contain the
    account number, the amount, and the reference with the stray space
    before the closing '#' removed.
    """
    raw_line = "# 6026726908 # 736 00 9 > 5692041 #41 #"
    normalized, ok, _error = extractor._normalize_payment_line(raw_line)

    assert normalized is not None
    assert ok is True
    # Every expected fragment has to survive normalization.
    for fragment in ("6026726908", "736 00", "5692041#41#"):
        assert fragment in normalized
def test_payment_line_with_ocr_spaces_in_amount(self, extractor):
    """Payment line with OCR-induced spaces in the amount: '12 0 0 00' -> '1200 00'.

    The normalized output must be reported valid and must contain the
    account number, the re-joined amount, and the reference fragment.
    """
    raw_line = "# 11000770600242 # 12 0 0 00 5 3082963#41#"
    normalized, ok, _error = extractor._normalize_payment_line(raw_line)

    assert normalized is not None
    assert ok is True
    # '1200 00' only appears if the split digits were merged back together.
    for fragment in ("11000770600242", "1200 00", "3082963#41#"):
        assert fragment in normalized
def test_payment_line_without_greater_symbol(self, extractor):
    """Payment line that lacks the '>' symbol (low-DPI OCR issue).

    The normalized output must still be reported valid and contain the
    account number and the amount.
    """
    raw_line = "# 11000770600242 # 1200 00 5 3082963#41#"
    normalized, ok, _error = extractor._normalize_payment_line(raw_line)

    assert normalized is not None
    assert ok is True
    # NOTE(review): the trailing '3082963#41#' fragment is not asserted here,
    # unlike in the OCR-spaces-in-amount test -- confirm whether intentional.
    for fragment in ("11000770600242", "1200 00"):
        assert fragment in normalized
class TestNormalizeCustomerNumber:
"""Tests for customer number normalization."""
@@ -284,6 +316,33 @@ class TestNormalizeCustomerNumber:
result, is_valid, error = extractor._normalize_customer_number("JTY5763")
assert result is not None
def test_format_without_dash(self, extractor):
    """Customer number written without a dash: 'Dwq 211X' -> 'DWQ 211-X'.

    The input also carries trailing address text, which must not end up
    in the normalized customer number.
    """
    raw_text = "Dwq 211X Billo SE 106 43 Stockholm"
    customer_number, ok, _error = extractor._normalize_customer_number(raw_text)

    assert customer_number is not None
    assert ok is True
    # Exact match: upper-cased, dash inserted, nothing else attached.
    assert customer_number == "DWQ 211-X"
def test_swedish_postal_code_exclusion(self, extractor):
    """Swedish postal codes must be excluded: 'SE 106 43' is not a customer number."""
    address_only = "SE 106 43 Stockholm"
    candidate, _is_valid, _error = extractor._normalize_customer_number(address_only)

    # Returning nothing is acceptable; returning something is acceptable
    # only if it does not contain the postal-code prefix.
    if candidate is not None:
        assert "SE 106" not in candidate
def test_customer_number_with_postal_code_in_text(self, extractor):
    """Extract the customer number when a postal code is also present.

    The input holds both a customer number ('ABC 123X') and a Swedish
    postal code ('SE 106 43'); the result must contain the former and
    not the latter.
    """
    text = "Customer: ABC 123X, Address: SE 106 43 Stockholm"
    result, _is_valid, _error = extractor._normalize_customer_number(text)

    assert result is not None
    assert "ABC" in result
    # The postal code must not be extracted. The earlier assertions already
    # guarantee a non-empty result, so no None/empty guard is needed here.
    assert "SE 106" not in result
class TestNormalizeSupplierOrgNumber:
"""Tests for supplier organization number normalization."""