Re-structure the project.
This commit is contained in:
282
tests/test_payment_line_parser.py
Normal file
282
tests/test_payment_line_parser.py
Normal file
@@ -0,0 +1,282 @@
|
||||
"""
|
||||
Tests for payment line parser.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Make the project root importable so ``src.*`` resolves when the tests are
# run without the package being installed.  The path is resolved first so a
# relative ``__file__`` (e.g. when the file is executed directly from inside
# ``tests/``) still yields the real parent-of-parent directory instead of ".".
project_root = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(project_root))
|
||||
|
||||
from src.inference.payment_line_parser import PaymentLineParser, PaymentLineData
|
||||
|
||||
|
||||
class TestPaymentLineParser:
    """Unit tests for ``PaymentLineParser`` parsing and formatting methods."""

    @pytest.fixture
    def parser(self):
        """Provide a fresh ``PaymentLineParser`` instance for each test."""
        return PaymentLineParser()

    def test_parse_full_format_with_amount(self, parser):
        """A complete line populates every field and reports the "full" method."""
        text = "# 94228110015950070 # 15658 00 8 > 48666036#14#"
        data = parser.parse(text)

        assert data.is_valid
        assert data.ocr_number == "94228110015950070"
        assert data.amount == "15658.00"
        assert data.account_number == "48666036"
        assert data.record_type == "8"
        assert data.check_digits == "14"
        assert data.parse_method == "full"

    def test_parse_with_spaces_in_amount(self, parser):
        """OCR-induced spaces inside the amount are removed from the result."""
        text = "# 11000770600242 # 12 0 0 00 5 > 3082963#41#"
        data = parser.parse(text)

        assert data.is_valid
        assert data.ocr_number == "11000770600242"
        assert data.amount == "1200.00"  # Spaces removed
        assert data.account_number == "3082963"
        assert data.record_type == "5"
        assert data.check_digits == "41"

    def test_parse_with_spaces_in_check_digits(self, parser):
        """Spaces around the check digits (``#41 #`` vs ``#41#``) are tolerated."""
        text = "# 6026726908 # 736 00 9 > 5692041 #41 #"
        data = parser.parse(text)

        assert data.is_valid
        assert data.ocr_number == "6026726908"
        assert data.amount == "736.00"
        assert data.account_number == "5692041"
        assert data.check_digits == "41"

    def test_parse_without_greater_than_symbol(self, parser):
        """A line whose ``>`` separator is missing (OCR error) still parses."""
        text = "# 11000770600242 # 1200 00 5 3082963#41#"
        data = parser.parse(text)

        assert data.is_valid
        assert data.ocr_number == "11000770600242"
        assert data.amount == "1200.00"
        assert data.account_number == "3082963"

    def test_parse_format_without_amount(self, parser):
        """A line without an amount yields ``amount is None`` and "no_amount"."""
        text = "# 11000770600242 # > 3082963#41#"
        data = parser.parse(text)

        assert data.is_valid
        assert data.ocr_number == "11000770600242"
        assert data.amount is None
        assert data.account_number == "3082963"
        assert data.check_digits == "41"
        assert data.parse_method == "no_amount"

    def test_parse_account_only_format(self, parser):
        """An account-only line is valid but carries a "Partial" notice."""
        text = "> 3082963#41#"
        data = parser.parse(text)

        assert data.is_valid
        assert data.ocr_number == ""
        assert data.amount is None
        assert data.account_number == "3082963"
        assert data.check_digits == "41"
        assert data.parse_method == "account_only"
        # Guard first so a missing message fails as a clear assertion rather
        # than a TypeError from ``in None``.
        assert data.error is not None
        assert "Partial" in data.error

    def test_parse_invalid_format(self, parser):
        """Text that is not a payment line is rejected with an explanatory error."""
        text = "This is not a payment line"
        data = parser.parse(text)

        assert not data.is_valid
        assert data.error is not None
        assert "No valid payment line format" in data.error

    def test_parse_empty_text(self, parser):
        """An empty string is rejected with the dedicated empty-text error."""
        data = parser.parse("")

        assert not data.is_valid
        assert data.error == "Empty payment line text"

    def test_format_machine_readable_full(self, parser):
        """Fully populated data is rendered with OCR, amount and account parts."""
        data = PaymentLineData(
            ocr_number="94228110015950070",
            amount="15658.00",
            account_number="48666036",
            record_type="8",
            check_digits="14",
            raw_text="original",
            is_valid=True,
        )

        formatted = parser.format_machine_readable(data)

        assert "# 94228110015950070 #" in formatted
        assert "15658 00 8" in formatted
        assert "48666036#14#" in formatted

    def test_format_machine_readable_no_amount(self, parser):
        """Data without an amount still renders the OCR and account parts."""
        data = PaymentLineData(
            ocr_number="11000770600242",
            amount=None,
            account_number="3082963",
            record_type=None,
            check_digits="41",
            raw_text="original",
            is_valid=True,
        )

        formatted = parser.format_machine_readable(data)

        assert "# 11000770600242 #" in formatted
        assert "3082963#41#" in formatted

    def test_format_machine_readable_account_only(self, parser):
        """Account-only data renders as ``> <account>#<check>#``."""
        data = PaymentLineData(
            ocr_number="",
            amount=None,
            account_number="3082963",
            record_type=None,
            check_digits="41",
            raw_text="original",
            is_valid=True,
        )

        formatted = parser.format_machine_readable(data)

        assert "> 3082963#41#" in formatted

    def test_format_for_field_extractor_valid(self, parser):
        """Valid parsed data yields a formatted string flagged as valid."""
        text = "# 6026726908 # 736 00 9 > 5692041#41#"
        data = parser.parse(text)

        # The third element (error) is not asserted on in the valid case.
        formatted, is_valid, _ = parser.format_for_field_extractor(data)

        assert is_valid
        assert formatted is not None
        assert "# 6026726908 #" in formatted
        assert "736 00" in formatted

    def test_format_for_field_extractor_invalid(self, parser):
        """Invalid parsed data yields no formatted string and an error."""
        text = "invalid payment line"
        data = parser.parse(text)

        formatted, is_valid, error = parser.format_for_field_extractor(data)

        assert not is_valid
        assert formatted is None
        assert error is not None
|
||||
|
||||
|
||||
class TestRealWorldExamples:
    """Regression tests built from real-world payment lines in the codebase."""

    @pytest.fixture
    def parser(self):
        """Provide a fresh ``PaymentLineParser`` instance for each test."""
        return PaymentLineParser()

    def test_billo310_payment_line(self, parser):
        """Billo310 PDF payment line (from issue report) parses correctly."""
        # This is the payment line that had Amount extraction issue
        parsed = parser.parse("# 6026726908 # 736 00 9 > 5692041 #41 #")

        assert parsed.is_valid
        assert parsed.amount == "736.00"  # Correct amount
        assert parsed.account_number == "5692041"

    def test_billo363_payment_line(self, parser):
        """Billo363 PDF payment line yields the expected amount and OCR number."""
        parsed = parser.parse("# 11000770600242 # 12 0 0 00 5 3082963#41#")

        assert parsed.is_valid
        assert parsed.amount == "1200.00"
        assert parsed.ocr_number == "11000770600242"

    def test_payment_line_with_spaces_in_account(self, parser):
        """Spaces between account-number digits are removed from the result."""
        line = "# 94228110015950070 # 15658 00 8 > 4 8 6 6 6 0 3 6#14#"
        parsed = parser.parse(line)

        assert parsed.is_valid
        assert parsed.account_number == "48666036"  # Spaces removed

    def test_multiple_spaces_in_amounts(self, parser):
        """An amount split by several spaces is reassembled into one value."""
        parsed = parser.parse("# 11000770600242 # 1 2 0 0 00 5 > 3082963#41#")

        assert parsed.is_valid
        assert parsed.amount == "1200.00"
|
||||
|
||||
|
||||
class TestEdgeCases:
    """Edge cases and error conditions for ``PaymentLineParser.parse``."""

    @pytest.fixture
    def parser(self):
        """Provide a fresh ``PaymentLineParser`` instance for each test."""
        return PaymentLineParser()

    def test_very_long_ocr_number(self, parser):
        """A 30-digit OCR number is accepted and returned unchanged."""
        text = "# 123456789012345678901234567890 # 1000 00 5 > 3082963#41#"
        data = parser.parse(text)

        assert data.is_valid
        assert data.ocr_number == "123456789012345678901234567890"

    def test_zero_amount(self, parser):
        """A zero amount is parsed as ``"0.00"``."""
        text = "# 11000770600242 # 0 00 5 > 3082963#41#"
        data = parser.parse(text)

        assert data.is_valid
        assert data.amount == "0.00"

    def test_large_amount(self, parser):
        """A six-digit amount with non-zero decimals is parsed intact."""
        text = "# 11000770600242 # 999999 99 5 > 3082963#41#"
        data = parser.parse(text)

        assert data.is_valid
        assert data.amount == "999999.99"

    def test_text_with_extra_characters(self, parser):
        """Surrounding non-payment text does not prevent a successful parse."""
        text = "Some text before # 6026726908 # 736 00 9 > 5692041#41# and after"
        data = parser.parse(text)

        assert data.is_valid
        assert data.amount == "736.00"

    def test_none_input(self, parser):
        """``None`` input is reported as invalid with an error set."""
        data = parser.parse(None)

        assert not data.is_valid
        assert data.error is not None

    def test_whitespace_only(self, parser):
        """Whitespace-only input is reported as an "Empty" payment line."""
        data = parser.parse(" \t\n ")

        assert not data.is_valid
        # Guard first so a missing message fails as a clear assertion rather
        # than a TypeError from ``in None``.
        assert data.error is not None
        assert "Empty" in data.error
|
||||
Reference in New Issue
Block a user