""" Tests for payment line parser. """ import pytest import sys from pathlib import Path # Add project root to path project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) from src.inference.payment_line_parser import PaymentLineParser, PaymentLineData class TestPaymentLineParser: """Test PaymentLineParser class.""" @pytest.fixture def parser(self): """Create parser instance.""" return PaymentLineParser() def test_parse_full_format_with_amount(self, parser): """Test parsing full format with amount.""" text = "# 94228110015950070 # 15658 00 8 > 48666036#14#" data = parser.parse(text) assert data.is_valid assert data.ocr_number == "94228110015950070" assert data.amount == "15658.00" assert data.account_number == "48666036" assert data.record_type == "8" assert data.check_digits == "14" assert data.parse_method == "full" def test_parse_with_spaces_in_amount(self, parser): """Test parsing with OCR-induced spaces in amount.""" text = "# 11000770600242 # 12 0 0 00 5 > 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.ocr_number == "11000770600242" assert data.amount == "1200.00" # Spaces removed assert data.account_number == "3082963" assert data.record_type == "5" assert data.check_digits == "41" def test_parse_with_spaces_in_check_digits(self, parser): """Test parsing with spaces around check digits: #41 # instead of #41#.""" text = "# 6026726908 # 736 00 9 > 5692041 #41 #" data = parser.parse(text) assert data.is_valid assert data.ocr_number == "6026726908" assert data.amount == "736.00" assert data.account_number == "5692041" assert data.check_digits == "41" def test_parse_without_greater_than_symbol(self, parser): """Test parsing when > symbol is missing (OCR error).""" text = "# 11000770600242 # 1200 00 5 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.ocr_number == "11000770600242" assert data.amount == "1200.00" assert data.account_number == "3082963" def test_parse_format_without_amount(self, parser): """Test parsing format without amount.""" text = "# 11000770600242 # > 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.ocr_number == "11000770600242" assert data.amount is None assert data.account_number == "3082963" assert data.check_digits == "41" assert data.parse_method == "no_amount" def test_parse_account_only_format(self, parser): """Test parsing account-only format.""" text = "> 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.ocr_number == "" assert data.amount is None assert data.account_number == "3082963" assert data.check_digits == "41" assert data.parse_method == "account_only" assert "Partial" in data.error def test_parse_invalid_format(self, parser): """Test parsing invalid format.""" text = "This is not a payment line" data = parser.parse(text) assert not data.is_valid assert data.error is not None assert "No valid payment line format" in data.error def test_parse_empty_text(self, parser): """Test parsing empty text.""" data = parser.parse("") assert not data.is_valid assert data.error == "Empty payment line text" def test_format_machine_readable_full(self, parser): """Test formatting full data to machine-readable format.""" data = PaymentLineData( ocr_number="94228110015950070", amount="15658.00", account_number="48666036", record_type="8", check_digits="14", raw_text="original", is_valid=True ) formatted = parser.format_machine_readable(data) assert "# 94228110015950070 #" in formatted assert "15658 00 8" in formatted assert "48666036#14#" in formatted def test_format_machine_readable_no_amount(self, parser): """Test formatting data without amount.""" data = PaymentLineData( ocr_number="11000770600242", amount=None, account_number="3082963", record_type=None, check_digits="41", raw_text="original", is_valid=True ) formatted = parser.format_machine_readable(data) assert "# 11000770600242 #" in formatted assert "3082963#41#" in formatted def test_format_machine_readable_account_only(self, parser): """Test formatting account-only data.""" data = PaymentLineData( ocr_number="", amount=None, account_number="3082963", record_type=None, check_digits="41", raw_text="original", is_valid=True ) formatted = parser.format_machine_readable(data) assert "> 3082963#41#" in formatted def test_format_for_field_extractor_valid(self, parser): """Test formatting for FieldExtractor API (valid data).""" text = "# 6026726908 # 736 00 9 > 5692041#41#" data = parser.parse(text) formatted, is_valid, error = parser.format_for_field_extractor(data) assert is_valid assert formatted is not None assert "# 6026726908 #" in formatted assert "736 00" in formatted def test_format_for_field_extractor_invalid(self, parser): """Test formatting for FieldExtractor API (invalid data).""" text = "invalid payment line" data = parser.parse(text) formatted, is_valid, error = parser.format_for_field_extractor(data) assert not is_valid assert formatted is None assert error is not None class TestRealWorldExamples: """Test with real-world payment line examples from the codebase.""" @pytest.fixture def parser(self): """Create parser instance.""" return PaymentLineParser() def test_billo310_payment_line(self, parser): """Test Billo310 PDF payment line (from issue report).""" # This is the payment line that had Amount extraction issue text = "# 6026726908 # 736 00 9 > 5692041 #41 #" data = parser.parse(text) assert data.is_valid assert data.amount == "736.00" # Correct amount assert data.account_number == "5692041" def test_billo363_payment_line(self, parser): """Test Billo363 PDF payment line.""" text = "# 11000770600242 # 12 0 0 00 5 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.amount == "1200.00" assert data.ocr_number == "11000770600242" def test_payment_line_with_spaces_in_account(self, parser): """Test payment line with spaces in account number.""" text = "# 94228110015950070 # 15658 00 8 > 4 8 6 6 6 0 3 6#14#" data = parser.parse(text) assert data.is_valid assert data.account_number == "48666036" # Spaces removed def test_multiple_spaces_in_amounts(self, parser): """Test handling multiple spaces in amount.""" text = "# 11000770600242 # 1 2 0 0 00 5 > 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.amount == "1200.00" class TestEdgeCases: """Test edge cases and error conditions.""" @pytest.fixture def parser(self): """Create parser instance.""" return PaymentLineParser() def test_very_long_ocr_number(self, parser): """Test with very long OCR number.""" text = "# 123456789012345678901234567890 # 1000 00 5 > 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.ocr_number == "123456789012345678901234567890" def test_zero_amount(self, parser): """Test with zero amount.""" text = "# 11000770600242 # 0 00 5 > 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.amount == "0.00" def test_large_amount(self, parser): """Test with large amount.""" text = "# 11000770600242 # 999999 99 5 > 3082963#41#" data = parser.parse(text) assert data.is_valid assert data.amount == "999999.99" def test_text_with_extra_characters(self, parser): """Test with extra characters around payment line.""" text = "Some text before # 6026726908 # 736 00 9 > 5692041#41# and after" data = parser.parse(text) assert data.is_valid assert data.amount == "736.00" def test_none_input(self, parser): """Test with None input.""" data = parser.parse(None) assert not data.is_valid assert data.error is not None def test_whitespace_only(self, parser): """Test with whitespace only.""" data = parser.parse(" \t\n ") assert not data.is_valid assert "Empty" in data.error