WIP
This commit is contained in:
@@ -42,6 +42,7 @@ from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
from src.pdf.extractor import Token as TextToken
|
||||
from src.utils.validators import FieldValidators
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -484,21 +485,42 @@ class MachineCodeParser:
|
||||
def format_account(account_digits: str) -> tuple[str, str]:
    """Format an account number and decide whether it is Bankgiro or Plusgiro.

    Reads ``is_plusgiro_context`` from the enclosing scope. When that flag
    is truthy the number is formatted as Plusgiro without any checksum
    test. Otherwise both ``FieldValidators`` checks are consulted (Luhn
    checks, per the original comments) and Plusgiro is chosen only when it
    is the sole format that validates. Every other outcome - only Bankgiro
    valid, both valid, or both invalid - yields Bankgiro, which is the more
    common type in payment lines.

    Args:
        account_digits: The account number as a bare digit string, without
            separators.

    Returns:
        (formatted_account, account_type), where account_type is
        'plusgiro' or 'bankgiro'.

    Raises:
        IndexError: If account_digits is empty (the trailing check digit
            cannot be split off).
    """
    # Plusgiro layout XXXXXXX-X: all leading digits, a dash, then the
    # trailing check digit. Built once and shared by both Plusgiro exits.
    pg_formatted = f"{account_digits[:-1]}-{account_digits[-1]}"

    if is_plusgiro_context:
        # Context explicitly indicates Plusgiro.
        return pg_formatted, 'plusgiro'

    # No explicit context: let the checksum validators decide. This runs for
    # every length, so a short number that validates only as Plusgiro is no
    # longer forced to Bankgiro.
    pg_valid = FieldValidators.is_valid_plusgiro(account_digits)
    bg_valid = FieldValidators.is_valid_bankgiro(account_digits)

    if pg_valid and not bg_valid:
        return pg_formatted, 'plusgiro'

    # Bankgiro layout: XXX-XXXX (7 digits) or XXXX-XXXX (8 digits); any
    # other length is passed through unformatted.
    if len(account_digits) == 7:
        bg_formatted = f"{account_digits[:3]}-{account_digits[3:]}"
    elif len(account_digits) == 8:
        bg_formatted = f"{account_digits[:4]}-{account_digits[4:]}"
    else:
        bg_formatted = account_digits

    # Only Bankgiro valid, both valid, or both invalid: default to Bankgiro.
    return bg_formatted, 'bankgiro'
|
||||
|
||||
# Try primary pattern
|
||||
match = self.PAYMENT_LINE_PATTERN.search(raw_line)
|
||||
|
||||
Reference in New Issue
Block a user