38 lines
882 B
Python
38 lines
882 B
Python
"""
OCR Number Normalizer

Handles normalization and validation of OCR reference numbers.
"""
|
|
|
|
import re
|
|
|
|
from .base import BaseNormalizer, NormalizationResult
|
|
|
|
|
|
class OcrNumberNormalizer(BaseNormalizer):
    """
    Normalizer for OCR (Optical Character Recognition) reference numbers.

    OCR numbers in Swedish payment systems:
    - Contain at least 5 digits
    - Are used for automated payment matching
    """

    @property
    def field_name(self) -> str:
        """Return the field label handled by this normalizer (``"OCR"``)."""
        return "OCR"

    def normalize(self, text: str) -> NormalizationResult:
        """
        Reduce *text* to its digits and check the minimum OCR length.

        Args:
            text: Raw candidate string for the OCR reference number.

        Returns:
            ``NormalizationResult.failure(...)`` when the input is empty
            after stripping surrounding whitespace, or when fewer than
            5 digits remain once every non-digit character is dropped;
            otherwise ``NormalizationResult.success(...)`` carrying the
            digit-only string.
        """
        trimmed = text.strip()
        if len(trimmed) == 0:
            return NormalizationResult.failure("Empty text")

        # Keep only the digit characters, preserving their original order.
        digits = "".join(re.findall(r"\d", trimmed))

        if len(digits) >= 5:
            return NormalizationResult.success(digits)

        return NormalizationResult.failure(
            f"Too few digits for OCR: {len(digits)}"
        )
|