Files
invoice-master-poc-v2/packages/inference/inference/pipeline/normalizers/ocr_number.py
Yaojia Wang a564ac9d70 WIP
2026-02-01 18:51:54 +01:00

38 lines
882 B
Python

"""
OCR Number Normalizer
Handles normalization and validation of OCR reference numbers.
"""
import re
from .base import BaseNormalizer, NormalizationResult
class OcrNumberNormalizer(BaseNormalizer):
    """
    Normalizes OCR (Optical Character Recognition) reference numbers.

    OCR numbers in Swedish payment systems:
    - Minimum 5 digits
    - Used for automated payment matching
    """

    # Fewest digits a candidate must contain to be accepted.
    _MIN_DIGITS = 5

    # Matches every character that is not an ASCII digit. An explicit
    # ``[^0-9]`` class is used instead of ``\D`` because, for ``str``
    # patterns, ``\D`` is Unicode-aware and would let non-ASCII decimal
    # digits (e.g. fullwidth or Arabic-Indic) through into the result.
    _NON_DIGIT = re.compile(r"[^0-9]")

    @property
    def field_name(self) -> str:
        """Return the name of the field this normalizer handles."""
        return "OCR"

    def normalize(self, text: str) -> NormalizationResult:
        """
        Reduce *text* to its ASCII digits and check the minimum length.

        Args:
            text: Raw text for the OCR field. Surrounding whitespace is
                ignored, and every character other than ``0``-``9`` is
                discarded.

        Returns:
            ``NormalizationResult.success(digits)`` with the remaining
            digit string when at least five digits are left; otherwise
            ``NormalizationResult.failure(...)`` with a message saying
            that the text was empty or had too few digits.
        """
        text = text.strip()
        if not text:
            return NormalizationResult.failure("Empty text")

        # Separators, spaces, labels and any other non-digit noise are
        # dropped; only the digit sequence itself is kept.
        digits = self._NON_DIGIT.sub("", text)

        if len(digits) < self._MIN_DIGITS:
            return NormalizationResult.failure(
                f"Too few digits for OCR: {len(digits)}"
            )
        return NormalizationResult.success(digits)