Files
invoice-master-poc-v2/src/normalize/normalizers/ocr_normalizer.py
2026-01-25 15:21:11 +01:00

32 lines
784 B
Python

"""
OCR Number Normalizer
Normalizes OCR reference numbers (Swedish payment system).
"""
import re
from .base import BaseNormalizer
class OCRNormalizer(BaseNormalizer):
    """
    Normalizes OCR reference numbers.

    Similar to invoice number - primarily digits.

    Examples (assuming ``clean_text`` leaves these inputs unchanged):
        '94228110015950070' -> ['94228110015950070']
        'OCR: 94228110015950070' -> ['OCR: 94228110015950070', '94228110015950070']
    """

    def normalize(self, value: str) -> list[str]:
        """Generate variants of OCR number.

        Args:
            value: Raw OCR reference text; it is passed through
                ``self.clean_text`` before any variant is derived.

        Returns:
            The unique, non-empty variants in a stable order: the cleaned
            text first, followed by its digits-only form when that differs.
            An empty list when the cleaned text is empty.
        """
        value = self.clean_text(value)
        digits_only = re.sub(r'\D', '', value)
        # dict.fromkeys de-duplicates while keeping insertion order. The
        # earlier list(set(...)) returned the variants in an arbitrary order
        # that could differ between runs (string hash randomization).
        return list(dict.fromkeys(v for v in (value, digits_only) if v))