32 lines
784 B
Python
32 lines
784 B
Python
"""
|
|
OCR Number Normalizer
|
|
|
|
Normalizes OCR reference numbers (Swedish payment system).
|
|
"""
|
|
|
|
import re
|
|
from .base import BaseNormalizer
|
|
|
|
|
|
class OCRNormalizer(BaseNormalizer):
    """
    Normalizes OCR reference numbers.

    Similar to invoice number - primarily digits.

    Variants are returned in a stable order: the cleaned input first,
    followed by its digits-only form when that differs from the input.

    Examples:
        '94228110015950070' -> ['94228110015950070']
        'OCR: 94228110015950070' -> ['OCR: 94228110015950070', '94228110015950070']
    """

    def normalize(self, value: str) -> list[str]:
        """Generate variants of OCR number.

        Args:
            value: Raw OCR reference text; may carry a label, spaces or
                other non-digit characters around the digits.

        Returns:
            The non-empty, de-duplicated variants: the cleaned text and,
            when different, the same text with every non-digit removed.
            The list is empty when the cleaned text is empty.
        """
        # clean_text comes from BaseNormalizer, which is not visible here;
        # presumably it trims/normalizes whitespace - verify against base.
        value = self.clean_text(value)
        digits_only = re.sub(r'\D', '', value)

        # dict.fromkeys drops duplicates while keeping insertion order.
        # list(set(...)) would yield an order that changes between runs
        # because str hashes are randomized per process.
        candidates = (value, digits_only)
        return list(dict.fromkeys(v for v in candidates if v))
|