Files
invoice-master-poc-v2/tests/normalize/normalizers/test_ocr_normalizer.py
2026-01-27 23:58:17 +01:00

66 lines
2.2 KiB
Python

"""
Tests for OCRNormalizer
Usage:
pytest tests/normalize/normalizers/test_ocr_normalizer.py -v
"""
import pytest
from shared.normalize.normalizers.ocr_normalizer import OCRNormalizer
class TestOCRNormalizer:
    """Checks OCRNormalizer output for a range of OCR number input shapes."""

    @pytest.fixture
    def normalizer(self):
        """Build the OCRNormalizer instance injected into each test."""
        instance = OCRNormalizer()
        return instance

    def test_pure_digits(self, normalizer):
        """A digits-only value comes back as the single returned variant."""
        raw = '94228110015950070'
        variants = normalizer.normalize(raw)
        assert raw in variants
        assert len(variants) == 1

    def test_with_prefix(self, normalizer):
        """A prefixed value yields both the original text and its digits."""
        raw = 'OCR: 94228110015950070'
        digits = '94228110015950070'
        variants = normalizer.normalize(raw)
        assert raw in variants
        assert digits in variants

    def test_with_spaces(self, normalizer):
        """Space-separated digit groups are joined into one number."""
        expected = '94228110015950070'
        variants = normalizer.normalize('9422 8110 0159 50070')
        assert expected in variants

    def test_with_hyphens(self, normalizer):
        """Hyphen-separated digit groups are joined into one number."""
        expected = '123456789012'
        variants = normalizer.normalize('1234-5678-9012')
        assert expected in variants

    def test_empty_string(self, normalizer):
        """Calling the normalizer with an empty string gives an empty list."""
        # Goes through __call__ (not .normalize) on purpose, as the original does.
        variants = normalizer('')
        assert variants == []

    def test_none_value(self, normalizer):
        """Calling the normalizer with None gives an empty list."""
        # Goes through __call__ (not .normalize) on purpose, as the original does.
        variants = normalizer(None)
        assert variants == []

    def test_callable_interface(self, normalizer):
        """The instance can be invoked directly and still extracts digits."""
        expected = '12345'
        variants = normalizer('OCR-12345')
        assert expected in variants

    def test_mixed_separators(self, normalizer):
        """Spaces and hyphens mixed in one value are both removed."""
        expected = '123456789012'
        variants = normalizer.normalize('123 456-789 012')
        assert expected in variants

    def test_very_long_ocr(self, normalizer):
        """A 20-digit value is present unchanged in the output."""
        raw = '12345678901234567890'
        variants = normalizer.normalize(raw)
        assert raw in variants