66 lines
2.2 KiB
Python
66 lines
2.2 KiB
Python
"""
|
|
Tests for OCRNormalizer
|
|
|
|
Usage:
|
|
pytest tests/normalize/normalizers/test_ocr_normalizer.py -v
|
|
"""
|
|
|
|
import pytest
|
|
from shared.normalize.normalizers.ocr_normalizer import OCRNormalizer
|
|
|
|
|
|
class TestOCRNormalizer:
    """Test OCRNormalizer functionality.

    Each test feeds one raw OCR-number string (or an empty/None value) to the
    normalizer and checks which variants appear in the returned collection.
    """

    @pytest.fixture
    def normalizer(self):
        """Create a fresh normalizer instance for each test."""
        return OCRNormalizer()

    def test_pure_digits(self, normalizer):
        """Pure digit OCR number should be returned as the only variant."""
        result = normalizer.normalize('94228110015950070')
        assert '94228110015950070' in result
        assert len(result) == 1

    def test_with_prefix(self, normalizer):
        """OCR number with prefix should extract digits and keep original."""
        result = normalizer.normalize('OCR: 94228110015950070')
        # Both the untouched input and the digits-only form are expected.
        assert 'OCR: 94228110015950070' in result
        assert '94228110015950070' in result

    def test_with_spaces(self, normalizer):
        """OCR number with spaces should yield the digits-only form."""
        result = normalizer.normalize('9422 8110 0159 50070')
        assert '94228110015950070' in result

    def test_with_hyphens(self, normalizer):
        """OCR number with hyphens should yield the digits-only form."""
        result = normalizer.normalize('1234-5678-9012')
        assert '123456789012' in result

    def test_empty_string(self, normalizer):
        """Empty string should return empty list."""
        # Exercised through the callable interface, as in the original test.
        result = normalizer('')
        assert result == []

    def test_none_value(self, normalizer):
        """None value should return empty list."""
        # Exercised through the callable interface, as in the original test.
        result = normalizer(None)
        assert result == []

    def test_callable_interface(self, normalizer):
        """Normalizer should be callable via __call__."""
        result = normalizer('OCR-12345')
        assert '12345' in result

    def test_mixed_separators(self, normalizer):
        """OCR number with mixed separators should yield the digits-only form."""
        result = normalizer.normalize('123 456-789 012')
        assert '123456789012' in result

    def test_very_long_ocr(self, normalizer):
        """Very long OCR number should be handled."""
        result = normalizer.normalize('12345678901234567890')
        assert '12345678901234567890' in result
|