Re-structure the project.
This commit is contained in:
65
tests/normalize/normalizers/test_ocr_normalizer.py
Normal file
65
tests/normalize/normalizers/test_ocr_normalizer.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
Tests for OCRNormalizer
|
||||
|
||||
Usage:
|
||||
pytest tests/normalize/normalizers/test_ocr_normalizer.py -v
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from src.normalize.normalizers.ocr_normalizer import OCRNormalizer
|
||||
|
||||
|
||||
class TestOCRNormalizer:
    """Behavioural checks for OCRNormalizer, via both normalize() and direct calls."""

    @pytest.fixture
    def normalizer(self):
        """Provide a freshly constructed OCRNormalizer to each test."""
        instance = OCRNormalizer()
        return instance

    def test_pure_digits(self, normalizer):
        """A digits-only value is returned unchanged as the sole variant."""
        raw = '94228110015950070'
        variants = normalizer.normalize(raw)
        assert raw in variants
        assert len(variants) == 1

    def test_with_prefix(self, normalizer):
        """A prefixed value yields both the original text and its bare digits."""
        raw = 'OCR: 94228110015950070'
        variants = normalizer.normalize(raw)
        assert raw in variants
        assert '94228110015950070' in variants

    def test_with_spaces(self, normalizer):
        """Space-separated digit groups are joined into one digit string."""
        variants = normalizer.normalize('9422 8110 0159 50070')
        expected = '94228110015950070'
        assert expected in variants

    def test_with_hyphens(self, normalizer):
        """Hyphen-separated digit groups are joined into one digit string."""
        variants = normalizer.normalize('1234-5678-9012')
        expected = '123456789012'
        assert expected in variants

    def test_empty_string(self, normalizer):
        """Calling the normalizer with an empty string gives an empty list."""
        variants = normalizer('')
        assert variants == []

    def test_none_value(self, normalizer):
        """Calling the normalizer with None gives an empty list."""
        variants = normalizer(None)
        assert variants == []

    def test_callable_interface(self, normalizer):
        """The instance itself can be invoked like a function."""
        variants = normalizer('OCR-12345')
        expected = '12345'
        assert expected in variants

    def test_mixed_separators(self, normalizer):
        """Spaces and hyphens mixed in one value are all stripped."""
        variants = normalizer.normalize('123 456-789 012')
        expected = '123456789012'
        assert expected in variants

    def test_very_long_ocr(self, normalizer):
        """A 20-digit value is still present among the variants."""
        raw = '12345678901234567890'
        variants = normalizer.normalize(raw)
        assert raw in variants
|
||||
Reference in New Issue
Block a user