""" Tests for OCRNormalizer Usage: pytest tests/normalize/normalizers/test_ocr_normalizer.py -v """ import pytest from src.normalize.normalizers.ocr_normalizer import OCRNormalizer class TestOCRNormalizer: """Test OCRNormalizer functionality""" @pytest.fixture def normalizer(self): """Create normalizer instance for testing""" return OCRNormalizer() def test_pure_digits(self, normalizer): """Pure digit OCR number should return as-is""" result = normalizer.normalize('94228110015950070') assert '94228110015950070' in result assert len(result) == 1 def test_with_prefix(self, normalizer): """OCR number with prefix should extract digits and keep original""" result = normalizer.normalize('OCR: 94228110015950070') assert 'OCR: 94228110015950070' in result assert '94228110015950070' in result def test_with_spaces(self, normalizer): """OCR number with spaces should be normalized""" result = normalizer.normalize('9422 8110 0159 50070') assert '94228110015950070' in result def test_with_hyphens(self, normalizer): """OCR number with hyphens should be normalized""" result = normalizer.normalize('1234-5678-9012') assert '123456789012' in result def test_empty_string(self, normalizer): """Empty string should return empty list""" result = normalizer('') assert result == [] def test_none_value(self, normalizer): """None value should return empty list""" result = normalizer(None) assert result == [] def test_callable_interface(self, normalizer): """Normalizer should be callable via __call__""" result = normalizer('OCR-12345') assert '12345' in result def test_mixed_separators(self, normalizer): """OCR number with mixed separators should be normalized""" result = normalizer.normalize('123 456-789 012') assert '123456789012' in result def test_very_long_ocr(self, normalizer): """Very long OCR number should be handled""" result = normalizer.normalize('12345678901234567890') assert '12345678901234567890' in result