"""
Tests for ExactMatcher strategy

Usage:
    pytest tests/matcher/strategies/test_exact_matcher.py -v
"""

import pytest
from dataclasses import dataclass

from shared.matcher.strategies.exact_matcher import ExactMatcher


# Bounding box given to every token built in this module.
_BBOX = (100, 100, 200, 120)

# Field name passed to every find_matches call in this module.
_FIELD = 'InvoiceNumber'


@dataclass
class MockToken:
    """Lightweight token stand-in holding text, a bounding box and a page number."""

    text: str
    bbox: tuple[float, float, float, float]
    page_no: int = 0


def _single_token(text):
    """Return a one-element token list whose only token carries *text*."""
    return [MockToken(text, _BBOX)]


class TestExactMatcher:
    """Behavioural checks for ExactMatcher.find_matches."""

    @pytest.fixture
    def matcher(self):
        """Provide an ExactMatcher constructed with context_radius=200.0."""
        return ExactMatcher(context_radius=200.0)

    def test_exact_match(self, matcher):
        """A token whose text equals the value yields one match scored 1.0."""
        found = matcher.find_matches(
            _single_token('100017500321'), '100017500321', _FIELD
        )

        assert len(found) == 1
        best = found[0]
        assert best.score == 1.0
        assert best.matched_text == '100017500321'

    def test_case_insensitive_match(self, matcher):
        """A value differing only in letter case yields one match scored 0.9."""
        found = matcher.find_matches(
            _single_token('INV-12345'), 'inv-12345', _FIELD
        )

        assert len(found) == 1
        # Without token_index, case-insensitive falls through to digits-only match
        assert found[0].score == 0.9

    def test_digits_only_match(self, matcher):
        """A value equal to the token's digits yields one match scored 0.9."""
        found = matcher.find_matches(
            _single_token('INV-12345'), '12345', _FIELD
        )

        assert len(found) == 1
        assert found[0].score == 0.9

    def test_no_match(self, matcher):
        """A value absent from the tokens produces no matches."""
        found = matcher.find_matches(
            _single_token('100017500321'), '999999', _FIELD
        )

        assert len(found) == 0

    def test_empty_tokens(self, matcher):
        """An empty token list produces no matches."""
        found = matcher.find_matches([], '100017500321', _FIELD)

        assert len(found) == 0