70 lines
2.2 KiB
Python
70 lines
2.2 KiB
Python
"""
Tests for the ExactMatcher strategy.

Usage:
    pytest tests/matcher/strategies/test_exact_matcher.py -v
"""
|
|
|
|
import pytest
|
|
from dataclasses import dataclass
|
|
from src.matcher.strategies.exact_matcher import ExactMatcher
|
|
|
|
|
|
@dataclass
class MockToken:
    """Lightweight stand-in for a document token, used only by these tests.

    Instances are handed to ``ExactMatcher.find_matches`` in place of real
    tokens; only the three fields below are provided.
    """

    # Text content of the token, compared against the searched value.
    text: str
    # Four-number bounding box.
    # NOTE(review): presumably (x0, y0, x1, y1) -- confirm against the real token type.
    bbox: tuple[float, float, float, float]
    # Page number the token belongs to; defaults to 0.
    page_no: int = 0
|
|
|
|
|
|
class TestExactMatcher:
    """Behavioural checks for ``ExactMatcher.find_matches``.

    Every test passes a list of ``MockToken`` objects, the value to look for,
    and the field name ``'InvoiceNumber'``, then inspects the returned matches.
    """

    @pytest.fixture
    def matcher(self):
        """Provide an ExactMatcher built with a context radius of 200.0."""
        instance = ExactMatcher(context_radius=200.0)
        return instance

    def test_exact_match(self, matcher):
        """Identical token text yields exactly one match scored 1.0."""
        token = MockToken(text='100017500321', bbox=(100, 100, 200, 120))

        found = matcher.find_matches([token], '100017500321', 'InvoiceNumber')

        assert len(found) == 1
        best = found[0]
        assert best.score == 1.0
        assert best.matched_text == '100017500321'

    def test_case_insensitive_match(self, matcher):
        """A value differing only in letter case yields one match scored 0.9."""
        token = MockToken(text='INV-12345', bbox=(100, 100, 200, 120))

        found = matcher.find_matches([token], 'inv-12345', 'InvoiceNumber')

        assert len(found) == 1
        # Without token_index, case-insensitive falls through to digits-only
        # match, which is why the expected score is 0.9.
        assert found[0].score == 0.9

    def test_digits_only_match(self, matcher):
        """Searching for just the digits of a token yields one match scored 0.9."""
        token = MockToken(text='INV-12345', bbox=(100, 100, 200, 120))

        found = matcher.find_matches([token], '12345', 'InvoiceNumber')

        assert len(found) == 1
        assert found[0].score == 0.9

    def test_no_match(self, matcher):
        """A value absent from the tokens produces no matches."""
        token = MockToken(text='100017500321', bbox=(100, 100, 200, 120))

        found = matcher.find_matches([token], '999999', 'InvoiceNumber')

        assert len(found) == 0

    def test_empty_tokens(self, matcher):
        """An empty token list produces no matches."""
        no_tokens = []

        found = matcher.find_matches(no_tokens, '100017500321', 'InvoiceNumber')

        assert len(found) == 0
|