Re-structure the project.
This commit is contained in:
69
tests/matcher/strategies/test_exact_matcher.py
Normal file
69
tests/matcher/strategies/test_exact_matcher.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""
|
||||
Tests for ExactMatcher strategy
|
||||
|
||||
Usage:
|
||||
pytest tests/matcher/strategies/test_exact_matcher.py -v
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from dataclasses import dataclass
|
||||
from src.matcher.strategies.exact_matcher import ExactMatcher
|
||||
|
||||
|
||||
@dataclass
class MockToken:
    """Mock token for testing.

    Lightweight stand-in for the token objects handed to the matcher; it
    carries only the three attributes populated by the tests in this module.
    """

    # Text content of the token, compared against the value being searched.
    text: str
    # Bounding box as four numbers; presumably (x0, y0, x1, y1) - TODO confirm
    # the coordinate order against the real token type.
    bbox: tuple[float, float, float, float]
    # Page number the token belongs to; defaults to 0.
    page_no: int = 0
|
||||
|
||||
|
||||
class TestExactMatcher:
    """Test ExactMatcher functionality.

    Every test calls ``matcher.find_matches(tokens, value, field_name)`` with
    ``MockToken`` inputs and checks the number of returned matches and, where
    relevant, the ``score`` / ``matched_text`` of the first match.
    """

    @pytest.fixture
    def matcher(self):
        """Create an ExactMatcher instance (context_radius=200.0) for testing."""
        return ExactMatcher(context_radius=200.0)

    def test_exact_match(self, matcher):
        """Exact text match should score 1.0 and report the matched text."""
        tokens = [
            MockToken('100017500321', (100, 100, 200, 120)),
        ]

        matches = matcher.find_matches(tokens, '100017500321', 'InvoiceNumber')

        assert len(matches) == 1
        assert matches[0].score == 1.0
        assert matches[0].matched_text == '100017500321'

    def test_case_insensitive_match(self, matcher):
        """Value differing from the token only by case should score 0.9.

        The expected score is 0.9 rather than a dedicated case-insensitive
        score: see the comment on the assertion below.
        """
        tokens = [
            MockToken('INV-12345', (100, 100, 200, 120)),
        ]

        matches = matcher.find_matches(tokens, 'inv-12345', 'InvoiceNumber')

        assert len(matches) == 1
        # Without token_index, case-insensitive falls through to digits-only match
        assert matches[0].score == 0.9

    def test_digits_only_match(self, matcher):
        """Digits-only match for numeric fields should score 0.9."""
        tokens = [
            MockToken('INV-12345', (100, 100, 200, 120)),
        ]

        matches = matcher.find_matches(tokens, '12345', 'InvoiceNumber')

        assert len(matches) == 1
        assert matches[0].score == 0.9

    def test_no_match(self, matcher):
        """Non-matching value should return no matches."""
        tokens = [
            MockToken('100017500321', (100, 100, 200, 120)),
        ]

        matches = matcher.find_matches(tokens, '999999', 'InvoiceNumber')

        assert len(matches) == 0

    def test_empty_tokens(self, matcher):
        """Empty token list should return no matches."""
        matches = matcher.find_matches([], '100017500321', 'InvoiceNumber')

        assert len(matches) == 0
|
||||
Reference in New Issue
Block a user