Re-structure the project.

This commit is contained in:
Yaojia Wang
2026-01-25 15:21:11 +01:00
parent 8fd61ea928
commit e599424a92
80 changed files with 10672 additions and 1584 deletions

View File

@@ -0,0 +1,52 @@
"""
Fuzzy match strategy for amounts and dates.
"""
from .base import BaseMatchStrategy
from ..models import TokenLike, Match
from ..token_index import TokenIndex
from ..context import find_context_keywords
from ..utils import parse_amount
class FuzzyMatcher(BaseMatchStrategy):
    """Find approximate matches for amounts and dates.

    Only the ``'Amount'`` field is handled by the matching logic in this
    class; any other field name yields no matches.
    """

    # Two parsed amounts whose absolute difference is strictly below this
    # threshold are treated as the same amount.
    _AMOUNT_TOLERANCE = 0.01

    # Score given to a fuzzy amount match before the context boost is added.
    _BASE_SCORE = 0.8

    def find_matches(
        self,
        tokens: list[TokenLike],
        value: str,
        field_name: str,
        token_index: TokenIndex | None = None,
    ) -> list[Match]:
        """Return tokens whose parsed amount is numerically close to *value*.

        Args:
            tokens: Candidate tokens. Each token's ``text``, ``bbox`` and
                ``page_no`` attributes are read.
            value: The expected field value, parsed with ``parse_amount``.
            field_name: Name of the field being matched. Only ``'Amount'``
                produces matches.
            token_index: Optional index forwarded unchanged to
                ``find_context_keywords``.

        Returns:
            One ``Match`` per token whose parsed amount differs from the
            parsed *value* by less than ``_AMOUNT_TOLERANCE``. The score is
            ``_BASE_SCORE`` plus the context boost, capped at 1.0. The list
            is empty when the field is not ``'Amount'`` or when *value*
            cannot be parsed as an amount.
        """
        matches: list[Match] = []

        # Field check is loop-invariant: nothing to do for other fields.
        if field_name != 'Amount':
            return matches

        # The target value is the same for every token, so parse it once.
        # A failure here means no token can match; stay best-effort and
        # return an empty result instead of raising.
        try:
            value_num = parse_amount(value)
        except Exception:
            return matches
        if value_num is None:
            return matches

        for token in tokens:
            token_text = token.text.strip()
            try:
                token_num = parse_amount(token_text)
                if token_num is None:
                    continue
                if abs(token_num - value_num) >= self._AMOUNT_TOLERANCE:
                    continue

                context_keywords, context_boost = find_context_keywords(
                    tokens, token, field_name, self.context_radius, token_index
                )
                matches.append(Match(
                    field=field_name,
                    value=value,
                    bbox=token.bbox,
                    page_no=token.page_no,
                    score=min(1.0, self._BASE_SCORE + context_boost),
                    matched_text=token_text,
                    context_keywords=context_keywords,
                ))
            except Exception:
                # Best-effort matching: a token that cannot be parsed or
                # scored is skipped. Unlike a bare ``except``, this lets
                # KeyboardInterrupt and SystemExit propagate.
                continue

        return matches