Add more tests

This commit is contained in:
Yaojia Wang
2026-02-01 22:40:41 +01:00
parent a564ac9d70
commit 400b12a967
55 changed files with 9306 additions and 267 deletions

View File

@@ -196,3 +196,121 @@ class TestAnnotationModel:
assert 0 <= ann.y_center <= 1
assert 0 <= ann.width <= 1
assert 0 <= ann.height <= 1
class TestAutoLabelFilePathResolution:
    """Checks around resolving a stored PDF path for auto-labeling.

    Storage paths look like "raw_pdfs/uuid.pdf". According to the notes that
    accompany these tests, the auto-label endpoint turns such a path into a
    real filesystem path by going through the storage helper.
    """

    @staticmethod
    def _filename_of(storage_path):
        """Return the last "/"-separated component of *storage_path*.

        Mirrors the expression the tests attribute to the annotation
        endpoint: a path without any "/" is returned unchanged.
        """
        if "/" not in storage_path:
            return storage_path
        return storage_path.split("/")[-1]

    def test_extracts_filename_from_storage_path(self):
        """A prefixed storage path yields only its trailing filename."""
        # Typical stored form: "<prefix>/<uuid>.pdf".
        prefixed = "raw_pdfs/550e8400-e29b-41d4-a716-446655440000.pdf"

        extracted = self._filename_of(prefixed)

        assert extracted == "550e8400-e29b-41d4-a716-446655440000.pdf"

    def test_handles_path_without_prefix(self):
        """A bare filename (no directory prefix) passes through unchanged."""
        bare = "550e8400-e29b-41d4-a716-446655440000.pdf"

        extracted = self._filename_of(bare)

        assert extracted == "550e8400-e29b-41d4-a716-446655440000.pdf"

    def test_storage_helper_resolves_path(self):
        """The (mocked) storage helper hands back the local path it was given."""
        from pathlib import Path
        from unittest.mock import MagicMock, patch

        # Stand-in helper whose lookup always returns a fixed path.
        expected_path = Path("/storage/raw_pdfs/test.pdf")
        fake_helper = MagicMock()
        fake_helper.get_raw_pdf_local_path.return_value = expected_path

        patch_target = "inference.web.services.storage_helpers.get_storage_helper"
        with patch(patch_target, return_value=fake_helper):
            # Imported while the patch is active so the name binds to the mock.
            from inference.web.services.storage_helpers import get_storage_helper

            helper = get_storage_helper()
            resolved = helper.get_raw_pdf_local_path("test.pdf")

            assert resolved == expected_path
            fake_helper.get_raw_pdf_local_path.assert_called_once_with("test.pdf")

    def test_auto_label_request_validation(self):
        """AutoLabelRequest accepts both populated and empty field_values."""
        # Populated mapping is stored as given.
        populated = AutoLabelRequest(
            field_values={"InvoiceNumber": "12345"},
            replace_existing=False,
        )
        assert populated.field_values == {"InvoiceNumber": "12345"}

        # An empty mapping is fine for the schema itself; the original notes
        # say the endpoint is what rejects empty input.
        blank = AutoLabelRequest(field_values={}, replace_existing=False)
        assert blank.field_values == {}
class TestMatchClassAttributes:
    """Pins the attribute names of Match that auto-labeling relies on.

    Per the notes accompanying these tests, the autolabel service consumes
    Match objects produced by FieldMatcher; these checks guard against
    reading an attribute name that Match does not define.
    """

    def test_match_has_matched_text_attribute(self):
        """Match exposes `matched_text` and has no `matched_value`."""
        from shared.matcher.models import Match

        match_kwargs = {
            "field": "invoice_number",
            "value": "12345",
            "bbox": (100, 100, 200, 150),
            "page_no": 0,
            "score": 0.95,
            "matched_text": "INV-12345",
            "context_keywords": ["faktura", "nummer"],
        }
        candidate = Match(**match_kwargs)

        # `matched_text` is the attribute autolabel.py is expected to read.
        assert hasattr(candidate, "matched_text")
        assert candidate.matched_text == "INV-12345"

        # Regression guard: the earlier bug was autolabel.py reading
        # `matched_value`, which must not exist on Match.
        assert not hasattr(candidate, "matched_value")

    def test_match_attributes_for_annotation_creation(self):
        """Match carries every attribute needed to build an annotation."""
        from shared.matcher.models import Match

        candidate = Match(
            field="amount",
            value="1000.00",
            bbox=(50, 200, 150, 230),
            page_no=0,
            score=0.88,
            matched_text="1 000,00",
            context_keywords=["att betala", "summa"],
        )

        # Attributes the original notes list as used by
        # autolabel._create_annotations_from_matches (matched_text, NOT
        # matched_value).
        for required in ("bbox", "matched_text", "score"):
            assert hasattr(candidate, required)

        # bbox is a 4-tuple: (x0, y0, x1, y1).
        assert len(candidate.bbox) == 4