This commit is contained in:
Yaojia Wang
2026-02-03 22:29:53 +01:00
parent 4c7fc3015c
commit c2c8f2dd04
10 changed files with 786 additions and 68 deletions

1
tests/domain/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Domain layer tests

View File

@@ -0,0 +1,176 @@
"""
Tests for DocumentClassifier - TDD RED phase.
Test document type classification based on extracted fields.
"""
import pytest
from backend.domain.document_classifier import DocumentClassifier, ClassificationResult
class TestDocumentClassifier:
    """Behavioural tests for ``DocumentClassifier.classify``.

    Every test hands the classifier a mapping of extracted field names to
    values and inspects the ``document_type``, ``confidence`` and ``reason``
    attributes of the result it returns.
    """

    @pytest.fixture
    def classifier(self) -> DocumentClassifier:
        """Provide a ``DocumentClassifier`` constructed without arguments."""
        return DocumentClassifier()

    # ==================== Invoice Detection Tests ====================

    def test_classify_with_payment_line_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """A populated payment line gives an invoice with confidence >= 0.9."""
        outcome = classifier.classify(
            {"payment_line": "# 123456 # 100 00 5 > 308-2963#"}
        )

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.9
        # The explanation is expected to name the field that decided it.
        assert "payment_line" in outcome.reason

    def test_classify_with_multiple_indicators_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """Amount plus Bankgiro (payment line absent) gives confidence >= 0.7."""
        outcome = classifier.classify(
            {
                "Amount": "1200.00",
                "Bankgiro": "123-4567",
                "payment_line": None,
            }
        )

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.7

    def test_classify_with_ocr_and_amount_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """An OCR reference together with an Amount gives confidence >= 0.7."""
        outcome = classifier.classify(
            {
                "OCR": "123456789012",
                "Amount": "500.00",
            }
        )

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.7

    def test_classify_with_single_indicator_returns_invoice_lower_confidence(
        self, classifier: DocumentClassifier
    ) -> None:
        """A lone Amount is still an invoice, with confidence in [0.5, 0.8)."""
        outcome = classifier.classify({"Amount": "100.00"})

        assert outcome.document_type == "invoice"
        assert 0.5 <= outcome.confidence < 0.8

    def test_classify_with_invoice_number_only(
        self, classifier: DocumentClassifier
    ) -> None:
        """An InvoiceNumber on its own is enough to classify as invoice."""
        outcome = classifier.classify({"InvoiceNumber": "INV-2024-001"})

        assert outcome.document_type == "invoice"

    # ==================== Letter Detection Tests ====================

    def test_classify_with_no_indicators_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """An empty mapping is classified as letter with confidence >= 0.5."""
        nothing_extracted: dict[str, str | None] = {}

        outcome = classifier.classify(nothing_extracted)

        assert outcome.document_type == "letter"
        assert outcome.confidence >= 0.5

    def test_classify_with_empty_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Indicator keys that all map to None are classified as letter."""
        # dict.fromkeys fills every key with None by default.
        all_none = dict.fromkeys(("payment_line", "OCR", "Amount", "Bankgiro"))

        outcome = classifier.classify(all_none)

        assert outcome.document_type == "letter"

    def test_classify_with_only_non_indicator_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """CustomerNumber and SupplierOrgNumber alone are classified as letter."""
        outcome = classifier.classify(
            {
                "CustomerNumber": "C12345",
                "SupplierOrgNumber": "556677-8899",
            }
        )

        assert outcome.document_type == "letter"

    # ==================== Edge Cases ====================

    def test_classify_with_empty_string_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Empty-string values count as missing, so the result is letter."""
        outcome = classifier.classify(
            {
                "payment_line": "",
                "Amount": "",
            }
        )

        assert outcome.document_type == "letter"

    def test_classify_with_whitespace_only_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Whitespace-only values count as missing, so the result is letter."""
        outcome = classifier.classify(
            {
                "payment_line": " ",
                "Amount": "\t\n",
            }
        )

        assert outcome.document_type == "letter"

    # ==================== ClassificationResult Immutability ====================

    def test_classification_result_is_immutable(
        self, classifier: DocumentClassifier
    ) -> None:
        """Assigning to an attribute of the result must raise."""
        outcome = classifier.classify({"payment_line": "test"})

        # Either exception type is accepted for the rejected assignment.
        with pytest.raises((AttributeError, TypeError)):
            outcome.document_type = "modified"  # type: ignore

    def test_classification_result_has_required_fields(
        self, classifier: DocumentClassifier
    ) -> None:
        """The result exposes document_type (str), confidence (float), reason (str)."""
        outcome = classifier.classify({"Amount": "100.00"})

        for attribute in ("document_type", "confidence", "reason"):
            assert hasattr(outcome, attribute)

        assert isinstance(outcome.document_type, str)
        assert isinstance(outcome.confidence, float)
        assert isinstance(outcome.reason, str)

View File

@@ -0,0 +1,232 @@
"""
Tests for InvoiceValidator - TDD RED phase.
Test invoice field validation logic.
"""
import pytest
from backend.domain.invoice_validator import (
InvoiceValidator,
ValidationResult,
ValidationIssue,
)
class TestInvoiceValidator:
    """Behavioural tests for ``InvoiceValidator.validate``.

    Every test passes a mapping of field values and a mapping of per-field
    confidence scores, then inspects ``is_valid``, ``issues`` and
    ``confidence`` on the returned result.
    """

    @staticmethod
    def _issues_with(
        result: ValidationResult, severity: str
    ) -> list[ValidationIssue]:
        """Return the entries of ``result.issues`` whose severity matches."""
        return [issue for issue in result.issues if issue.severity == severity]

    @pytest.fixture
    def validator(self) -> InvoiceValidator:
        """Provide an ``InvoiceValidator`` constructed without arguments."""
        return InvoiceValidator()

    @pytest.fixture
    def validator_strict(self) -> InvoiceValidator:
        """Provide an ``InvoiceValidator`` built with ``min_confidence=0.8``."""
        return InvoiceValidator(min_confidence=0.8)

    # ==================== Valid Invoice Tests ====================

    def test_validate_complete_invoice_is_valid(
        self, validator: InvoiceValidator
    ) -> None:
        """Amount, OCR and Bankgiro with high scores validate with no errors."""
        verdict = validator.validate(
            {
                "Amount": "1200.00",
                "OCR": "123456789012",
                "Bankgiro": "123-4567",
            },
            {
                "Amount": 0.95,
                "OCR": 0.90,
                "Bankgiro": 0.85,
            },
        )

        assert verdict.is_valid is True
        assert not self._issues_with(verdict, "error")

    def test_validate_invoice_with_payment_line_is_valid(
        self, validator: InvoiceValidator
    ) -> None:
        """An Amount accompanied by a payment_line is accepted as valid."""
        verdict = validator.validate(
            {
                "Amount": "500.00",
                "payment_line": "# 123 # 500 00 5 > 308#",
            },
            {"Amount": 0.9, "payment_line": 0.85},
        )

        assert verdict.is_valid is True

    # ==================== Invalid Invoice Tests ====================

    def test_validate_missing_amount_is_invalid(
        self, validator: InvoiceValidator
    ) -> None:
        """Leaving out Amount makes the invoice invalid with an Amount error."""
        verdict = validator.validate(
            {
                "OCR": "123456789012",
                "Bankgiro": "123-4567",
            },
            {"OCR": 0.9, "Bankgiro": 0.85},
        )

        assert verdict.is_valid is False
        fields_in_error = [
            issue.field for issue in self._issues_with(verdict, "error")
        ]
        assert "Amount" in fields_in_error

    def test_validate_missing_payment_reference_produces_warning(
        self, validator: InvoiceValidator
    ) -> None:
        """With only an Amount, a payment_reference warning is reported."""
        verdict = validator.validate({"Amount": "1200.00"}, {"Amount": 0.9})

        # Missing payment ref is warning, not error
        fields_with_warning = [
            issue.field for issue in self._issues_with(verdict, "warning")
        ]
        assert "payment_reference" in fields_with_warning

    # ==================== Confidence Threshold Tests ====================

    def test_validate_low_confidence_produces_warning(
        self, validator: InvoiceValidator
    ) -> None:
        """A low-scoring field yields a warning whose message mentions confidence."""
        verdict = validator.validate(
            {
                "Amount": "1200.00",
                "OCR": "123456789012",
            },
            {
                "Amount": 0.9,
                "OCR": 0.3,  # Below default threshold of 0.5
            },
        )

        confidence_warnings = [
            warning
            for warning in self._issues_with(verdict, "warning")
            if "confidence" in warning.message.lower()
        ]
        assert confidence_warnings

    def test_validate_strict_threshold_more_warnings(
        self, validator_strict: InvoiceValidator
    ) -> None:
        """With min_confidence=0.8, both sub-0.8 fields get a confidence warning."""
        verdict = validator_strict.validate(
            {
                "Amount": "1200.00",
                "OCR": "123456789012",
            },
            {
                "Amount": 0.7,  # Below 0.8 threshold
                "OCR": 0.6,  # Below 0.8 threshold
            },
        )

        confidence_warnings = [
            warning
            for warning in self._issues_with(verdict, "warning")
            if "confidence" in warning.message.lower()
        ]
        assert len(confidence_warnings) >= 2

    # ==================== Edge Cases ====================

    def test_validate_empty_fields_is_invalid(
        self, validator: InvoiceValidator
    ) -> None:
        """Validating two empty mappings reports the invoice as invalid."""
        no_fields: dict[str, str | None] = {}
        no_scores: dict[str, float] = {}

        verdict = validator.validate(no_fields, no_scores)

        assert verdict.is_valid is False

    def test_validate_none_field_values_treated_as_missing(
        self, validator: InvoiceValidator
    ) -> None:
        """An Amount of None is handled like an absent Amount."""
        verdict = validator.validate(
            {
                "Amount": None,
                "OCR": "123456789012",
            },
            {"OCR": 0.9},
        )

        assert verdict.is_valid is False
        fields_in_error = [
            issue.field for issue in self._issues_with(verdict, "error")
        ]
        assert "Amount" in fields_in_error

    def test_validate_empty_string_treated_as_missing(
        self, validator: InvoiceValidator
    ) -> None:
        """An empty-string Amount makes the invoice invalid."""
        verdict = validator.validate(
            {
                "Amount": "",
                "OCR": "123456789012",
            },
            {"OCR": 0.9},
        )

        assert verdict.is_valid is False

    # ==================== ValidationResult Properties ====================

    def test_validation_result_is_immutable(
        self, validator: InvoiceValidator
    ) -> None:
        """Assigning to an attribute of the result must raise."""
        verdict = validator.validate(
            {"Amount": "100.00", "OCR": "123"},
            {"Amount": 0.9, "OCR": 0.9},
        )

        # Either exception type is accepted for the rejected assignment.
        with pytest.raises((AttributeError, TypeError)):
            verdict.is_valid = False  # type: ignore

    def test_validation_result_issues_is_tuple(
        self, validator: InvoiceValidator
    ) -> None:
        """The ``issues`` attribute of the result is a tuple."""
        verdict = validator.validate({"Amount": "100.00"}, {"Amount": 0.9})

        assert isinstance(verdict.issues, tuple)

    def test_validation_result_has_confidence(
        self, validator: InvoiceValidator
    ) -> None:
        """The result carries a ``confidence`` value within [0.0, 1.0]."""
        verdict = validator.validate(
            {"Amount": "100.00", "OCR": "123"},
            {"Amount": 0.9, "OCR": 0.8},
        )

        assert hasattr(verdict, "confidence")
        assert 0.0 <= verdict.confidence <= 1.0

    # ==================== ValidationIssue Tests ====================

    def test_validation_issue_has_required_fields(
        self, validator: InvoiceValidator
    ) -> None:
        """Each issue exposes field, severity and message; severity is a known level."""
        no_fields: dict[str, str | None] = {}
        no_scores: dict[str, float] = {}

        verdict = validator.validate(no_fields, no_scores)

        # Empty input must yield at least one issue to inspect.
        assert len(verdict.issues) > 0
        first_issue = verdict.issues[0]
        for attribute in ("field", "severity", "message"):
            assert hasattr(first_issue, attribute)
        assert first_issue.severity in ("error", "warning", "info")

View File

@@ -232,10 +232,8 @@ class TestInferenceServicePDFRendering:
@patch('backend.pipeline.pipeline.InferencePipeline')
@patch('backend.pipeline.yolo_detector.YOLODetector')
@patch('shared.pdf.renderer.render_pdf_to_images')
@patch('ultralytics.YOLO')
def test_pdf_visualization_imports_correctly(
self,
mock_yolo_class,
mock_render_pdf,
mock_yolo_detector,
mock_pipeline,
@@ -248,12 +246,22 @@ class TestInferenceServicePDFRendering:
This catches the import error we had with:
from ..pdf.renderer (wrong) vs from shared.pdf.renderer (correct)
"""
# Setup mocks
# Setup mocks for detector
mock_detector_instance = Mock()
mock_pipeline_instance = Mock()
mock_model = Mock()
mock_result = Mock()
mock_result.save = Mock()
mock_model.predict.return_value = [mock_result]
mock_detector_instance.model = mock_model
mock_yolo_detector.return_value = mock_detector_instance
# Setup mock for pipeline
mock_pipeline_instance = Mock()
mock_pipeline.return_value = mock_pipeline_instance
# Initialize service to setup _detector
inference_service.initialize()
# Create a fake PDF path
pdf_path = tmp_path / "test.pdf"
pdf_path.touch()
@@ -264,18 +272,12 @@ class TestInferenceServicePDFRendering:
img.save(image_bytes, format='PNG')
mock_render_pdf.return_value = [(1, image_bytes.getvalue())]
# Mock YOLO
mock_model_instance = Mock()
mock_result = Mock()
mock_result.save = Mock()
mock_model_instance.predict.return_value = [mock_result]
mock_yolo_class.return_value = mock_model_instance
# This should not raise ImportError
# This should not raise ImportError and should use self._detector.model
result_path = inference_service._save_pdf_visualization(pdf_path, "test123")
# Verify import was successful
# Verify import was successful and detector.model was used
mock_render_pdf.assert_called_once()
mock_model.predict.assert_called_once()
assert result_path is not None