WIP
This commit is contained in:
176
tests/domain/test_document_classifier.py
Normal file
176
tests/domain/test_document_classifier.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""
|
||||
Tests for DocumentClassifier - TDD RED phase.
|
||||
|
||||
Test document type classification based on extracted fields.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from backend.domain.document_classifier import DocumentClassifier, ClassificationResult
|
||||
|
||||
|
||||
class TestDocumentClassifier:
    """Test document classification logic.

    Each test builds a dict of extracted fields, passes it to
    ``DocumentClassifier.classify`` and checks the ``document_type``,
    ``confidence`` and/or ``reason`` of the returned result.
    """

    @pytest.fixture
    def classifier(self) -> DocumentClassifier:
        """Create a fresh classifier instance for each test."""
        return DocumentClassifier()

    # ==================== Invoice Detection Tests ====================

    def test_classify_with_payment_line_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """Payment line is the strongest invoice indicator."""
        fields: dict[str, str | None] = {
            "payment_line": "# 123456 # 100 00 5 > 308-2963#",
        }

        result = classifier.classify(fields)

        assert result.document_type == "invoice"
        assert result.confidence >= 0.9
        # The reason must name the field that triggered the decision.
        assert "payment_line" in result.reason

    def test_classify_with_multiple_indicators_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """Multiple invoice indicators -> invoice with medium confidence."""
        fields: dict[str, str | None] = {
            "Amount": "1200.00",
            "Bankgiro": "123-4567",
            "payment_line": None,
        }

        result = classifier.classify(fields)

        assert result.document_type == "invoice"
        assert result.confidence >= 0.7

    def test_classify_with_ocr_and_amount_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """OCR + Amount is typical invoice pattern."""
        fields: dict[str, str | None] = {
            "OCR": "123456789012",
            "Amount": "500.00",
        }

        result = classifier.classify(fields)

        assert result.document_type == "invoice"
        assert result.confidence >= 0.7

    def test_classify_with_single_indicator_returns_invoice_lower_confidence(
        self, classifier: DocumentClassifier
    ) -> None:
        """Single indicator -> invoice but lower confidence."""
        fields: dict[str, str | None] = {"Amount": "100.00"}

        result = classifier.classify(fields)

        assert result.document_type == "invoice"
        assert 0.5 <= result.confidence < 0.8

    def test_classify_with_invoice_number_only(
        self, classifier: DocumentClassifier
    ) -> None:
        """Invoice number alone suggests invoice."""
        fields: dict[str, str | None] = {"InvoiceNumber": "INV-2024-001"}

        result = classifier.classify(fields)

        assert result.document_type == "invoice"

    # ==================== Letter Detection Tests ====================

    def test_classify_with_no_indicators_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """No invoice indicators -> letter."""
        fields: dict[str, str | None] = {}

        result = classifier.classify(fields)

        assert result.document_type == "letter"
        assert result.confidence >= 0.5

    def test_classify_with_empty_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """All fields empty or None -> letter."""
        fields: dict[str, str | None] = {
            "payment_line": None,
            "OCR": None,
            "Amount": None,
            "Bankgiro": None,
        }

        result = classifier.classify(fields)

        assert result.document_type == "letter"

    def test_classify_with_only_non_indicator_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Fields that don't indicate invoice -> letter."""
        fields: dict[str, str | None] = {
            "CustomerNumber": "C12345",
            "SupplierOrgNumber": "556677-8899",
        }

        result = classifier.classify(fields)

        assert result.document_type == "letter"

    # ==================== Edge Cases ====================

    def test_classify_with_empty_string_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Empty strings should be treated as missing."""
        fields: dict[str, str | None] = {
            "payment_line": "",
            "Amount": "",
        }

        result = classifier.classify(fields)

        assert result.document_type == "letter"

    def test_classify_with_whitespace_only_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Whitespace-only strings should be treated as missing."""
        fields: dict[str, str | None] = {
            "payment_line": " ",
            "Amount": "\t\n",
        }

        result = classifier.classify(fields)

        assert result.document_type == "letter"

    # ==================== ClassificationResult Immutability ====================

    def test_classification_result_is_immutable(
        self, classifier: DocumentClassifier
    ) -> None:
        """ClassificationResult should be a frozen dataclass."""
        fields: dict[str, str | None] = {"payment_line": "test"}
        result = classifier.classify(fields)

        # A frozen dataclass raises FrozenInstanceError (an AttributeError
        # subclass) on assignment; TypeError is also accepted so other
        # immutable implementations pass as well.
        with pytest.raises((AttributeError, TypeError)):
            result.document_type = "modified"  # type: ignore

    def test_classification_result_has_required_fields(
        self, classifier: DocumentClassifier
    ) -> None:
        """ClassificationResult must have document_type, confidence, reason."""
        fields: dict[str, str | None] = {"Amount": "100.00"}
        result = classifier.classify(fields)

        # Presence first, so a missing attribute fails with a clear message.
        assert hasattr(result, "document_type")
        assert hasattr(result, "confidence")
        assert hasattr(result, "reason")

        # Then the expected runtime type of each attribute.
        assert isinstance(result.document_type, str)
        assert isinstance(result.confidence, float)
        assert isinstance(result.reason, str)
|
||||
Reference in New Issue
Block a user