Files
invoice-master-poc-v2/tests/domain/test_document_classifier.py
Yaojia Wang c2c8f2dd04 WIP
2026-02-03 22:29:53 +01:00

177 lines
5.5 KiB
Python

"""
Tests for DocumentClassifier - TDD RED phase.
Test document type classification based on extracted fields.
"""
import pytest
from backend.domain.document_classifier import DocumentClassifier, ClassificationResult
class TestDocumentClassifier:
    """Specification tests for ``DocumentClassifier.classify``.

    Each test passes a mapping of extracted field names to values and checks
    the ``document_type``, ``confidence`` and/or ``reason`` of the result.
    """

    @pytest.fixture
    def classifier(self) -> DocumentClassifier:
        """Build the classifier under test."""
        return DocumentClassifier()

    # ==================== Invoice Detection Tests ====================

    def test_classify_with_payment_line_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """A populated payment line gives a high-confidence invoice verdict."""
        outcome = classifier.classify(
            {"payment_line": "# 123456 # 100 00 5 > 308-2963#"}
        )

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.9
        # The explanation is expected to name the field that drove the verdict.
        assert "payment_line" in outcome.reason

    def test_classify_with_multiple_indicators_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """Amount plus Bankgiro, without a payment line, is still an invoice."""
        extracted = {
            "Amount": "1200.00",
            "Bankgiro": "123-4567",
            "payment_line": None,
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.7

    def test_classify_with_ocr_and_amount_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """OCR reference together with an amount classifies as invoice."""
        extracted = {
            "OCR": "123456789012",
            "Amount": "500.00",
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.7

    def test_classify_with_single_indicator_returns_invoice_lower_confidence(
        self, classifier: DocumentClassifier
    ) -> None:
        """A lone Amount is an invoice with confidence in [0.5, 0.8)."""
        outcome = classifier.classify({"Amount": "100.00"})

        assert outcome.document_type == "invoice"
        score = outcome.confidence
        assert score >= 0.5
        assert score < 0.8

    def test_classify_with_invoice_number_only(
        self, classifier: DocumentClassifier
    ) -> None:
        """An invoice number on its own is enough for an invoice verdict."""
        outcome = classifier.classify({"InvoiceNumber": "INV-2024-001"})

        assert outcome.document_type == "invoice"

    # ==================== Letter Detection Tests ====================

    def test_classify_with_no_indicators_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """An empty field mapping is classified as a letter."""
        no_fields: dict[str, str | None] = {}

        outcome = classifier.classify(no_fields)

        assert outcome.document_type == "letter"
        assert outcome.confidence >= 0.5

    def test_classify_with_empty_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Indicator keys whose values are all None produce a letter."""
        extracted = {
            "payment_line": None,
            "OCR": None,
            "Amount": None,
            "Bankgiro": None,
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "letter"

    def test_classify_with_only_non_indicator_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Customer and supplier numbers alone do not make an invoice."""
        extracted = {
            "CustomerNumber": "C12345",
            "SupplierOrgNumber": "556677-8899",
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "letter"

    # ==================== Edge Cases ====================

    def test_classify_with_empty_string_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Empty-string values count as missing, so the result is a letter."""
        extracted = {
            "payment_line": "",
            "Amount": "",
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "letter"

    def test_classify_with_whitespace_only_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Whitespace-only values count as missing, so the result is a letter."""
        extracted = {
            "payment_line": " ",
            "Amount": "\t\n",
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "letter"

    # ==================== ClassificationResult Immutability ====================

    def test_classification_result_is_immutable(
        self, classifier: DocumentClassifier
    ) -> None:
        """Assigning to a result attribute must raise AttributeError or TypeError."""
        outcome = classifier.classify({"payment_line": "test"})

        with pytest.raises((AttributeError, TypeError)):
            outcome.document_type = "modified"  # type: ignore

    def test_classification_result_has_required_fields(
        self, classifier: DocumentClassifier
    ) -> None:
        """The result exposes document_type (str), confidence (float), reason (str)."""
        outcome = classifier.classify({"Amount": "100.00"})

        # Presence is checked for every attribute before any type is inspected.
        for attribute in ("document_type", "confidence", "reason"):
            assert hasattr(outcome, attribute)

        assert isinstance(outcome.document_type, str)
        assert isinstance(outcome.confidence, float)
        assert isinstance(outcome.reason, str)