WIP
This commit is contained in:
1
tests/domain/__init__.py
Normal file
1
tests/domain/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Domain layer tests
|
||||
176
tests/domain/test_document_classifier.py
Normal file
176
tests/domain/test_document_classifier.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""
|
||||
Tests for DocumentClassifier - TDD RED phase.
|
||||
|
||||
Test document type classification based on extracted fields.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from backend.domain.document_classifier import DocumentClassifier, ClassificationResult
|
||||
|
||||
|
||||
class TestDocumentClassifier:
    """Behavioural tests for ``DocumentClassifier.classify``.

    Every test passes a mapping of extracted field names to values (or
    ``None``) and inspects the ``document_type``, ``confidence`` and
    ``reason`` of the returned result.
    """

    @pytest.fixture
    def classifier(self) -> DocumentClassifier:
        """Provide a default-constructed classifier to each test."""
        return DocumentClassifier()

    # ---------- Inputs expected to be classified as "invoice" ----------

    def test_classify_with_payment_line_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """A populated payment line gives an invoice with confidence >= 0.9."""
        outcome = classifier.classify(
            {"payment_line": "# 123456 # 100 00 5 > 308-2963#"}
        )

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.9
        # The reason text is expected to name the field that decided it.
        assert "payment_line" in outcome.reason

    def test_classify_with_multiple_indicators_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """Amount plus Bankgiro, without a payment line, is still an invoice."""
        extracted = {
            "Amount": "1200.00",
            "Bankgiro": "123-4567",
            "payment_line": None,
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.7

    def test_classify_with_ocr_and_amount_returns_invoice(
        self, classifier: DocumentClassifier
    ) -> None:
        """OCR reference together with an amount is classified as invoice."""
        extracted = {
            "OCR": "123456789012",
            "Amount": "500.00",
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "invoice"
        assert outcome.confidence >= 0.7

    def test_classify_with_single_indicator_returns_invoice_lower_confidence(
        self, classifier: DocumentClassifier
    ) -> None:
        """A lone Amount is an invoice with confidence in [0.5, 0.8)."""
        outcome = classifier.classify({"Amount": "100.00"})

        assert outcome.document_type == "invoice"
        assert 0.5 <= outcome.confidence < 0.8

    def test_classify_with_invoice_number_only(
        self, classifier: DocumentClassifier
    ) -> None:
        """An invoice number on its own is enough to classify as invoice."""
        outcome = classifier.classify({"InvoiceNumber": "INV-2024-001"})

        assert outcome.document_type == "invoice"

    # ---------- Inputs expected to be classified as "letter" ----------

    def test_classify_with_no_indicators_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """An empty mapping is classified as letter with confidence >= 0.5."""
        nothing_extracted: dict[str, str | None] = {}

        outcome = classifier.classify(nothing_extracted)

        assert outcome.document_type == "letter"
        assert outcome.confidence >= 0.5

    def test_classify_with_empty_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Indicator keys that are all ``None`` are classified as letter."""
        extracted = {
            "payment_line": None,
            "OCR": None,
            "Amount": None,
            "Bankgiro": None,
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "letter"

    def test_classify_with_only_non_indicator_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Customer and supplier numbers alone are classified as letter."""
        extracted = {
            "CustomerNumber": "C12345",
            "SupplierOrgNumber": "556677-8899",
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "letter"

    # ---------- Blank values must count as missing ----------

    def test_classify_with_empty_string_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Empty-string indicator values are classified as letter."""
        extracted = {
            "payment_line": "",
            "Amount": "",
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "letter"

    def test_classify_with_whitespace_only_fields_returns_letter(
        self, classifier: DocumentClassifier
    ) -> None:
        """Whitespace-only indicator values are classified as letter."""
        extracted = {
            "payment_line": " ",
            "Amount": "\t\n",
        }

        outcome = classifier.classify(extracted)

        assert outcome.document_type == "letter"

    # ---------- Shape of the returned result object ----------

    def test_classification_result_is_immutable(
        self, classifier: DocumentClassifier
    ) -> None:
        """Assigning to an attribute of the result must raise."""
        outcome = classifier.classify({"payment_line": "test"})

        # Either exception type is accepted for a rejected attribute write.
        with pytest.raises((AttributeError, TypeError)):
            outcome.document_type = "modified"  # type: ignore

    def test_classification_result_has_required_fields(
        self, classifier: DocumentClassifier
    ) -> None:
        """The result exposes typed document_type, confidence and reason."""
        outcome = classifier.classify({"Amount": "100.00"})

        for attribute in ("document_type", "confidence", "reason"):
            assert hasattr(outcome, attribute)

        assert isinstance(outcome.document_type, str)
        assert isinstance(outcome.confidence, float)
        assert isinstance(outcome.reason, str)
|
||||
232
tests/domain/test_invoice_validator.py
Normal file
232
tests/domain/test_invoice_validator.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
Tests for InvoiceValidator - TDD RED phase.
|
||||
|
||||
Test invoice field validation logic.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from backend.domain.invoice_validator import (
|
||||
InvoiceValidator,
|
||||
ValidationResult,
|
||||
ValidationIssue,
|
||||
)
|
||||
|
||||
|
||||
class TestInvoiceValidator:
    """Tests for ``InvoiceValidator.validate``.

    ``validate`` is called with two mappings keyed by field name: the
    extracted values and the per-field extraction confidence. The tests
    inspect ``is_valid``, ``issues`` and ``confidence`` on the result.
    """

    @pytest.fixture
    def validator(self) -> InvoiceValidator:
        """Create validator instance with default settings."""
        return InvoiceValidator()

    @pytest.fixture
    def validator_strict(self) -> InvoiceValidator:
        """Create validator with ``min_confidence=0.8``."""
        return InvoiceValidator(min_confidence=0.8)

    # Private helpers (leading underscore, so pytest does not collect them).

    @staticmethod
    def _fields_with_severity(
        result: ValidationResult, severity: str
    ) -> list[str]:
        """Return the ``field`` of every issue in *result* with *severity*."""
        return [
            issue.field for issue in result.issues if issue.severity == severity
        ]

    @staticmethod
    def _low_confidence_warnings(
        result: ValidationResult,
    ) -> list[ValidationIssue]:
        """Return warnings whose message mentions "confidence" (any case)."""
        return [
            issue
            for issue in result.issues
            if issue.severity == "warning"
            and "confidence" in issue.message.lower()
        ]

    # ==================== Valid Invoice Tests ====================

    def test_validate_complete_invoice_is_valid(
        self, validator: InvoiceValidator
    ) -> None:
        """Complete invoice with all required fields is valid."""
        fields = {
            "Amount": "1200.00",
            "OCR": "123456789012",
            "Bankgiro": "123-4567",
        }
        confidence = {
            "Amount": 0.95,
            "OCR": 0.90,
            "Bankgiro": 0.85,
        }

        result = validator.validate(fields, confidence)

        assert result.is_valid is True
        assert self._fields_with_severity(result, "error") == []

    def test_validate_invoice_with_payment_line_is_valid(
        self, validator: InvoiceValidator
    ) -> None:
        """Invoice with payment_line as payment reference is valid."""
        fields = {
            "Amount": "500.00",
            "payment_line": "# 123 # 500 00 5 > 308#",
        }
        confidence = {"Amount": 0.9, "payment_line": 0.85}

        result = validator.validate(fields, confidence)

        assert result.is_valid is True

    # ==================== Invalid Invoice Tests ====================

    def test_validate_missing_amount_is_invalid(
        self, validator: InvoiceValidator
    ) -> None:
        """Missing Amount field should produce error."""
        fields = {
            "OCR": "123456789012",
            "Bankgiro": "123-4567",
        }
        confidence = {"OCR": 0.9, "Bankgiro": 0.85}

        result = validator.validate(fields, confidence)

        assert result.is_valid is False
        assert "Amount" in self._fields_with_severity(result, "error")

    def test_validate_missing_payment_reference_produces_warning(
        self, validator: InvoiceValidator
    ) -> None:
        """Missing all payment references should produce a warning only."""
        fields = {"Amount": "1200.00"}
        confidence = {"Amount": 0.9}

        result = validator.validate(fields, confidence)

        # Missing payment ref is warning, not error: check both halves of
        # that statement so an implementation that escalates it to an error
        # (possibly alongside the warning) is caught.
        assert "payment_reference" in self._fields_with_severity(
            result, "warning"
        )
        assert "payment_reference" not in self._fields_with_severity(
            result, "error"
        )

    # ==================== Confidence Threshold Tests ====================

    def test_validate_low_confidence_produces_warning(
        self, validator: InvoiceValidator
    ) -> None:
        """Fields below confidence threshold should produce warning."""
        fields = {
            "Amount": "1200.00",
            "OCR": "123456789012",
        }
        confidence = {
            "Amount": 0.9,
            "OCR": 0.3,  # Below default threshold of 0.5
        }

        result = validator.validate(fields, confidence)

        assert len(self._low_confidence_warnings(result)) > 0

    def test_validate_strict_threshold_more_warnings(
        self, validator_strict: InvoiceValidator
    ) -> None:
        """Strict validator should warn for every field below 0.8."""
        fields = {
            "Amount": "1200.00",
            "OCR": "123456789012",
        }
        confidence = {
            "Amount": 0.7,  # Below 0.8 threshold
            "OCR": 0.6,  # Below 0.8 threshold
        }

        result = validator_strict.validate(fields, confidence)

        # One warning is expected per low-confidence field.
        assert len(self._low_confidence_warnings(result)) >= 2

    # ==================== Edge Cases ====================

    def test_validate_empty_fields_is_invalid(
        self, validator: InvoiceValidator
    ) -> None:
        """Empty fields dict should be invalid."""
        fields: dict[str, str | None] = {}
        confidence: dict[str, float] = {}

        result = validator.validate(fields, confidence)

        assert result.is_valid is False

    def test_validate_none_field_values_treated_as_missing(
        self, validator: InvoiceValidator
    ) -> None:
        """None values should be treated as missing."""
        fields = {
            "Amount": None,
            "OCR": "123456789012",
        }
        confidence = {"OCR": 0.9}

        result = validator.validate(fields, confidence)

        assert result.is_valid is False
        assert "Amount" in self._fields_with_severity(result, "error")

    def test_validate_empty_string_treated_as_missing(
        self, validator: InvoiceValidator
    ) -> None:
        """Empty string should be treated as missing."""
        fields = {
            "Amount": "",
            "OCR": "123456789012",
        }
        confidence = {"OCR": 0.9}

        result = validator.validate(fields, confidence)

        assert result.is_valid is False

    # ==================== ValidationResult Properties ====================

    def test_validation_result_is_immutable(
        self, validator: InvoiceValidator
    ) -> None:
        """Assigning to an attribute of ValidationResult must raise."""
        fields = {"Amount": "100.00", "OCR": "123"}
        confidence = {"Amount": 0.9, "OCR": 0.9}
        result = validator.validate(fields, confidence)

        # Either exception type is accepted for a rejected attribute write.
        with pytest.raises((AttributeError, TypeError)):
            result.is_valid = False  # type: ignore

    def test_validation_result_issues_is_tuple(
        self, validator: InvoiceValidator
    ) -> None:
        """Issues should be a tuple (immutable)."""
        fields = {"Amount": "100.00"}
        confidence = {"Amount": 0.9}
        result = validator.validate(fields, confidence)

        assert isinstance(result.issues, tuple)

    def test_validation_result_has_confidence(
        self, validator: InvoiceValidator
    ) -> None:
        """ValidationResult should have a confidence score in [0.0, 1.0]."""
        fields = {"Amount": "100.00", "OCR": "123"}
        confidence = {"Amount": 0.9, "OCR": 0.8}
        result = validator.validate(fields, confidence)

        assert hasattr(result, "confidence")
        assert 0.0 <= result.confidence <= 1.0

    # ==================== ValidationIssue Tests ====================

    def test_validation_issue_has_required_fields(
        self, validator: InvoiceValidator
    ) -> None:
        """ValidationIssue must have field, severity, message."""
        fields: dict[str, str | None] = {}
        confidence: dict[str, float] = {}
        result = validator.validate(fields, confidence)

        # Empty input must yield at least one issue to inspect.
        assert len(result.issues) > 0
        issue = result.issues[0]

        assert hasattr(issue, "field")
        assert hasattr(issue, "severity")
        assert hasattr(issue, "message")
        assert issue.severity in ("error", "warning", "info")
|
||||
Reference in New Issue
Block a user