""" Tests for DocumentClassifier - TDD RED phase. Test document type classification based on extracted fields. """ import pytest from backend.domain.document_classifier import DocumentClassifier, ClassificationResult class TestDocumentClassifier: """Test document classification logic.""" @pytest.fixture def classifier(self) -> DocumentClassifier: """Create classifier instance.""" return DocumentClassifier() # ==================== Invoice Detection Tests ==================== def test_classify_with_payment_line_returns_invoice( self, classifier: DocumentClassifier ) -> None: """Payment line is the strongest invoice indicator.""" fields = {"payment_line": "# 123456 # 100 00 5 > 308-2963#"} result = classifier.classify(fields) assert result.document_type == "invoice" assert result.confidence >= 0.9 assert "payment_line" in result.reason def test_classify_with_multiple_indicators_returns_invoice( self, classifier: DocumentClassifier ) -> None: """Multiple invoice indicators -> invoice with medium confidence.""" fields = { "Amount": "1200.00", "Bankgiro": "123-4567", "payment_line": None, } result = classifier.classify(fields) assert result.document_type == "invoice" assert result.confidence >= 0.7 def test_classify_with_ocr_and_amount_returns_invoice( self, classifier: DocumentClassifier ) -> None: """OCR + Amount is typical invoice pattern.""" fields = { "OCR": "123456789012", "Amount": "500.00", } result = classifier.classify(fields) assert result.document_type == "invoice" assert result.confidence >= 0.7 def test_classify_with_single_indicator_returns_invoice_lower_confidence( self, classifier: DocumentClassifier ) -> None: """Single indicator -> invoice but lower confidence.""" fields = {"Amount": "100.00"} result = classifier.classify(fields) assert result.document_type == "invoice" assert 0.5 <= result.confidence < 0.8 def test_classify_with_invoice_number_only( self, classifier: DocumentClassifier ) -> None: """Invoice number alone suggests invoice.""" fields = {"InvoiceNumber": "INV-2024-001"} result = classifier.classify(fields) assert result.document_type == "invoice" # ==================== Letter Detection Tests ==================== def test_classify_with_no_indicators_returns_letter( self, classifier: DocumentClassifier ) -> None: """No invoice indicators -> letter.""" fields: dict[str, str | None] = {} result = classifier.classify(fields) assert result.document_type == "letter" assert result.confidence >= 0.5 def test_classify_with_empty_fields_returns_letter( self, classifier: DocumentClassifier ) -> None: """All fields empty or None -> letter.""" fields = { "payment_line": None, "OCR": None, "Amount": None, "Bankgiro": None, } result = classifier.classify(fields) assert result.document_type == "letter" def test_classify_with_only_non_indicator_fields_returns_letter( self, classifier: DocumentClassifier ) -> None: """Fields that don't indicate invoice -> letter.""" fields = { "CustomerNumber": "C12345", "SupplierOrgNumber": "556677-8899", } result = classifier.classify(fields) assert result.document_type == "letter" # ==================== Edge Cases ==================== def test_classify_with_empty_string_fields_returns_letter( self, classifier: DocumentClassifier ) -> None: """Empty strings should be treated as missing.""" fields = { "payment_line": "", "Amount": "", } result = classifier.classify(fields) assert result.document_type == "letter" def test_classify_with_whitespace_only_fields_returns_letter( self, classifier: DocumentClassifier ) -> None: """Whitespace-only strings should be treated as missing.""" fields = { "payment_line": " ", "Amount": "\t\n", } result = classifier.classify(fields) assert result.document_type == "letter" # ==================== ClassificationResult Immutability ==================== def test_classification_result_is_immutable( self, classifier: DocumentClassifier ) -> None: """ClassificationResult should be a frozen dataclass.""" fields = {"payment_line": "test"} result = classifier.classify(fields) with pytest.raises((AttributeError, TypeError)): result.document_type = "modified" # type: ignore def test_classification_result_has_required_fields( self, classifier: DocumentClassifier ) -> None: """ClassificationResult must have document_type, confidence, reason.""" fields = {"Amount": "100.00"} result = classifier.classify(fields) assert hasattr(result, "document_type") assert hasattr(result, "confidence") assert hasattr(result, "reason") assert isinstance(result.document_type, str) assert isinstance(result.confidence, float) assert isinstance(result.reason, str)