""" Document Service Integration Tests Tests DocumentService with real storage operations. """ from pathlib import Path from unittest.mock import MagicMock import pytest from inference.web.services.document_service import DocumentService, DocumentResult class MockStorageBackend: """Simple in-memory storage backend for testing.""" def __init__(self): self._files: dict[str, bytes] = {} def upload_bytes(self, content: bytes, remote_path: str, overwrite: bool = False) -> None: if not overwrite and remote_path in self._files: raise FileExistsError(f"File already exists: {remote_path}") self._files[remote_path] = content def download_bytes(self, remote_path: str) -> bytes: if remote_path not in self._files: raise FileNotFoundError(f"File not found: {remote_path}") return self._files[remote_path] def get_presigned_url(self, remote_path: str, expires_in_seconds: int = 3600) -> str: return f"https://storage.example.com/{remote_path}?expires={expires_in_seconds}" def exists(self, remote_path: str) -> bool: return remote_path in self._files def delete(self, remote_path: str) -> bool: if remote_path in self._files: del self._files[remote_path] return True return False def list_files(self, prefix: str) -> list[str]: return [path for path in self._files.keys() if path.startswith(prefix)] @pytest.fixture def mock_storage(): """Create a mock storage backend.""" return MockStorageBackend() @pytest.fixture def document_service(mock_storage): """Create a DocumentService with mock storage.""" return DocumentService(storage_backend=mock_storage) class TestDocumentUpload: """Tests for document upload operations.""" def test_upload_document(self, document_service): """Test uploading a document.""" content = b"%PDF-1.4 test content" filename = "test_invoice.pdf" result = document_service.upload_document(content, filename) assert result is not None assert result.id is not None assert result.filename == filename assert result.file_path.startswith("documents/") assert result.file_path.endswith(".pdf") def test_upload_document_with_custom_id(self, document_service): """Test uploading with custom document ID.""" content = b"%PDF-1.4 test content" filename = "invoice.pdf" custom_id = "custom-doc-12345" result = document_service.upload_document( content, filename, document_id=custom_id ) assert result.id == custom_id assert custom_id in result.file_path def test_upload_preserves_extension(self, document_service): """Test that file extension is preserved.""" cases = [ ("document.pdf", ".pdf"), ("image.PNG", ".png"), ("file.JPEG", ".jpeg"), ("noextension", ""), ] for filename, expected_ext in cases: result = document_service.upload_document(b"content", filename) if expected_ext: assert result.file_path.endswith(expected_ext) def test_upload_document_overwrite(self, document_service, mock_storage): """Test that upload overwrites existing file.""" content1 = b"original content" content2 = b"new content" doc_id = "overwrite-test" document_service.upload_document(content1, "doc.pdf", document_id=doc_id) document_service.upload_document(content2, "doc.pdf", document_id=doc_id) # Should have new content remote_path = f"documents/{doc_id}.pdf" stored_content = mock_storage.download_bytes(remote_path) assert stored_content == content2 class TestDocumentDownload: """Tests for document download operations.""" def test_download_document(self, document_service, mock_storage): """Test downloading a document.""" content = b"test document content" remote_path = "documents/test-doc.pdf" mock_storage.upload_bytes(content, remote_path) downloaded = document_service.download_document(remote_path) assert downloaded == content def test_download_nonexistent_document(self, document_service): """Test downloading document that doesn't exist.""" with pytest.raises(FileNotFoundError): document_service.download_document("documents/nonexistent.pdf") class TestDocumentUrl: """Tests for document URL generation.""" def test_get_document_url(self, document_service, mock_storage): """Test getting presigned URL for document.""" remote_path = "documents/test-doc.pdf" mock_storage.upload_bytes(b"content", remote_path) url = document_service.get_document_url(remote_path, expires_in_seconds=7200) assert url.startswith("https://") assert remote_path in url assert "7200" in url def test_get_document_url_default_expiry(self, document_service): """Test default URL expiry.""" url = document_service.get_document_url("documents/doc.pdf") assert "3600" in url class TestDocumentExists: """Tests for document existence check.""" def test_document_exists(self, document_service, mock_storage): """Test checking if document exists.""" remote_path = "documents/existing.pdf" mock_storage.upload_bytes(b"content", remote_path) assert document_service.document_exists(remote_path) is True def test_document_not_exists(self, document_service): """Test checking if nonexistent document exists.""" assert document_service.document_exists("documents/nonexistent.pdf") is False class TestDocumentDelete: """Tests for document deletion.""" def test_delete_document(self, document_service, mock_storage): """Test deleting a document.""" remote_path = "documents/to-delete.pdf" mock_storage.upload_bytes(b"content", remote_path) result = document_service.delete_document_files(remote_path) assert result is True assert document_service.document_exists(remote_path) is False def test_delete_nonexistent_document(self, document_service): """Test deleting document that doesn't exist.""" result = document_service.delete_document_files("documents/nonexistent.pdf") assert result is False class TestPageImages: """Tests for page image operations.""" def test_save_page_image(self, document_service, mock_storage): """Test saving a page image.""" doc_id = "test-doc-123" page_num = 1 image_content = b"\x89PNG\r\n\x1a\n fake png" remote_path = document_service.save_page_image(doc_id, page_num, image_content) assert remote_path == f"images/{doc_id}/page_{page_num}.png" assert mock_storage.exists(remote_path) def test_save_multiple_page_images(self, document_service, mock_storage): """Test saving images for multiple pages.""" doc_id = "multi-page-doc" for page_num in range(1, 4): content = f"page {page_num} content".encode() document_service.save_page_image(doc_id, page_num, content) images = document_service.list_document_images(doc_id) assert len(images) == 3 def test_get_page_image(self, document_service, mock_storage): """Test downloading a page image.""" doc_id = "test-doc" page_num = 2 image_content = b"image data" document_service.save_page_image(doc_id, page_num, image_content) downloaded = document_service.get_page_image(doc_id, page_num) assert downloaded == image_content def test_get_page_image_url(self, document_service): """Test getting URL for page image.""" doc_id = "test-doc" page_num = 1 url = document_service.get_page_image_url(doc_id, page_num) assert f"images/{doc_id}/page_{page_num}.png" in url def test_list_document_images(self, document_service, mock_storage): """Test listing all images for a document.""" doc_id = "list-test-doc" for i in range(5): document_service.save_page_image(doc_id, i + 1, f"page {i}".encode()) images = document_service.list_document_images(doc_id) assert len(images) == 5 def test_delete_document_images(self, document_service, mock_storage): """Test deleting all images for a document.""" doc_id = "delete-images-doc" for i in range(3): document_service.save_page_image(doc_id, i + 1, b"content") deleted_count = document_service.delete_document_images(doc_id) assert deleted_count == 3 assert len(document_service.list_document_images(doc_id)) == 0 class TestRoundTrip: """Tests for complete upload-download cycles.""" def test_document_round_trip(self, document_service): """Test uploading and downloading document.""" original_content = b"%PDF-1.4 complete document content here" filename = "roundtrip.pdf" result = document_service.upload_document(original_content, filename) downloaded = document_service.download_document(result.file_path) assert downloaded == original_content def test_image_round_trip(self, document_service): """Test saving and retrieving page image.""" doc_id = "roundtrip-doc" page_num = 1 original_image = b"\x89PNG fake image data" document_service.save_page_image(doc_id, page_num, original_image) retrieved = document_service.get_page_image(doc_id, page_num) assert retrieved == original_image