# Files
# invoice-master-poc-v2/tests/integration/services/test_document_service_integration.py
# Yaojia Wang b602d0a340 re-structure
# 2026-02-01 22:55:31 +01:00

# 284 lines
# 9.6 KiB
# Python

"""
Document Service Integration Tests
Tests DocumentService storage operations against an in-memory storage backend.
"""
from pathlib import Path
from unittest.mock import MagicMock
import pytest
from backend.web.services.document_service import DocumentService, DocumentResult
class MockStorageBackend:
    """Simple in-memory storage backend for testing.

    Stored files live in a dict mapping remote path to content, so no
    disk or network access takes place.
    """

    def __init__(self) -> None:
        self._files: dict[str, bytes] = {}

    def upload_bytes(self, content: bytes, remote_path: str, overwrite: bool = False) -> None:
        """Store *content* under *remote_path*.

        Raises:
            FileExistsError: If *remote_path* is already stored and
                *overwrite* is false.
        """
        if not overwrite and remote_path in self._files:
            raise FileExistsError(f"File already exists: {remote_path}")
        self._files[remote_path] = content

    def download_bytes(self, remote_path: str) -> bytes:
        """Return the content stored under *remote_path*.

        Raises:
            FileNotFoundError: If nothing is stored under *remote_path*.
        """
        try:
            return self._files[remote_path]
        except KeyError:
            # Surface the storage-level error, not the dict's KeyError.
            raise FileNotFoundError(f"File not found: {remote_path}") from None

    def get_presigned_url(self, remote_path: str, expires_in_seconds: int = 3600) -> str:
        """Return a fake URL embedding the path and the expiry in seconds.

        The path is not required to exist in the store.
        """
        return f"https://storage.example.com/{remote_path}?expires={expires_in_seconds}"

    def exists(self, remote_path: str) -> bool:
        """Return True if something is stored under *remote_path*."""
        return remote_path in self._files

    def delete(self, remote_path: str) -> bool:
        """Remove *remote_path*; return True if it existed, False otherwise."""
        if remote_path not in self._files:
            return False
        del self._files[remote_path]
        return True

    def list_files(self, prefix: str) -> list[str]:
        """Return every stored path that starts with *prefix*, in insertion order."""
        return [path for path in self._files if path.startswith(prefix)]
@pytest.fixture
def mock_storage() -> MockStorageBackend:
    """Create a fresh, empty in-memory storage backend for each test."""
    return MockStorageBackend()
@pytest.fixture
def document_service(mock_storage: MockStorageBackend) -> DocumentService:
    """Create a DocumentService backed by the ``mock_storage`` fixture.

    A test that requests both fixtures gets the same backend instance the
    service writes to, so it can inspect or pre-populate stored files.
    """
    return DocumentService(storage_backend=mock_storage)
class TestDocumentUpload:
    """Tests for document upload operations."""

    def test_upload_document(self, document_service):
        """Test uploading a document."""
        content = b"%PDF-1.4 test content"
        filename = "test_invoice.pdf"

        result = document_service.upload_document(content, filename)

        assert result is not None
        assert result.id is not None
        assert result.filename == filename
        assert result.file_path.startswith("documents/")
        assert result.file_path.endswith(".pdf")

    def test_upload_document_with_custom_id(self, document_service):
        """Test uploading with custom document ID."""
        content = b"%PDF-1.4 test content"
        filename = "invoice.pdf"
        custom_id = "custom-doc-12345"

        result = document_service.upload_document(
            content, filename, document_id=custom_id
        )

        assert result.id == custom_id
        assert custom_id in result.file_path

    def test_upload_preserves_extension(self, document_service):
        """Test that the file extension is kept, lower-cased, in the stored path."""
        cases = [
            ("document.pdf", ".pdf"),
            ("image.PNG", ".png"),
            ("file.JPEG", ".jpeg"),
            ("noextension", ""),
        ]

        for filename, expected_ext in cases:
            result = document_service.upload_document(b"content", filename)
            # An extensionless name is only checked for a successful upload:
            # every path trivially ends with "", so there is nothing to assert.
            if expected_ext:
                assert result.file_path.endswith(expected_ext), (
                    f"{filename!r} stored as {result.file_path!r}, "
                    f"expected suffix {expected_ext!r}"
                )

    def test_upload_document_overwrite(self, document_service, mock_storage):
        """Test that upload overwrites existing file."""
        content1 = b"original content"
        content2 = b"new content"
        doc_id = "overwrite-test"

        # Second upload reuses the same document ID and filename.
        document_service.upload_document(content1, "doc.pdf", document_id=doc_id)
        document_service.upload_document(content2, "doc.pdf", document_id=doc_id)

        # Should have new content
        remote_path = f"documents/{doc_id}.pdf"
        stored_content = mock_storage.download_bytes(remote_path)
        assert stored_content == content2
class TestDocumentDownload:
    """Tests for document download operations."""

    def test_download_document(self, document_service, mock_storage):
        """Test downloading a document."""
        content = b"test document content"
        remote_path = "documents/test-doc.pdf"
        # Seed the backend directly so only the download path is under test.
        mock_storage.upload_bytes(content, remote_path)

        downloaded = document_service.download_document(remote_path)

        assert downloaded == content

    def test_download_nonexistent_document(self, document_service):
        """Test downloading document that doesn't exist."""
        with pytest.raises(FileNotFoundError):
            document_service.download_document("documents/nonexistent.pdf")
class TestDocumentUrl:
    """Tests for document URL generation."""

    def test_get_document_url(self, document_service, mock_storage):
        """Test getting presigned URL for document."""
        remote_path = "documents/test-doc.pdf"
        mock_storage.upload_bytes(b"content", remote_path)

        url = document_service.get_document_url(remote_path, expires_in_seconds=7200)

        assert url.startswith("https://")
        assert remote_path in url
        # The mock backend embeds the requested expiry in the URL.
        assert "7200" in url

    def test_get_document_url_default_expiry(self, document_service):
        """Test default URL expiry."""
        url = document_service.get_document_url("documents/doc.pdf")

        assert "3600" in url
class TestDocumentExists:
    """Tests for document existence check."""

    def test_document_exists(self, document_service, mock_storage):
        """Test checking if document exists."""
        remote_path = "documents/existing.pdf"
        mock_storage.upload_bytes(b"content", remote_path)

        assert document_service.document_exists(remote_path) is True

    def test_document_not_exists(self, document_service):
        """Test checking if nonexistent document exists."""
        assert document_service.document_exists("documents/nonexistent.pdf") is False
class TestDocumentDelete:
    """Tests for document deletion."""

    def test_delete_document(self, document_service, mock_storage):
        """Test deleting a document."""
        remote_path = "documents/to-delete.pdf"
        mock_storage.upload_bytes(b"content", remote_path)

        result = document_service.delete_document_files(remote_path)

        assert result is True
        assert document_service.document_exists(remote_path) is False

    def test_delete_nonexistent_document(self, document_service):
        """Test deleting document that doesn't exist."""
        result = document_service.delete_document_files("documents/nonexistent.pdf")

        assert result is False
class TestPageImages:
    """Tests for page image operations."""

    def test_save_page_image(self, document_service, mock_storage):
        """Test saving a page image."""
        doc_id = "test-doc-123"
        page_num = 1
        image_content = b"\x89PNG\r\n\x1a\n fake png"

        remote_path = document_service.save_page_image(doc_id, page_num, image_content)

        assert remote_path == f"images/{doc_id}/page_{page_num}.png"
        assert mock_storage.exists(remote_path)

    def test_save_multiple_page_images(self, document_service, mock_storage):
        """Test saving images for multiple pages."""
        doc_id = "multi-page-doc"

        # Pages 1 through 3.
        for page_num in range(1, 4):
            content = f"page {page_num} content".encode()
            document_service.save_page_image(doc_id, page_num, content)

        images = document_service.list_document_images(doc_id)
        assert len(images) == 3

    def test_get_page_image(self, document_service, mock_storage):
        """Test downloading a page image."""
        doc_id = "test-doc"
        page_num = 2
        image_content = b"image data"
        document_service.save_page_image(doc_id, page_num, image_content)

        downloaded = document_service.get_page_image(doc_id, page_num)

        assert downloaded == image_content

    def test_get_page_image_url(self, document_service):
        """Test getting URL for page image."""
        doc_id = "test-doc"
        page_num = 1

        url = document_service.get_page_image_url(doc_id, page_num)

        assert f"images/{doc_id}/page_{page_num}.png" in url

    def test_list_document_images(self, document_service, mock_storage):
        """Test listing all images for a document."""
        doc_id = "list-test-doc"

        # Page numbers are 1-based; the loop index is 0-based.
        for i in range(5):
            document_service.save_page_image(doc_id, i + 1, f"page {i}".encode())

        images = document_service.list_document_images(doc_id)

        assert len(images) == 5

    def test_delete_document_images(self, document_service, mock_storage):
        """Test deleting all images for a document."""
        doc_id = "delete-images-doc"

        for i in range(3):
            document_service.save_page_image(doc_id, i + 1, b"content")

        deleted_count = document_service.delete_document_images(doc_id)

        assert deleted_count == 3
        assert len(document_service.list_document_images(doc_id)) == 0
class TestRoundTrip:
    """Tests for complete upload-download cycles."""

    def test_document_round_trip(self, document_service):
        """Test uploading and downloading document."""
        original_content = b"%PDF-1.4 complete document content here"
        filename = "roundtrip.pdf"

        result = document_service.upload_document(original_content, filename)
        # Download via the path the service reported, not a hard-coded one.
        downloaded = document_service.download_document(result.file_path)

        assert downloaded == original_content

    def test_image_round_trip(self, document_service):
        """Test saving and retrieving page image."""
        doc_id = "roundtrip-doc"
        page_num = 1
        original_image = b"\x89PNG fake image data"

        document_service.save_page_image(doc_id, page_num, original_image)
        retrieved = document_service.get_page_image(doc_id, page_num)

        assert retrieved == original_image