226 lines
6.3 KiB
Python
226 lines
6.3 KiB
Python
"""
Batch Upload Processing Queue

Background queue for asynchronous batch upload processing.
"""
|
|
|
|
import logging
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime
from queue import Empty, Full, Queue
from typing import Any
from uuid import UUID
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class BatchTask:
    """Unit of work describing one ZIP batch upload awaiting processing.

    ``admin_token`` and ``zip_content`` are excluded from the generated
    ``repr`` so that logging or printing a task can neither leak the
    credential nor dump the entire archive payload. Construction order and
    equality semantics are unaffected.
    """

    # Identifier of the batch; used in the queue's log messages.
    batch_id: UUID
    # Credential handed to the batch service; kept out of repr().
    admin_token: str = field(repr=False)
    # Archive payload handed to the batch service; kept out of repr().
    zip_content: bytes = field(repr=False)
    # Name of the uploaded archive; passed to the service and logged.
    zip_filename: str
    # Origin label passed through to the batch service.
    upload_source: str
    # NOTE(review): not read by the queue code in this file - confirm consumer.
    auto_label: bool
    # Presumably the task creation time; supplied by the caller.
    created_at: datetime
|
|
|
|
|
|
class BatchTaskQueue:
    """Bounded in-process queue that hands batch uploads to worker threads.

    Producers call :meth:`submit`; daemon worker threads created by
    :meth:`start` pull tasks off the queue and pass them to the batch
    service supplied at start time.
    """

    def __init__(self, max_size: int = 20, worker_count: int = 2):
        """Initialize the batch task queue.

        Args:
            max_size: Maximum number of pending tasks. ``queue.Queue`` treats
                a value <= 0 as "unbounded".
            worker_count: Number of worker threads created by ``start``.
        """
        self._queue: Queue[BatchTask] = Queue(maxsize=max_size)
        self._workers: list[threading.Thread] = []
        self._stop_event = threading.Event()
        self._worker_count = worker_count
        self._batch_service: Any | None = None
        self._running = False
        # Guards _running, _batch_service and the _workers list.
        self._lock = threading.Lock()

    def start(self, batch_service: Any) -> None:
        """Start worker threads with batch service.

        Calling this while the queue is already running only logs a warning.

        Args:
            batch_service: BatchUploadService instance for processing
        """
        with self._lock:
            if self._running:
                logger.warning("Batch queue already running")
                return

            self._batch_service = batch_service
            self._stop_event.clear()
            self._running = True

            # Start worker threads
            for i in range(self._worker_count):
                worker = threading.Thread(
                    target=self._worker_loop,
                    name=f"BatchWorker-{i}",
                    daemon=True,
                )
                worker.start()
                self._workers.append(worker)

            logger.info(f"Started {self._worker_count} batch workers")

    def stop(self, timeout: float = 30.0) -> None:
        """Stop all worker threads gracefully.

        Signals the workers to exit and waits for them. Tasks still sitting
        in the queue are left unprocessed.

        Args:
            timeout: Maximum total time, in seconds, to wait for all workers
                to finish. ``None`` waits indefinitely.
        """
        with self._lock:
            if not self._running:
                return

            logger.info("Stopping batch queue...")
            self._stop_event.set()
            self._running = False
            # Detach the worker list while holding the lock so a concurrent
            # start() cannot have its new threads joined or dropped here.
            workers, self._workers = self._workers, []

        # One shared deadline: the budget is for the whole shutdown, not for
        # each worker individually.
        deadline = None if timeout is None else time.monotonic() + timeout

        # Wait for workers to finish (outside the lock; joins may block).
        for worker in workers:
            if deadline is None:
                remaining = None
            else:
                remaining = max(0.0, deadline - time.monotonic())
            worker.join(timeout=remaining)
            if worker.is_alive():
                # join() returns silently on timeout; surface it explicitly.
                logger.warning(f"{worker.name} did not stop within {timeout}s")

        logger.info("Batch queue stopped")

    def submit(self, task: BatchTask) -> bool:
        """Submit a batch task to the queue without blocking.

        Args:
            task: Batch task to process

        Returns:
            True if task was queued, False if queue is full
        """
        try:
            self._queue.put(task, block=False)
            logger.info(f"Queued batch task: batch_id={task.batch_id}")
            return True
        except Full:
            logger.warning(f"Queue full, rejected task: batch_id={task.batch_id}")
            return False

    def get_queue_depth(self) -> int:
        """Get the number of pending tasks in queue.

        Returns:
            Approximate number of tasks waiting to be processed
        """
        return self._queue.qsize()

    @property
    def is_running(self) -> bool:
        """Check if queue is running.

        Returns:
            True between a successful ``start`` and the next ``stop``
        """
        return self._running

    def _worker_loop(self) -> None:
        """Worker thread main loop: pull tasks until the stop event is set."""
        worker_name = threading.current_thread().name
        logger.info(f"{worker_name} started")

        while not self._stop_event.is_set():
            try:
                # Get task with timeout to check stop event periodically
                task = self._queue.get(timeout=1.0)
            except Empty:
                # No tasks, continue loop to check stop event
                continue

            try:
                self._process_task(task)
            except Exception as e:
                logger.error(f"{worker_name} error processing task: {e}", exc_info=True)
            finally:
                # Always balance the get() above, even on failure, so the
                # queue's unfinished-task accounting stays correct.
                self._queue.task_done()

        logger.info(f"{worker_name} stopped")

    def _process_task(self, task: BatchTask) -> None:
        """Process a single batch task.

        Exceptions raised by the service call are logged and swallowed so the
        calling worker keeps running.

        Args:
            task: Batch task to process
        """
        if self._batch_service is None:
            logger.error("Batch service not initialized, cannot process task")
            return

        logger.info(
            f"Processing batch task: batch_id={task.batch_id}, "
            f"filename={task.zip_filename}"
        )

        try:
            # Process the batch upload using the service.
            # NOTE(review): task.batch_id and task.auto_label are not
            # forwarded to the service - confirm this is intentional.
            result = self._batch_service.process_zip_upload(
                admin_token=task.admin_token,
                zip_filename=task.zip_filename,
                zip_content=task.zip_content,
                upload_source=task.upload_source,
            )

            # Assumes the service returns a mapping-like object with .get().
            logger.info(
                f"Batch task completed: batch_id={task.batch_id}, "
                f"status={result.get('status')}, "
                f"successful_files={result.get('successful_files')}, "
                f"failed_files={result.get('failed_files')}"
            )

        except Exception as e:
            logger.error(
                f"Error processing batch task {task.batch_id}: {e}",
                exc_info=True,
            )
|
|
|
|
|
|
# Serializes lazy creation of the shared queue in get_batch_queue().
_queue_lock = threading.Lock()

# Module-wide BatchTaskQueue instance; remains None until first requested.
_batch_queue: BatchTaskQueue | None = None
|
|
|
|
|
|
def get_batch_queue() -> BatchTaskQueue:
    """Return the shared batch queue, creating it on first use.

    The instance is built with ``max_size=20`` and ``worker_count=2``.
    Creation happens under ``_queue_lock`` and is re-checked inside the lock
    so concurrent first callers end up with the same instance.

    Returns:
        The module-wide batch task queue
    """
    global _batch_queue

    # Fast path: already created, no locking needed.
    if _batch_queue is not None:
        return _batch_queue

    with _queue_lock:
        # Another thread may have created it while we waited for the lock.
        if _batch_queue is None:
            _batch_queue = BatchTaskQueue(max_size=20, worker_count=2)

    return _batch_queue
|
|
|
|
|
|
def init_batch_queue(batch_service: Any) -> None:
    """Ensure the shared batch queue exists and is running.

    Does nothing when the queue already reports itself as running.

    Args:
        batch_service: BatchUploadService instance handed to the workers
    """
    batch_queue = get_batch_queue()
    if batch_queue.is_running:
        return
    batch_queue.start(batch_service)
|
|
|
|
|
|
def shutdown_batch_queue() -> None:
    """Stop the shared batch queue if one has been created.

    Uses the queue's default stop timeout; a no-op when the queue was never
    instantiated.
    """
    if _batch_queue is None:
        return
    _batch_queue.stop()
|