Files
invoice-master-poc-v2/packages/inference/inference/web/workers/batch_queue.py
2026-01-27 23:58:17 +01:00

226 lines
6.3 KiB
Python

"""
Batch Upload Processing Queue
Background queue for async batch upload processing.
"""
import logging
import threading
import time
from dataclasses import dataclass
from datetime import datetime
from queue import Empty, Full, Queue
from typing import Any
from uuid import UUID
logger = logging.getLogger(__name__)
@dataclass
class BatchTask:
    """Payload describing one queued batch-upload job.

    Carries everything a worker needs to process an uploaded ZIP
    archive asynchronously.
    """

    batch_id: UUID        # identifier of the batch this upload belongs to
    admin_token: str      # credential forwarded to the upload service
    zip_content: bytes    # raw bytes of the uploaded ZIP archive
    zip_filename: str     # original filename of the archive
    upload_source: str    # origin tag for the upload — presumably the endpoint; verify against caller
    auto_label: bool      # whether automatic labeling was requested
    created_at: datetime  # when the task was created/enqueued
class BatchTaskQueue:
"""Thread-safe queue for async batch upload processing."""
def __init__(self, max_size: int = 20, worker_count: int = 2):
"""Initialize the batch task queue.
Args:
max_size: Maximum queue size
worker_count: Number of worker threads
"""
self._queue: Queue[BatchTask] = Queue(maxsize=max_size)
self._workers: list[threading.Thread] = []
self._stop_event = threading.Event()
self._worker_count = worker_count
self._batch_service: Any | None = None
self._running = False
self._lock = threading.Lock()
def start(self, batch_service: Any) -> None:
"""Start worker threads with batch service.
Args:
batch_service: BatchUploadService instance for processing
"""
with self._lock:
if self._running:
logger.warning("Batch queue already running")
return
self._batch_service = batch_service
self._stop_event.clear()
self._running = True
# Start worker threads
for i in range(self._worker_count):
worker = threading.Thread(
target=self._worker_loop,
name=f"BatchWorker-{i}",
daemon=True,
)
worker.start()
self._workers.append(worker)
logger.info(f"Started {self._worker_count} batch workers")
def stop(self, timeout: float = 30.0) -> None:
"""Stop all worker threads gracefully.
Args:
timeout: Maximum time to wait for workers to finish
"""
with self._lock:
if not self._running:
return
logger.info("Stopping batch queue...")
self._stop_event.set()
self._running = False
# Wait for workers to finish
for worker in self._workers:
worker.join(timeout=timeout)
self._workers.clear()
logger.info("Batch queue stopped")
def submit(self, task: BatchTask) -> bool:
"""Submit a batch task to the queue.
Args:
task: Batch task to process
Returns:
True if task was queued, False if queue is full
"""
try:
self._queue.put(task, block=False)
logger.info(f"Queued batch task: batch_id={task.batch_id}")
return True
except Full:
logger.warning(f"Queue full, rejected task: batch_id={task.batch_id}")
return False
def get_queue_depth(self) -> int:
"""Get the number of pending tasks in queue.
Returns:
Number of tasks waiting to be processed
"""
return self._queue.qsize()
@property
def is_running(self) -> bool:
"""Check if queue is running.
Returns:
True if queue is active
"""
return self._running
def _worker_loop(self) -> None:
"""Worker thread main loop."""
worker_name = threading.current_thread().name
logger.info(f"{worker_name} started")
while not self._stop_event.is_set():
try:
# Get task with timeout to check stop event periodically
task = self._queue.get(timeout=1.0)
self._process_task(task)
self._queue.task_done()
except Empty:
# No tasks, continue loop to check stop event
continue
except Exception as e:
logger.error(f"{worker_name} error processing task: {e}", exc_info=True)
logger.info(f"{worker_name} stopped")
def _process_task(self, task: BatchTask) -> None:
"""Process a single batch task.
Args:
task: Batch task to process
"""
if self._batch_service is None:
logger.error("Batch service not initialized, cannot process task")
return
logger.info(
f"Processing batch task: batch_id={task.batch_id}, "
f"filename={task.zip_filename}"
)
try:
# Process the batch upload using the service
result = self._batch_service.process_zip_upload(
admin_token=task.admin_token,
zip_filename=task.zip_filename,
zip_content=task.zip_content,
upload_source=task.upload_source,
)
logger.info(
f"Batch task completed: batch_id={task.batch_id}, "
f"status={result.get('status')}, "
f"successful_files={result.get('successful_files')}, "
f"failed_files={result.get('failed_files')}"
)
except Exception as e:
logger.error(
f"Error processing batch task {task.batch_id}: {e}",
exc_info=True,
)
# Global batch queue instance
# Lazily created singleton; always access through get_batch_queue().
_batch_queue: BatchTaskQueue | None = None
# Protects the one-time creation of _batch_queue across threads.
_queue_lock = threading.Lock()
def get_batch_queue() -> BatchTaskQueue:
    """Return the process-wide batch queue, creating it on first use.

    Uses double-checked locking: the common already-created path avoids
    taking the lock entirely.

    Returns:
        Batch task queue instance
    """
    global _batch_queue
    if _batch_queue is not None:
        return _batch_queue
    with _queue_lock:
        # Re-check under the lock: another thread may have won the race.
        if _batch_queue is None:
            _batch_queue = BatchTaskQueue(max_size=20, worker_count=2)
    return _batch_queue
def init_batch_queue(batch_service: Any) -> None:
    """Initialize and start the batch queue.

    No-op if the queue is already running.

    Args:
        batch_service: BatchUploadService instance
    """
    batch_queue = get_batch_queue()
    if batch_queue.is_running:
        return
    batch_queue.start(batch_service)
def shutdown_batch_queue() -> None:
    """Shutdown the batch queue gracefully.

    Safe to call when the queue was never created (does nothing).
    """
    # Read-only access to the module global; no `global` statement needed.
    queue_instance = _batch_queue
    if queue_instance is None:
        return
    queue_instance.stop()