# Core dependencies
ultralytics>=8.0.0  # YOLOv8
pytesseract>=0.3.10  # Tesseract OCR Python wrapper

# Image processing
pdf2image>=1.16.0  # PDF to image conversion
Pillow>=10.0.0  # Image manipulation
opencv-python>=4.8.0  # Computer vision

# Data handling
numpy>=1.24.0
pandas>=2.0.0
scikit-learn>=1.3.0  # For DBSCAN clustering in 02_create_labels.py

# API dependencies (for main.py)
fastapi>=0.104.0  # FastAPI web framework
uvicorn>=0.24.0  # ASGI server
python-multipart>=0.0.6  # For file upload support

# System utilities
# IMPORTANT: Requires system-level installation of:
#
# 1. Tesseract OCR:
#    - Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki
#      After installation, add to PATH or set: pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
#    - Linux: sudo apt-get install tesseract-ocr tesseract-ocr-swe tesseract-ocr-eng
#    - macOS: brew install tesseract tesseract-lang
#
# 2. Poppler (for pdf2image):
#    - Windows: Download from https://github.com/oschwartz10612/poppler-windows/releases
#    - Linux: sudo apt-get install poppler-utils
#    - macOS: brew install poppler
#
# 3. Swedish language data for Tesseract:
#    After installing Tesseract, you may need to download Swedish language files (swe.traineddata)
#    from https://github.com/tesseract-ocr/tessdata

# Optional: GPU support
# torch>=2.0.0  # PyTorch with CUDA support
# torchvision>=0.15.0

# Development tools (optional)
# jupyter>=1.0.0
# matplotlib>=3.7.0
# seaborn>=0.12.0