46 lines
1.6 KiB
Plaintext
46 lines
1.6 KiB
Plaintext
# Core dependencies
|
|
ultralytics>=8.0.0 # YOLOv8
|
|
pytesseract>=0.3.10 # Tesseract OCR Python wrapper
|
|
|
|
# Image processing
|
|
pdf2image>=1.16.0 # PDF to image conversion
|
|
Pillow>=10.0.0 # Image manipulation
|
|
opencv-python>=4.8.0 # Computer vision
|
|
|
|
# Data handling
|
|
numpy>=1.24.0
|
|
pandas>=2.0.0
|
|
scikit-learn>=1.3.0 # For DBSCAN clustering in 02_create_labels.py
|
|
|
|
# API dependencies (for main.py)
|
|
fastapi>=0.104.0 # FastAPI web framework
|
|
uvicorn>=0.24.0 # ASGI server
|
|
python-multipart>=0.0.6 # For file upload support
|
|
|
|
# System dependencies (not installable via pip)
|
|
# IMPORTANT: Requires system-level installation of:
|
|
#
|
|
# 1. Tesseract OCR:
|
|
# - Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki
|
|
# After installation, add the Tesseract install directory to PATH, or set it in code: pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
|
|
# - Linux: sudo apt-get install tesseract-ocr tesseract-ocr-swe tesseract-ocr-eng
|
|
# - macOS: brew install tesseract tesseract-lang
|
|
#
|
|
# 2. Poppler (for pdf2image):
|
|
# - Windows: Download from https://github.com/oschwartz10612/poppler-windows/releases
|
|
# - Linux: sudo apt-get install poppler-utils
|
|
# - macOS: brew install poppler
|
|
#
|
|
# 3. Swedish language data for Tesseract:
|
|
# After installing Tesseract, run `tesseract --list-langs`; if `swe` is not listed, download the Swedish language file (swe.traineddata)
|
|
# from https://github.com/tesseract-ocr/tessdata and place swe.traineddata in Tesseract's tessdata directory
|
|
|
|
# Optional: GPU support
|
|
# torch>=2.0.0 # PyTorch; for a CUDA-enabled build, use the install command from https://pytorch.org/get-started/locally/
|
|
# torchvision>=0.15.0
|
|
|
|
# Development tools (optional)
|
|
# jupyter>=1.0.0
|
|
# matplotlib>=3.7.0
|
|
# seaborn>=0.12.0
|