Files
invoice-master/requirements.txt
Yaojia Wang dafa86c588 Init
2025-10-26 20:41:11 +01:00

46 lines
1.6 KiB
Plaintext

# Core dependencies
ultralytics>=8.0.0 # YOLOv8
pytesseract>=0.3.10 # Tesseract OCR Python wrapper
# Image processing
pdf2image>=1.16.0 # PDF to image conversion
Pillow>=10.0.0 # Image manipulation
opencv-python>=4.8.0 # Computer vision
# Data handling
numpy>=1.24.0
pandas>=2.0.0
scikit-learn>=1.3.0 # For DBSCAN clustering in 02_create_labels.py
# API dependencies (for main.py)
fastapi>=0.104.0 # FastAPI web framework
uvicorn>=0.24.0 # ASGI server
python-multipart>=0.0.6 # For file upload support
# System dependencies (cannot be installed with pip)
# IMPORTANT: pytesseract and pdf2image require system-level installation of:
#
# 1. Tesseract OCR:
# - Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki
# After installation, either add the install directory to PATH, or set this in your Python code:
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
# - Linux: sudo apt-get install tesseract-ocr tesseract-ocr-swe tesseract-ocr-eng
# - macOS: brew install tesseract tesseract-lang
#
# 2. Poppler (for pdf2image):
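# - Windows: Download from https://github.com/oschwartz10612/poppler-windows/releases
# After extracting, add the folder containing pdftoppm.exe to PATH, or pass poppler_path=... to pdf2image.convert_from_path()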
# - Linux: sudo apt-get install poppler-utils
# - macOS: brew install poppler
#
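# 3. Swedish language data for Tesseract:
# The Linux and macOS commands in step 1 already install it. Otherwise (typically on Windows),
# download the Swedish language file (swe.traineddata) from https://github.com/tesseract-ocr/tessdata
# and copy it into the tessdata directory of your Tesseract installation.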
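# Optional: GPU support
# Note: torch is already installed as a dependency of ultralytics. Whether the build is CUDA-enabled
# depends on the wheel/index used; see https://pytorch.org/get-started/locally/ for the right install command.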
# torch>=2.0.0 # PyTorch with CUDA support
# torchvision>=0.15.0
# Development tools (optional)
# jupyter>=1.0.0
# matplotlib>=3.7.0
# seaborn>=0.12.0