""" Configuration file for system-dependent paths and settings. This file contains paths that may vary between different systems. Copy this file and modify the paths according to your local installation. """ import os from pathlib import Path # ============================================================================ # System Paths - Modify these according to your installation # ============================================================================ # Poppler path (required for PDF to image conversion) # Download from: https://github.com/oschwartz10612/poppler-windows/releases # Example: r"C:\poppler-23.11.0\bin" POPPLER_PATH = os.getenv("POPPLER_PATH", r"C:\Program Files\poppler-25.07.0\Library\bin") # Tesseract path (optional - only needed if not in system PATH) # Download from: https://github.com/UB-Mannheim/tesseract/wiki # Example: r"C:\Program Files\Tesseract-OCR\tesseract.exe" TESSERACT_CMD = os.getenv("TESSERACT_CMD", None) # ============================================================================ # Project Paths - Generally don't need to modify these # ============================================================================ # Project root directory PROJECT_ROOT = Path(__file__).parent.absolute() # Data directories DATA_DIR = PROJECT_ROOT / "data" RAW_INVOICES_DIR = DATA_DIR / "raw_invoices" PROCESSED_IMAGES_DIR = DATA_DIR / "processed_images" OCR_RESULTS_DIR = DATA_DIR / "ocr_results" # YOLO dataset directories YOLO_DATASET_DIR = DATA_DIR / "yolo_dataset" YOLO_TEMP_IMAGES_DIR = YOLO_DATASET_DIR / "temp_all_images" YOLO_TEMP_LABELS_DIR = YOLO_DATASET_DIR / "temp_all_labels" YOLO_TRAIN_IMAGES_DIR = YOLO_DATASET_DIR / "images" / "train" YOLO_TRAIN_LABELS_DIR = YOLO_DATASET_DIR / "labels" / "train" YOLO_VAL_IMAGES_DIR = YOLO_DATASET_DIR / "images" / "val" YOLO_VAL_LABELS_DIR = YOLO_DATASET_DIR / "labels" / "val" # Model directories MODELS_DIR = PROJECT_ROOT / "models" DEFAULT_MODEL_PATH = MODELS_DIR / "payment_slip_detector_v1" / "weights" / "best.pt" # ============================================================================ # OCR Settings # ============================================================================ # Tesseract language (Swedish + English) TESSERACT_LANG = "swe" # Ensure Swedish language pack is installed # OCR confidence threshold (0-100) OCR_CONFIDENCE_THRESHOLD = 0 # ============================================================================ # Training Settings # ============================================================================ # YOLO model size: n (nano), s (small), m (medium), l (large), x (xlarge) YOLO_MODEL_SIZE = "n" # Training epochs TRAINING_EPOCHS = 100 # Batch size BATCH_SIZE = 16 # Image size for training IMAGE_SIZE = 640 # Validation split ratio (0.0 to 1.0) VALIDATION_SPLIT = 0.2 # Random seed for reproducibility RANDOM_SEED = 42 # ============================================================================ # API Settings (for main.py FastAPI server) # ============================================================================ # API host API_HOST = "127.0.0.1" # API port API_PORT = 8000 # ============================================================================ # Helper Functions # ============================================================================ def apply_tesseract_path(): """Apply Tesseract path if configured.""" if TESSERACT_CMD: import pytesseract pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD def validate_paths(): """Validate that required system paths exist.""" issues = [] # Check Poppler if not os.path.exists(POPPLER_PATH): issues.append(f"Poppler not found at: {POPPLER_PATH}") issues.append(" Download from: https://github.com/oschwartz10612/poppler-windows/releases") # Check Tesseract (if specified) if TESSERACT_CMD and not os.path.exists(TESSERACT_CMD): issues.append(f"Tesseract not found at: {TESSERACT_CMD}") issues.append(" Download from: https://github.com/UB-Mannheim/tesseract/wiki") if issues: print("Configuration Issues Found:") for issue in issues: print(f" {issue}") return False return True # ============================================================================ # Example: Environment Variable Override # ============================================================================ # You can set these in your environment instead of modifying this file: # # Windows: # set POPPLER_PATH=C:\poppler\bin # set TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe # # Linux/Mac: # export POPPLER_PATH=/usr/bin # export TESSERACT_CMD=/usr/bin/tesseract # ============================================================================