"""
Configuration settings for the invoice extraction system.
"""

import os
import platform
from pathlib import Path
from dotenv import load_dotenv

# Read settings from the .env file located in the same directory as this module
env_path = Path(__file__).with_name('.env')
load_dotenv(dotenv_path=env_path)


def _is_wsl() -> bool:
    """Report whether the interpreter is running inside WSL.

    Detection is best-effort. Only Linux hosts are considered; on those, a
    non-empty ``WSL_DISTRO_NAME`` environment variable or the word
    "microsoft" (case-insensitive) in ``/proc/version`` counts as a match.

    Returns:
        True when one of the WSL indicators is found, False otherwise,
        including when ``/proc/version`` cannot be read for any reason.
    """
    if platform.system() != 'Linux':
        return False
    # Check for WSL-specific indicators: environment variable first (cheap),
    # then the kernel version string.
    if os.environ.get('WSL_DISTRO_NAME'):
        return True
    try:
        # Explicit encoding + errors='replace' keeps the read independent of
        # the process locale; only an ASCII substring is searched for.
        with open('/proc/version', 'r', encoding='utf-8', errors='replace') as f:
            return 'microsoft' in f.read().lower()
    except OSError:
        # Any failure to read the file (missing, unreadable, I/O error) only
        # means WSL cannot be confirmed; detection must not abort the import.
        return False


# PostgreSQL Database Configuration
# Loaded from environment variables so credentials stay out of the source.
def _db_port_from_env(default: int = 5432) -> int:
    """Return the database port configured in ``DB_PORT`` as an integer.

    An unset or blank ``DB_PORT`` (e.g. ``DB_PORT=`` in the .env file) falls
    back to *default* instead of failing inside ``int()``.

    Raises:
        ValueError: If ``DB_PORT`` is set to a value that is not an integer.
    """
    raw = (os.getenv('DB_PORT') or '').strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        # Re-raise with a message that names the offending variable.
        raise ValueError(
            f"DB_PORT environment variable must be an integer, got {raw!r}."
        ) from None


DATABASE = {
    # NOTE(review): the fallback host is a hard-coded address; confirm it is
    # appropriate for every environment this module is deployed to.
    'host': os.getenv('DB_HOST', '192.168.68.31'),
    'port': _db_port_from_env(),
    'database': os.getenv('DB_NAME', 'docmaster'),
    'user': os.getenv('DB_USER', 'docmaster'),
    'password': os.getenv('DB_PASSWORD'),  # No default for security
}

# Required settings: stop at import time when the database password is
# missing or empty.
if not DATABASE['password']:
    raise ValueError("".join((
        "DB_PASSWORD environment variable is not set. ",
        "Please create a .env file based on .env.example and set DB_PASSWORD.",
    )))

# Connection string for psycopg2
def get_db_connection_string() -> str:
    """Build a ``postgresql://`` connection URI from ``DATABASE``.

    The user name, password and database name are percent-encoded so that
    reserved URI characters in them (``@``, ``:``, ``/``, ``#``, ``?``, ``%``)
    cannot be mistaken for URI delimiters. Values made only of letters,
    digits and ``_.-~`` are emitted unchanged.

    Returns:
        A URI of the form ``postgresql://user:password@host:port/database``.
    """
    # Function-scope import: the module's import block does not provide it.
    from urllib.parse import quote

    user = quote(str(DATABASE['user']), safe='')
    password = quote(str(DATABASE['password']), safe='')
    database = quote(str(DATABASE['database']), safe='')
    return (
        f"postgresql://{user}:{password}"
        f"@{DATABASE['host']}:{DATABASE['port']}/{database}"
    )


# Paths Configuration - relative project paths by default (Windows or native
# Linux), with the data directories overridden when running under WSL.
PATHS = {
    'csv_dir': 'data/structured_data',
    'pdf_dir': 'data/raw_pdfs',
    'output_dir': 'data/dataset',
    'reports_dir': 'reports',  # Reports stay in the project directory either way
}
if _is_wsl():
    # WSL: use native Linux filesystem for better I/O performance
    PATHS.update(
        csv_dir=os.path.expanduser('~/invoice-data/structured_data'),
        pdf_dir=os.path.expanduser('~/invoice-data/raw_pdfs'),
        output_dir=os.path.expanduser('~/invoice-data/dataset'),
    )

# Auto-labeling Configuration
# Tunable values for the auto-labeling step, collected in one mapping.
AUTOLABEL = dict(
    workers=2,
    dpi=150,
    min_confidence=0.5,
    # Dataset split ratios: train / val / test
    train_ratio=0.8,
    val_ratio=0.1,
    test_ratio=0.1,
    max_records_per_report=10000,
)