"""
Configuration settings for the invoice extraction system.
"""

import os
import platform
from pathlib import Path
from urllib.parse import quote

from dotenv import load_dotenv

# Load environment variables from the .env file located next to this module.
env_path = Path(__file__).parent / '.env'
load_dotenv(dotenv_path=env_path)


def _is_wsl() -> bool:
    """Check if running inside WSL (Windows Subsystem for Linux).

    Returns:
        True when the platform reports Linux and either the
        ``WSL_DISTRO_NAME`` environment variable is set or
        ``/proc/version`` contains "microsoft" (case-insensitive).
        False otherwise, including when ``/proc/version`` is missing or
        not readable.
    """
    if platform.system() != 'Linux':
        return False

    # Check for WSL-specific indicators
    if os.environ.get('WSL_DISTRO_NAME'):
        return True

    try:
        with open('/proc/version', 'r', encoding='utf-8') as f:
            return 'microsoft' in f.read().lower()
    except (FileNotFoundError, PermissionError):
        return False


# PostgreSQL Database Configuration
# Values are read from environment variables (populated from .env above);
# every setting except the password falls back to a built-in default.
DATABASE = {
    'host': os.getenv('DB_HOST', '192.168.68.31'),
    'port': int(os.getenv('DB_PORT', '5432')),
    'database': os.getenv('DB_NAME', 'docmaster'),
    'user': os.getenv('DB_USER', 'docmaster'),
    'password': os.getenv('DB_PASSWORD'),  # No default for security
}

# Validate required configuration: fail at import time when the password is
# missing or empty.
if not DATABASE['password']:
    raise ValueError(
        "DB_PASSWORD environment variable is not set. "
        "Please create a .env file based on .env.example and set DB_PASSWORD."
    )


# Connection string for psycopg2
def get_db_connection_string() -> str:
    """Build a PostgreSQL connection URI from the ``DATABASE`` settings.

    The user, password and database name are percent-encoded so that
    characters with special meaning in a URI (such as ``@``, ``:``, ``/``,
    ``#`` or ``%``) cannot corrupt the URI structure. Values made only of
    unreserved characters are emitted unchanged.

    Returns:
        A string of the form
        ``postgresql://user:password@host:port/database``.
    """
    # safe='' also encodes '/', which quote() would otherwise leave as-is.
    user = quote(str(DATABASE['user']), safe='')
    password = quote(str(DATABASE['password']), safe='')
    database = quote(str(DATABASE['database']), safe='')
    return (
        f"postgresql://{user}:{password}"
        f"@{DATABASE['host']}:{DATABASE['port']}/{database}"
    )


# Paths Configuration - auto-detect WSL vs Windows
if _is_wsl():
    # WSL: use native Linux filesystem for better I/O performance
    PATHS = {
        'csv_dir': os.path.expanduser('~/invoice-data/structured_data'),
        'pdf_dir': os.path.expanduser('~/invoice-data/raw_pdfs'),
        'output_dir': os.path.expanduser('~/invoice-data/dataset'),
        'reports_dir': 'reports',  # Keep reports in project directory
    }
else:
    # Windows or native Linux: use relative paths
    PATHS = {
        'csv_dir': 'data/structured_data',
        'pdf_dir': 'data/raw_pdfs',
        'output_dir': 'data/dataset',
        'reports_dir': 'reports',
    }

# Auto-labeling Configuration
AUTOLABEL = {
    'workers': 2,
    'dpi': 150,
    'min_confidence': 0.5,
    'train_ratio': 0.8,
    'val_ratio': 0.1,
    'test_ratio': 0.1,
    'max_records_per_report': 10000,
}