""" Configuration settings for the invoice extraction system. """ import os import platform def _is_wsl() -> bool: """Check if running inside WSL (Windows Subsystem for Linux).""" if platform.system() != 'Linux': return False # Check for WSL-specific indicators if os.environ.get('WSL_DISTRO_NAME'): return True try: with open('/proc/version', 'r') as f: return 'microsoft' in f.read().lower() except (FileNotFoundError, PermissionError): return False # PostgreSQL Database Configuration DATABASE = { 'host': '192.168.68.31', 'port': 5432, 'database': 'docmaster', 'user': 'docmaster', 'password': '0412220', } # Connection string for psycopg2 def get_db_connection_string(): return f"postgresql://{DATABASE['user']}:{DATABASE['password']}@{DATABASE['host']}:{DATABASE['port']}/{DATABASE['database']}" # Paths Configuration - auto-detect WSL vs Windows if _is_wsl(): # WSL: use native Linux filesystem for better I/O performance PATHS = { 'csv_dir': os.path.expanduser('~/invoice-data/structured_data'), 'pdf_dir': os.path.expanduser('~/invoice-data/raw_pdfs'), 'output_dir': os.path.expanduser('~/invoice-data/dataset'), 'reports_dir': 'reports', # Keep reports in project directory } else: # Windows or native Linux: use relative paths PATHS = { 'csv_dir': 'data/structured_data', 'pdf_dir': 'data/raw_pdfs', 'output_dir': 'data/dataset', 'reports_dir': 'reports', } # Auto-labeling Configuration AUTOLABEL = { 'workers': 2, 'dpi': 150, 'min_confidence': 0.5, 'train_ratio': 0.8, 'val_ratio': 0.1, 'test_ratio': 0.1, 'max_records_per_report': 10000, }