This commit is contained in:
Yaojia Wang
2026-01-13 00:10:27 +01:00
parent 1b7c61cdd8
commit b26fd61852
43 changed files with 7751 additions and 578 deletions

64
config.py Normal file
View File

@@ -0,0 +1,64 @@
"""
Configuration settings for the invoice extraction system.
"""
import os
import platform
def _is_wsl() -> bool:
"""Check if running inside WSL (Windows Subsystem for Linux)."""
if platform.system() != 'Linux':
return False
# Check for WSL-specific indicators
if os.environ.get('WSL_DISTRO_NAME'):
return True
try:
with open('/proc/version', 'r') as f:
return 'microsoft' in f.read().lower()
except (FileNotFoundError, PermissionError):
return False
# PostgreSQL Database Configuration
DATABASE = {
'host': '192.168.68.31',
'port': 5432,
'database': 'docmaster',
'user': 'docmaster',
'password': '0412220',
}
# Connection string for psycopg2
def get_db_connection_string():
return f"postgresql://{DATABASE['user']}:{DATABASE['password']}@{DATABASE['host']}:{DATABASE['port']}/{DATABASE['database']}"
# Paths Configuration - auto-detect WSL vs Windows
if _is_wsl():
# WSL: use native Linux filesystem for better I/O performance
PATHS = {
'csv_dir': os.path.expanduser('~/invoice-data/structured_data'),
'pdf_dir': os.path.expanduser('~/invoice-data/raw_pdfs'),
'output_dir': os.path.expanduser('~/invoice-data/dataset'),
'reports_dir': 'reports', # Keep reports in project directory
}
else:
# Windows or native Linux: use relative paths
PATHS = {
'csv_dir': 'data/structured_data',
'pdf_dir': 'data/raw_pdfs',
'output_dir': 'data/dataset',
'reports_dir': 'reports',
}
# Auto-labeling Configuration
AUTOLABEL = {
'workers': 2,
'dpi': 150,
'min_confidence': 0.5,
'train_ratio': 0.8,
'val_ratio': 0.1,
'test_ratio': 0.1,
'max_records_per_report': 10000,
}