Yaojia Wang
2026-02-07 13:56:00 +01:00
parent 0990239e9c
commit f1a7bfe6b7
16 changed files with 1121 additions and 307 deletions

View File

@@ -7,10 +7,14 @@ Runs inference on new PDFs to extract invoice data.
import argparse
import json
import logging
import sys
from pathlib import Path
from shared.config import DEFAULT_DPI
from shared.logging_config import setup_cli_logging
logger = logging.getLogger(__name__)
def main():
@@ -66,10 +70,13 @@ def main():
args = parser.parse_args()
# Configure logging for CLI
setup_cli_logging()
# Validate model
model_path = Path(args.model)
if not model_path.exists():
print(f"Error: Model not found: {model_path}", file=sys.stderr)
logger.error("Model not found: %s", model_path)
sys.exit(1)
# Get input files
@@ -79,16 +86,16 @@ def main():
elif input_path.is_dir():
pdf_files = list(input_path.glob('*.pdf'))
else:
print(f"Error: Input not found: {input_path}", file=sys.stderr)
logger.error("Input not found: %s", input_path)
sys.exit(1)
if not pdf_files:
print("Error: No PDF files found", file=sys.stderr)
logger.error("No PDF files found")
sys.exit(1)
if args.verbose:
print(f"Processing {len(pdf_files)} PDF file(s)")
print(f"Model: {model_path}")
logger.info("Processing %d PDF file(s)", len(pdf_files))
logger.info("Model: %s", model_path)
from backend.pipeline import InferencePipeline
@@ -107,18 +114,18 @@ def main():
for pdf_path in pdf_files:
if args.verbose:
print(f"Processing: {pdf_path.name}")
logger.info("Processing: %s", pdf_path.name)
result = pipeline.process_pdf(pdf_path)
results.append(result.to_json())
if args.verbose:
print(f" Success: {result.success}")
print(f" Fields: {len(result.fields)}")
logger.info(" Success: %s", result.success)
logger.info(" Fields: %d", len(result.fields))
if result.fallback_used:
print(f" Fallback used: Yes")
logger.info(" Fallback used: Yes")
if result.errors:
print(f" Errors: {result.errors}")
logger.info(" Errors: %s", result.errors)
# Output results
if len(results) == 1:
@@ -132,9 +139,11 @@ def main():
with open(args.output, 'w', encoding='utf-8') as f:
f.write(json_output)
if args.verbose:
print(f"\nResults written to: {args.output}")
logger.info("Results written to: %s", args.output)
else:
print(json_output)
# Output JSON to stdout (not logged)
sys.stdout.write(json_output)
sys.stdout.write('\n')
if __name__ == '__main__':

View File

@@ -417,7 +417,12 @@ class InferencePipeline:
result.errors.append(f"Business feature extraction error: {error_detail}")
def _merge_fields(self, result: InferenceResult) -> None:
"""Merge extracted fields, keeping highest confidence for each field."""
"""Merge extracted fields, keeping best candidate for each field.
Selection priority:
1. Prefer candidates without validation errors
2. Among equal validity, prefer higher confidence
"""
field_candidates: dict[str, list[ExtractedField]] = {}
for extracted in result.extracted_fields:
@@ -430,7 +435,12 @@ class InferencePipeline:
# Select best candidate for each field
for field_name, candidates in field_candidates.items():
best = max(candidates, key=lambda x: x.confidence)
# max() over the tuple key (no validation error, confidence):
# error-free candidates win first, then higher confidence breaks ties
best = max(
candidates,
key=lambda x: (x.validation_error is None, x.confidence)
)
result.fields[field_name] = best.normalized_value
result.confidence[field_name] = best.confidence
# Store bbox for each field (useful for payment_line and other fields)
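A standalone sketch of the selection rule documented above, using a stand-in dataclass since the real ExtractedField definition is not part of this diff:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Candidate:  # stand-in for ExtractedField
        value: str
        confidence: float
        validation_error: Optional[str] = None

    candidates = [
        Candidate("1234", 0.95, validation_error="checksum failed"),
        Candidate("1243", 0.80),  # valid, lower confidence
    ]

    # True (no error) sorts above False, then confidence breaks ties,
    # so the valid 0.80 candidate beats the invalid 0.95 one.
    best = max(candidates, key=lambda c: (c.validation_error is None, c.confidence))
    assert best.value == "1243"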

View File

@@ -7,6 +7,7 @@ the autolabel results to identify potential errors.
import json
import base64
import logging
import os
from pathlib import Path
from typing import Optional, Dict, Any, List
@@ -14,6 +15,8 @@ from dataclasses import dataclass, asdict
from datetime import datetime
import psycopg2
logger = logging.getLogger(__name__)
from psycopg2.extras import execute_values
from shared.config import DEFAULT_DPI
@@ -648,7 +651,7 @@ Return ONLY the JSON object, no other text."""
docs = self.get_documents_with_failed_matches(limit=limit)
if verbose:
print(f"Found {len(docs)} documents with failed matches to validate")
logger.info("Found %d documents with failed matches to validate", len(docs))
results = []
for i, doc in enumerate(docs):
@@ -656,16 +659,16 @@ Return ONLY the JSON object, no other text."""
if verbose:
failed_fields = [f['field'] for f in doc['failed_fields']]
print(f"[{i+1}/{len(docs)}] Validating {doc_id[:8]}... (failed: {', '.join(failed_fields)})")
logger.info("[%d/%d] Validating %s... (failed: %s)", i+1, len(docs), doc_id[:8], ', '.join(failed_fields))
result = self.validate_document(doc_id, provider, model)
results.append(result)
if verbose:
if result.error:
print(f" ERROR: {result.error}")
logger.error(" ERROR: %s", result.error)
else:
print(f" OK ({result.processing_time_ms:.0f}ms)")
logger.info(" OK (%.0fms)", result.processing_time_ms)
return results

View File

@@ -11,6 +11,7 @@ from backend.web.schemas.admin import (
ExportResponse,
)
from backend.web.schemas.common import ErrorResponse
from shared.bbox import expand_bbox
logger = logging.getLogger(__name__)
@@ -102,12 +103,52 @@ def register_export_routes(router: APIRouter) -> None:
dst_image.write_bytes(image_content)
total_images += 1
# Get image dimensions for bbox expansion
img_dims = storage.get_admin_image_dimensions(doc_id, page_num)
if img_dims is None:
# Fall back to standard A4 at 300 DPI if dimensions unavailable
img_width, img_height = 2480, 3508
else:
img_width, img_height = img_dims
label_name = f"{doc.document_id}_page{page_num}.txt"
label_path = export_dir / "labels" / split / label_name
with open(label_path, "w") as f:
for ann in page_annotations:
line = f"{ann.class_id} {ann.x_center:.6f} {ann.y_center:.6f} {ann.width:.6f} {ann.height:.6f}\n"
# Convert normalized coords to pixel coords
half_w = (ann.width * img_width) / 2
half_h = (ann.height * img_height) / 2
x0 = ann.x_center * img_width - half_w
y0 = ann.y_center * img_height - half_h
x1 = ann.x_center * img_width + half_w
y1 = ann.y_center * img_height + half_h
# Use manual_mode for manual/imported annotations
manual_mode = ann.source in ("manual", "imported")
# Apply field-specific bbox expansion
ex0, ey0, ex1, ey1 = expand_bbox(
bbox=(x0, y0, x1, y1),
image_width=img_width,
image_height=img_height,
field_type=ann.class_name,
manual_mode=manual_mode,
)
# Convert back to normalized YOLO format
new_x_center = (ex0 + ex1) / 2 / img_width
new_y_center = (ey0 + ey1) / 2 / img_height
new_width = (ex1 - ex0) / img_width
new_height = (ey1 - ey0) / img_height
# Clamp to valid range
new_x_center = max(0, min(1, new_x_center))
new_y_center = max(0, min(1, new_y_center))
new_width = max(0, min(1, new_width))
new_height = max(0, min(1, new_height))
line = f"{ann.class_id} {new_x_center:.6f} {new_y_center:.6f} {new_width:.6f} {new_height:.6f}\n"
f.write(line)
total_annotations += 1
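The export loop above round-trips each annotation through pixel space. A condensed sketch of that round-trip as one helper, assuming expand_bbox takes the keyword signature used in the calls above and returns an (x0, y0, x1, y1) tuple:

    from shared.bbox import expand_bbox  # keyword signature assumed from the calls above

    def expand_yolo_box(x_center, y_center, width, height,
                        img_width, img_height, field_type, manual_mode=False):
        """Normalized YOLO box -> pixel corners -> field-specific expansion -> normalized."""
        half_w, half_h = width * img_width / 2, height * img_height / 2
        ex0, ey0, ex1, ey1 = expand_bbox(
            bbox=(x_center * img_width - half_w, y_center * img_height - half_h,
                  x_center * img_width + half_w, y_center * img_height + half_h),
            image_width=img_width,
            image_height=img_height,
            field_type=field_type,
            manual_mode=manual_mode,
        )

        def clamp01(v: float) -> float:
            return max(0.0, min(1.0, v))

        return (clamp01((ex0 + ex1) / 2 / img_width),
                clamp01((ey0 + ey1) / 2 / img_height),
                clamp01((ex1 - ex0) / img_width),
                clamp01((ey1 - ey0) / img_height))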

View File

@@ -0,0 +1,62 @@
"""
Logging Configuration
Provides consistent logging setup for CLI tools and modules.
"""
import logging
import sys
from typing import Optional
def setup_cli_logging(
level: int = logging.INFO,
name: Optional[str] = None,
format_string: Optional[str] = None,
) -> logging.Logger:
"""
Configure logging for CLI applications.
Args:
level: Logging level (default: INFO)
name: Logger name (default: root logger)
format_string: Custom format string (default: simple CLI format)
Returns:
Configured logger instance
"""
if format_string is None:
format_string = "%(message)s"
# Configure root logger or specific logger
logger = logging.getLogger(name)
logger.setLevel(level)
# Remove existing handlers to avoid duplicates
logger.handlers.clear()
# Create console handler on stderr so stdout stays clean for
# machine-readable output (e.g. the JSON that predict.py writes)
handler = logging.StreamHandler(sys.stderr)
handler.setLevel(level)
handler.setFormatter(logging.Formatter(format_string))
logger.addHandler(handler)
return logger
def setup_verbose_logging(
level: int = logging.DEBUG,
name: Optional[str] = None,
) -> logging.Logger:
"""
Configure verbose logging with timestamps and module info.
Args:
level: Logging level (default: DEBUG)
name: Logger name (default: root logger)
Returns:
Configured logger instance
"""
format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
return setup_cli_logging(level=level, name=name, format_string=format_string)
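A minimal usage sketch of the new module (the file names are illustrative), following the pattern this commit applies to each CLI entry point:

    import logging
    from shared.logging_config import setup_cli_logging, setup_verbose_logging

    logger = logging.getLogger(__name__)  # module-level, as in the CLIs in this commit

    def main(verbose: bool = False) -> None:
        # One call at startup; existing handlers are cleared, so re-running is safe.
        if verbose:
            setup_verbose_logging()   # timestamps + logger name + level
        else:
            setup_cli_logging()       # bare "%(message)s" CLI format
        # Both lines go to stderr, keeping stdout free for JSON output.
        logger.info("Processing %d PDF file(s)", 3)
        logger.error("Model not found: %s", "models/best.pt")

    if __name__ == '__main__':
        main()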

View File

@@ -9,6 +9,7 @@ Now reads from PostgreSQL database instead of JSONL files.
import argparse
import csv
import json
import logging
import sys
from collections import defaultdict
from dataclasses import dataclass, field
@@ -16,6 +17,9 @@ from pathlib import Path
from typing import Optional
from shared.config import get_db_connection_string
from shared.logging_config import setup_cli_logging
logger = logging.getLogger(__name__)
from shared.normalize import normalize_field
from shared.matcher import FieldMatcher
@@ -104,7 +108,7 @@ class LabelAnalyzer:
for row in reader:
doc_id = row['DocumentId']
self.csv_data[doc_id] = row
print(f"Loaded {len(self.csv_data)} records from CSV")
logger.info("Loaded %d records from CSV", len(self.csv_data))
def load_labels(self):
"""Load all label files from dataset."""
@@ -150,12 +154,12 @@ class LabelAnalyzer:
for doc in self.label_data.values()
for labels in doc['pages'].values()
)
print(f"Loaded labels for {total_docs} documents ({total_labels} total labels)")
logger.info("Loaded labels for %d documents (%d total labels)", total_docs, total_labels)
def load_report(self):
"""Load autolabel report from database."""
if not self.db:
print("Database not configured, skipping report loading")
logger.info("Database not configured, skipping report loading")
return
# Get document IDs from CSV to query
@@ -175,7 +179,7 @@ class LabelAnalyzer:
self.report_data[doc_id] = doc
loaded += 1
print(f"Loaded {loaded} autolabel reports from database")
logger.info("Loaded %d autolabel reports from database", loaded)
def analyze_document(self, doc_id: str, skip_missing_pdf: bool = True) -> Optional[DocumentAnalysis]:
"""Analyze a single document."""
@@ -373,7 +377,7 @@ class LabelAnalyzer:
break
if skipped > 0:
print(f"Skipped {skipped} documents without PDF files")
logger.info("Skipped %d documents without PDF files", skipped)
return results
@@ -447,7 +451,7 @@ class LabelAnalyzer:
with open(output, 'w', encoding='utf-8') as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nReport saved to: {output}")
logger.info("Report saved to: %s", output)
return report
@@ -456,52 +460,52 @@ def print_summary(report: dict):
"""Print summary to console."""
summary = report['summary']
print("\n" + "=" * 60)
print("LABEL ANALYSIS SUMMARY")
print("=" * 60)
logger.info("=" * 60)
logger.info("LABEL ANALYSIS SUMMARY")
logger.info("=" * 60)
print(f"\nDocuments:")
print(f" Total: {summary['total_documents']}")
print(f" With issues: {summary['documents_with_issues']} ({summary['issue_rate']})")
logger.info("Documents:")
logger.info(" Total: %d", summary['total_documents'])
logger.info(" With issues: %d (%s)", summary['documents_with_issues'], summary['issue_rate'])
print(f"\nFields:")
print(f" Expected: {summary['total_expected_fields']}")
print(f" Labeled: {summary['total_labeled_fields']} ({summary['label_coverage']})")
print(f" Missing: {summary['missing_labels']}")
print(f" Extra: {summary['extra_labels']}")
logger.info("Fields:")
logger.info(" Expected: %d", summary['total_expected_fields'])
logger.info(" Labeled: %d (%s)", summary['total_labeled_fields'], summary['label_coverage'])
logger.info(" Missing: %d", summary['missing_labels'])
logger.info(" Extra: %d", summary['extra_labels'])
print(f"\nFailure Reasons:")
logger.info("Failure Reasons:")
for reason, count in sorted(report['failure_reasons'].items(), key=lambda x: -x[1]):
print(f" {reason}: {count}")
logger.info(" %s: %d", reason, count)
print(f"\nFailures by Field:")
logger.info("Failures by Field:")
for field, reasons in report['failures_by_field'].items():
total = sum(reasons.values())
print(f" {field}: {total}")
logger.info(" %s: %d", field, total)
for reason, count in sorted(reasons.items(), key=lambda x: -x[1]):
print(f" - {reason}: {count}")
logger.info(" - %s: %d", reason, count)
# Show sample issues
if report['issues']:
print(f"\n" + "-" * 60)
print("SAMPLE ISSUES (first 10)")
print("-" * 60)
logger.info("-" * 60)
logger.info("SAMPLE ISSUES (first 10)")
logger.info("-" * 60)
for issue in report['issues'][:10]:
print(f"\n[{issue['doc_id']}] {issue['field']}")
print(f" CSV value: {issue['csv_value']}")
print(f" Reason: {issue['reason']}")
logger.info("[%s] %s", issue['doc_id'], issue['field'])
logger.info(" CSV value: %s", issue['csv_value'])
logger.info(" Reason: %s", issue['reason'])
if issue.get('details'):
details = issue['details']
if details.get('normalized_candidates'):
print(f" Candidates: {details['normalized_candidates'][:5]}")
logger.info(" Candidates: %s", details['normalized_candidates'][:5])
if details.get('pdf_tokens_sample'):
print(f" PDF samples: {details['pdf_tokens_sample'][:5]}")
logger.info(" PDF samples: %s", details['pdf_tokens_sample'][:5])
if details.get('potential_matches'):
print(f" Potential matches:")
logger.info(" Potential matches:")
for pm in details['potential_matches'][:3]:
print(f" - token='{pm['token']}' matches candidate='{pm['candidate']}'")
logger.info(" - token='%s' matches candidate='%s'", pm['token'], pm['candidate'])
def main():
@@ -551,6 +555,9 @@ def main():
args = parser.parse_args()
# Configure logging for CLI
setup_cli_logging()
analyzer = LabelAnalyzer(
csv_path=args.csv,
pdf_dir=args.pdf_dir,
@@ -566,30 +573,30 @@ def main():
analysis = analyzer.analyze_document(args.single)
print(f"\n{'=' * 60}")
print(f"Document: {analysis.doc_id}")
print(f"{'=' * 60}")
print(f"PDF exists: {analysis.pdf_exists}")
print(f"PDF type: {analysis.pdf_type}")
print(f"Pages: {analysis.total_pages}")
print(f"\nFields (CSV: {analysis.csv_fields_count}, Labeled: {analysis.labeled_fields_count}):")
logger.info("=" * 60)
logger.info("Document: %s", analysis.doc_id)
logger.info("=" * 60)
logger.info("PDF exists: %s", analysis.pdf_exists)
logger.info("PDF type: %s", analysis.pdf_type)
logger.info("Pages: %d", analysis.total_pages)
logger.info("Fields (CSV: %d, Labeled: %d):", analysis.csv_fields_count, analysis.labeled_fields_count)
for f in analysis.fields:
status = "" if f.labeled else ("" if f.expected else "-")
status = "[OK]" if f.labeled else ("[FAIL]" if f.expected else "[-]")
value_str = f.csv_value[:30] if f.csv_value else "(empty)"
print(f" [{status}] {f.field_name}: {value_str}")
logger.info(" %s %s: %s", status, f.field_name, value_str)
if f.failure_reason:
print(f" Reason: {f.failure_reason}")
logger.info(" Reason: %s", f.failure_reason)
if f.details.get('normalized_candidates'):
print(f" Candidates: {f.details['normalized_candidates']}")
logger.info(" Candidates: %s", f.details['normalized_candidates'])
if f.details.get('potential_matches'):
print(f" Potential matches in PDF:")
logger.info(" Potential matches in PDF:")
for pm in f.details['potential_matches'][:3]:
print(f" - '{pm['token']}'")
logger.info(" - '%s'", pm['token'])
else:
# Full analysis
print("Running label analysis...")
logger.info("Running label analysis...")
results = analyzer.run_analysis(limit=args.limit)
report = analyzer.generate_report(results, args.output, verbose=args.verbose)
print_summary(report)

View File

@@ -7,11 +7,15 @@ Generates statistics and insights from database or autolabel_report.jsonl
import argparse
import json
import logging
import sys
from collections import defaultdict
from pathlib import Path
from shared.config import get_db_connection_string
from shared.logging_config import setup_cli_logging
logger = logging.getLogger(__name__)
def load_reports_from_db() -> dict:
@@ -147,9 +151,9 @@ def load_reports_from_file(report_path: str) -> list[dict]:
if not report_files:
return []
print(f"Reading {len(report_files)} report file(s):")
logger.info("Reading %d report file(s):", len(report_files))
for f in report_files:
print(f" - {f.name}")
logger.info(" - %s", f.name)
reports = []
for report_file in report_files:
@@ -231,55 +235,55 @@ def analyze_reports(reports: list[dict]) -> dict:
def print_report(stats: dict, verbose: bool = False):
"""Print analysis report."""
print("\n" + "=" * 60)
print("AUTO-LABEL REPORT ANALYSIS")
print("=" * 60)
logger.info("=" * 60)
logger.info("AUTO-LABEL REPORT ANALYSIS")
logger.info("=" * 60)
# Overall stats
print(f"\n{'OVERALL STATISTICS':^60}")
print("-" * 60)
logger.info("%s", "OVERALL STATISTICS".center(60))
logger.info("-" * 60)
total = stats['total']
successful = stats['successful']
failed = stats['failed']
success_rate = successful / total * 100 if total > 0 else 0
print(f"Total documents: {total:>8}")
print(f"Successful: {successful:>8} ({success_rate:.1f}%)")
print(f"Failed: {failed:>8} ({100-success_rate:.1f}%)")
logger.info("Total documents: %8d", total)
logger.info("Successful: %8d (%.1f%%)", successful, success_rate)
logger.info("Failed: %8d (%.1f%%)", failed, 100-success_rate)
# Processing time
if 'processing_time_stats' in stats:
pts = stats['processing_time_stats']
print(f"\nProcessing time (ms):")
print(f" Average: {pts['avg_ms']:>8.1f}")
print(f" Min: {pts['min_ms']:>8.1f}")
print(f" Max: {pts['max_ms']:>8.1f}")
logger.info("Processing time (ms):")
logger.info(" Average: %8.1f", pts['avg_ms'])
logger.info(" Min: %8.1f", pts['min_ms'])
logger.info(" Max: %8.1f", pts['max_ms'])
elif stats.get('processing_times'):
times = stats['processing_times']
avg_time = sum(times) / len(times)
min_time = min(times)
max_time = max(times)
print(f"\nProcessing time (ms):")
print(f" Average: {avg_time:>8.1f}")
print(f" Min: {min_time:>8.1f}")
print(f" Max: {max_time:>8.1f}")
logger.info("Processing time (ms):")
logger.info(" Average: %8.1f", avg_time)
logger.info(" Min: %8.1f", min_time)
logger.info(" Max: %8.1f", max_time)
# By PDF type
print(f"\n{'BY PDF TYPE':^60}")
print("-" * 60)
print(f"{'Type':<15} {'Total':>10} {'Success':>10} {'Rate':>10}")
print("-" * 60)
logger.info("%s", "BY PDF TYPE".center(60))
logger.info("-" * 60)
logger.info("%-15s %10s %10s %10s", 'Type', 'Total', 'Success', 'Rate')
logger.info("-" * 60)
for pdf_type, type_stats in sorted(stats['by_pdf_type'].items()):
type_total = type_stats['total']
type_success = type_stats['successful']
type_rate = type_success / type_total * 100 if type_total > 0 else 0
print(f"{pdf_type:<15} {type_total:>10} {type_success:>10} {type_rate:>9.1f}%")
logger.info("%-15s %10d %10d %9.1f%%", pdf_type, type_total, type_success, type_rate)
# By field
print(f"\n{'FIELD MATCH STATISTICS':^60}")
print("-" * 60)
print(f"{'Field':<18} {'Total':>7} {'Match':>7} {'Rate':>7} {'Exact':>7} {'Flex':>7} {'AvgScore':>8}")
print("-" * 60)
logger.info("%s", "FIELD MATCH STATISTICS".center(60))
logger.info("-" * 60)
logger.info("%-18s %7s %7s %7s %7s %7s %8s", 'Field', 'Total', 'Match', 'Rate', 'Exact', 'Flex', 'AvgScore')
logger.info("-" * 60)
for field_name in ['InvoiceNumber', 'InvoiceDate', 'InvoiceDueDate', 'OCR', 'Bankgiro', 'Plusgiro', 'Amount']:
if field_name not in stats['by_field']:
@@ -299,16 +303,16 @@ def print_report(stats: dict, verbose: bool = False):
else:
avg_score = 0
print(f"{field_name:<18} {total:>7} {matched:>7} {rate:>6.1f}% {exact:>7} {flex:>7} {avg_score:>8.3f}")
logger.info("%-18s %7d %7d %6.1f%% %7d %7d %8.3f", field_name, total, matched, rate, exact, flex, avg_score)
# Field match by PDF type
print(f"\n{'FIELD MATCH BY PDF TYPE':^60}")
print("-" * 60)
logger.info("%s", "FIELD MATCH BY PDF TYPE".center(60))
logger.info("-" * 60)
for pdf_type in sorted(stats['by_pdf_type'].keys()):
print(f"\n[{pdf_type.upper()}]")
print(f"{'Field':<18} {'Total':>10} {'Matched':>10} {'Rate':>10}")
print("-" * 50)
logger.info("[%s]", pdf_type.upper())
logger.info("%-18s %10s %10s %10s", 'Field', 'Total', 'Matched', 'Rate')
logger.info("-" * 50)
for field_name in ['InvoiceNumber', 'InvoiceDate', 'InvoiceDueDate', 'OCR', 'Bankgiro', 'Plusgiro', 'Amount']:
if field_name not in stats['by_field']:
@@ -317,16 +321,16 @@ def print_report(stats: dict, verbose: bool = False):
total = type_stats['total']
matched = type_stats['matched']
rate = matched / total * 100 if total > 0 else 0
print(f"{field_name:<18} {total:>10} {matched:>10} {rate:>9.1f}%")
logger.info("%-18s %10d %10d %9.1f%%", field_name, total, matched, rate)
# Errors
if stats.get('errors') and verbose:
print(f"\n{'ERRORS':^60}")
print("-" * 60)
logger.info("%s", "ERRORS".center(60))
logger.info("-" * 60)
for error, count in sorted(stats['errors'].items(), key=lambda x: -x[1])[:20]:
print(f"{count:>5}x {error[:50]}")
logger.info("%5dx %s", count, error[:50])
print("\n" + "=" * 60)
logger.info("=" * 60)
def export_json(stats: dict, output_path: str):
@@ -372,7 +376,7 @@ def export_json(stats: dict, output_path: str):
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(export_data, f, indent=2, ensure_ascii=False)
print(f"\nStatistics exported to: {output_path}")
logger.info("Statistics exported to: %s", output_path)
def main():
@@ -401,25 +405,28 @@ def main():
args = parser.parse_args()
# Configure logging for CLI
setup_cli_logging()
# Decide source
use_db = not args.from_file and args.report is None
if use_db:
print("Loading statistics from database...")
logger.info("Loading statistics from database...")
stats = load_reports_from_db()
print(f"Loaded stats for {stats['total']} documents")
logger.info("Loaded stats for %d documents", stats['total'])
else:
report_path = args.report or 'reports/autolabel_report.jsonl'
path = Path(report_path)
# Check if file exists (handle glob patterns)
if '*' not in str(path) and '?' not in str(path) and not path.exists():
print(f"Error: Report file not found: {path}")
logger.error("Report file not found: %s", path)
return 1
print(f"Loading reports from: {report_path}")
logger.info("Loading reports from: %s", report_path)
reports = load_reports_from_file(report_path)
print(f"Loaded {len(reports)} reports")
logger.info("Loaded %d reports", len(reports))
stats = analyze_reports(reports)
print_report(stats, verbose=args.verbose)

View File

@@ -6,6 +6,7 @@ Generates YOLO training data from PDFs and structured CSV data.
"""
import argparse
import logging
import sys
import time
import os
@@ -17,6 +18,10 @@ from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed, TimeoutError
import multiprocessing
from shared.logging_config import setup_cli_logging
logger = logging.getLogger(__name__)
# Global flag for graceful shutdown
_shutdown_requested = False
@@ -25,8 +30,8 @@ def _signal_handler(signum, frame):
"""Handle interrupt signals for graceful shutdown."""
global _shutdown_requested
_shutdown_requested = True
print("\n\nShutdown requested. Finishing current batch and saving progress...")
print("(Press Ctrl+C again to force quit)\n")
logger.warning("Shutdown requested. Finishing current batch and saving progress...")
logger.warning("(Press Ctrl+C again to force quit)")
# Windows compatibility: use 'spawn' method for multiprocessing
# This is required on Windows and is also safer for libraries like PaddleOCR
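The comment above refers to code outside this hunk; a hedged sketch of the conventional spawn setup rather than the file's exact lines:

    import multiprocessing

    if __name__ == '__main__':
        # Required on Windows; also safer with native-library workers like PaddleOCR.
        multiprocessing.set_start_method('spawn', force=True)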
@@ -350,11 +355,14 @@ def main():
if ',' in csv_input and '*' not in csv_input:
csv_input = [p.strip() for p in csv_input.split(',')]
# Configure logging for CLI
setup_cli_logging()
# Get list of CSV files (don't load all data at once)
temp_loader = CSVLoader(csv_input, args.pdf_dir)
csv_files = temp_loader.csv_paths
pdf_dir = temp_loader.pdf_dir
print(f"Found {len(csv_files)} CSV file(s) to process")
logger.info("Found %d CSV file(s) to process", len(csv_files))
# Setup output directories
output_dir = Path(args.output)
@@ -371,7 +379,7 @@ def main():
db = DocumentDB()
db.connect()
db.create_tables() # Ensure tables exist
print("Connected to database for status checking")
logger.info("Connected to database for status checking")
# Global stats
stats = {
@@ -443,7 +451,7 @@ def main():
db.save_documents_batch(db_batch)
db_batch.clear()
if args.verbose:
print(f"Error processing {doc_id}: {error}")
logger.error("Error processing %s: %s", doc_id, error)
# Initialize dual-pool coordinator if enabled (keeps workers alive across CSVs)
dual_pool_coordinator = None
@@ -453,7 +461,7 @@ def main():
from training.processing import DualPoolCoordinator
from training.processing.autolabel_tasks import process_text_pdf, process_scanned_pdf
print(f"\nStarting dual-pool mode: {args.cpu_workers} CPU + {args.gpu_workers} GPU workers")
logger.info("Starting dual-pool mode: %d CPU + %d GPU workers", args.cpu_workers, args.gpu_workers)
dual_pool_coordinator = DualPoolCoordinator(
cpu_workers=args.cpu_workers,
gpu_workers=args.gpu_workers,
@@ -467,10 +475,10 @@ def main():
for csv_idx, csv_file in enumerate(csv_files):
# Check for shutdown request
if _shutdown_requested:
print("\nShutdown requested. Stopping after current batch...")
logger.warning("Shutdown requested. Stopping after current batch...")
break
print(f"\n[{csv_idx + 1}/{len(csv_files)}] Processing: {csv_file.name}")
logger.info("[%d/%d] Processing: %s", csv_idx + 1, len(csv_files), csv_file.name)
# Load only this CSV file
single_loader = CSVLoader(str(csv_file), str(pdf_dir))
@@ -488,7 +496,7 @@ def main():
seen_doc_ids.add(r.DocumentId)
if not rows:
print(f" Skipping CSV (no new documents)")
logger.info(" Skipping CSV (no new documents)")
continue
# Batch query database for all document IDs in this CSV
@@ -500,13 +508,13 @@ def main():
# Skip entire CSV if all documents are already processed
if already_processed == len(rows):
print(f" Skipping CSV (all {len(rows)} documents already processed)")
logger.info(" Skipping CSV (all %d documents already processed)", len(rows))
stats['skipped_db'] += len(rows)
continue
# Count how many new documents need processing in this CSV
new_to_process = len(rows) - already_processed
print(f" Found {new_to_process} new documents to process ({already_processed} already in DB)")
logger.info(" Found %d new documents to process (%d already in DB)", new_to_process, already_processed)
stats['total'] += len(rows)
@@ -520,7 +528,7 @@ def main():
if args.limit:
remaining_limit = args.limit - stats.get('tasks_submitted', 0)
if remaining_limit <= 0:
print(f" Reached limit of {args.limit} new documents, stopping.")
logger.info(" Reached limit of %d new documents, stopping.", args.limit)
break
else:
remaining_limit = float('inf')
@@ -583,7 +591,7 @@ def main():
))
if skipped_in_csv > 0 or retry_in_csv > 0:
print(f" Skipped {skipped_in_csv} (already in DB), retrying {retry_in_csv} failed")
logger.info(" Skipped %d (already in DB), retrying %d failed", skipped_in_csv, retry_in_csv)
# Clean up retry documents: delete from database and remove temp folders
if retry_doc_ids:
@@ -599,7 +607,7 @@ def main():
temp_doc_dir = output_dir / 'temp' / doc_id
if temp_doc_dir.exists():
shutil.rmtree(temp_doc_dir, ignore_errors=True)
print(f" Cleaned up {len(retry_doc_ids)} retry documents (DB + temp folders)")
logger.info(" Cleaned up %d retry documents (DB + temp folders)", len(retry_doc_ids))
if not tasks:
continue
@@ -636,7 +644,7 @@ def main():
# Count task types
text_count = sum(1 for d in documents if not d["is_scanned"])
scan_count = len(documents) - text_count
print(f" Text PDFs: {text_count}, Scanned PDFs: {scan_count}")
logger.info(" Text PDFs: %d, Scanned PDFs: %d", text_count, scan_count)
# Progress tracking with tqdm
pbar = tqdm(total=len(documents), desc="Processing")
@@ -667,11 +675,11 @@ def main():
# Log summary
successful = sum(1 for r in results if r.success)
failed = len(results) - successful
print(f" Batch complete: {successful} successful, {failed} failed")
logger.info(" Batch complete: %d successful, %d failed", successful, failed)
else:
# Single-pool mode (original behavior)
print(f" Processing {len(tasks)} documents with {args.workers} workers...")
logger.info(" Processing %d documents with %d workers...", len(tasks), args.workers)
# Process documents in parallel (inside CSV loop for streaming)
# Use single process for debugging or when workers=1
@@ -725,28 +733,28 @@ def main():
db.close()
# Print summary
print("\n" + "=" * 50)
print("Auto-labeling Complete")
print("=" * 50)
print(f"Total documents: {stats['total']}")
print(f"Successful: {stats['successful']}")
print(f"Failed: {stats['failed']}")
print(f"Skipped (no PDF): {stats['skipped']}")
print(f"Skipped (in DB): {stats['skipped_db']}")
print(f"Retried (failed): {stats['retried']}")
print(f"Total annotations: {stats['annotations']}")
print(f"\nImages saved to: {output_dir / 'temp'}")
print(f"Labels stored in: PostgreSQL database")
print(f"\nAnnotations by field:")
logger.info("=" * 50)
logger.info("Auto-labeling Complete")
logger.info("=" * 50)
logger.info("Total documents: %d", stats['total'])
logger.info("Successful: %d", stats['successful'])
logger.info("Failed: %d", stats['failed'])
logger.info("Skipped (no PDF): %d", stats['skipped'])
logger.info("Skipped (in DB): %d", stats['skipped_db'])
logger.info("Retried (failed): %d", stats['retried'])
logger.info("Total annotations: %d", stats['annotations'])
logger.info("Images saved to: %s", output_dir / 'temp')
logger.info("Labels stored in: PostgreSQL database")
logger.info("Annotations by field:")
for field, count in stats['by_field'].items():
print(f" {field}: {count}")
logger.info(" %s: %d", field, count)
shard_files = report_writer.get_shard_files()
if len(shard_files) > 1:
print(f"\nReport files ({len(shard_files)}):")
logger.info("Report files (%d):", len(shard_files))
for sf in shard_files:
print(f" - {sf}")
logger.info(" - %s", sf)
else:
print(f"\nReport: {shard_files[0] if shard_files else args.report}")
logger.info("Report: %s", shard_files[0] if shard_files else args.report)
if __name__ == '__main__':

View File

@@ -8,6 +8,7 @@ Usage:
import argparse
import json
import logging
import sys
from pathlib import Path
@@ -16,6 +17,9 @@ from psycopg2.extras import execute_values
# Add project root to path
from shared.config import get_db_connection_string, PATHS
from shared.logging_config import setup_cli_logging
logger = logging.getLogger(__name__)
def create_tables(conn):
@@ -150,7 +154,7 @@ def import_jsonl_file(conn, jsonl_path: Path, skip_existing: bool = True, batch_
try:
record = json.loads(line)
except json.JSONDecodeError as e:
print(f" Warning: Line {line_no} - JSON parse error: {e}")
logger.warning("Line %d - JSON parse error: %s", line_no, e)
stats['errors'] += 1
continue
@@ -211,7 +215,7 @@ def import_jsonl_file(conn, jsonl_path: Path, skip_existing: bool = True, batch_
# Flush batch if needed
if len(doc_batch) >= batch_size:
flush_batches()
print(f" Processed {stats['imported'] + stats['skipped']} records...")
logger.info(" Processed %d records...", stats['imported'] + stats['skipped'])
# Final flush
flush_batches()
@@ -243,11 +247,14 @@ def main():
else:
report_files = [report_path] if report_path.exists() else []
# Configure logging for CLI
setup_cli_logging()
if not report_files:
print(f"No report files found: {args.report}")
logger.error("No report files found: %s", args.report)
return
print(f"Found {len(report_files)} report file(s)")
logger.info("Found %d report file(s)", len(report_files))
# Connect to database
conn = psycopg2.connect(db_connection)
@@ -257,20 +264,20 @@ def main():
total_stats = {'imported': 0, 'skipped': 0, 'errors': 0}
for report_file in report_files:
print(f"\nImporting: {report_file.name}")
logger.info("Importing: %s", report_file.name)
stats = import_jsonl_file(conn, report_file, skip_existing=not args.no_skip, batch_size=args.batch_size)
print(f" Imported: {stats['imported']}, Skipped: {stats['skipped']}, Errors: {stats['errors']}")
logger.info(" Imported: %d, Skipped: %d, Errors: %d", stats['imported'], stats['skipped'], stats['errors'])
for key in total_stats:
total_stats[key] += stats[key]
# Print summary
print("\n" + "=" * 50)
print("Import Complete")
print("=" * 50)
print(f"Total imported: {total_stats['imported']}")
print(f"Total skipped: {total_stats['skipped']}")
print(f"Total errors: {total_stats['errors']}")
logger.info("=" * 50)
logger.info("Import Complete")
logger.info("=" * 50)
logger.info("Total imported: %d", total_stats['imported'])
logger.info("Total skipped: %d", total_stats['skipped'])
logger.info("Total errors: %d", total_stats['errors'])
# Quick stats from database
with conn.cursor() as cursor:
@@ -288,11 +295,11 @@ def main():
conn.close()
print(f"\nDatabase Stats:")
print(f" Documents: {total_docs} ({success_docs} successful)")
print(f" Field results: {total_fields} ({matched_fields} matched)")
logger.info("Database Stats:")
logger.info(" Documents: %d (%d successful)", total_docs, success_docs)
logger.info(" Field results: %d (%d matched)", total_fields, matched_fields)
if total_fields > 0:
print(f" Match rate: {matched_fields / total_fields * 100:.2f}%")
logger.info(" Match rate: %.2f%%", matched_fields / total_fields * 100)
if __name__ == '__main__':

View File

@@ -7,6 +7,7 @@ CSV values, and source CSV filename in a new table.
import argparse
import json
import glob
import logging
import os
import sys
import time
@@ -20,6 +21,9 @@ from shared.config import DEFAULT_DPI
from shared.data.db import DocumentDB
from shared.data.csv_loader import CSVLoader
from shared.normalize.normalizer import normalize_field
from shared.logging_config import setup_cli_logging
logger = logging.getLogger(__name__)
def create_failed_match_table(db: DocumentDB):
@@ -57,7 +61,7 @@ def create_failed_match_table(db: DocumentDB):
CREATE INDEX IF NOT EXISTS idx_failed_match_matched ON failed_match_details(matched);
""")
conn.commit()
print("Created table: failed_match_details")
logger.info("Created table: failed_match_details")
def get_failed_documents(db: DocumentDB) -> list:
@@ -332,14 +336,17 @@ def main():
parser.add_argument('--limit', type=int, help='Limit number of documents to process')
args = parser.parse_args()
# Configure logging for CLI
setup_cli_logging()
# Expand CSV glob
csv_files = sorted(glob.glob(args.csv))
print(f"Found {len(csv_files)} CSV files")
logger.info("Found %d CSV files", len(csv_files))
# Build CSV cache
print("Building CSV filename cache...")
logger.info("Building CSV filename cache...")
build_csv_cache(csv_files)
print(f"Cached {len(_csv_cache)} document IDs")
logger.info("Cached %d document IDs", len(_csv_cache))
# Connect to database
db = DocumentDB()
@@ -349,13 +356,13 @@ def main():
create_failed_match_table(db)
# Get all failed documents
print("Fetching failed documents...")
logger.info("Fetching failed documents...")
failed_docs = get_failed_documents(db)
print(f"Found {len(failed_docs)} documents with failed matches")
logger.info("Found %d documents with failed matches", len(failed_docs))
if args.limit:
failed_docs = failed_docs[:args.limit]
print(f"Limited to {len(failed_docs)} documents")
logger.info("Limited to %d documents", len(failed_docs))
# Prepare tasks
tasks = []
@@ -365,7 +372,7 @@ def main():
if failed_fields:
tasks.append((doc, failed_fields, csv_filename))
print(f"Processing {len(tasks)} documents with {args.workers} workers...")
logger.info("Processing %d documents with %d workers...", len(tasks), args.workers)
# Process with multiprocessing
total_results = 0
@@ -389,15 +396,15 @@ def main():
batch_results = []
except TimeoutError:
print(f"\nTimeout processing {doc_id}")
logger.warning("Timeout processing %s", doc_id)
except Exception as e:
print(f"\nError processing {doc_id}: {e}")
logger.error("Error processing %s: %s", doc_id, e)
# Save remaining results
if batch_results:
save_results_batch(db, batch_results)
print(f"\nDone! Saved {total_results} failed match records to failed_match_details table")
logger.info("Done! Saved %d failed match records to failed_match_details table", total_results)
# Show summary
conn = db.connect()
@@ -410,12 +417,12 @@ def main():
GROUP BY field_name
ORDER BY total DESC
""")
print("\nSummary by field:")
print("-" * 70)
print(f"{'Field':<35} {'Total':>8} {'Has OCR':>10} {'Avg Score':>12}")
print("-" * 70)
logger.info("Summary by field:")
logger.info("-" * 70)
logger.info("%-35s %8s %10s %12s", 'Field', 'Total', 'Has OCR', 'Avg Score')
logger.info("-" * 70)
for row in cursor.fetchall():
print(f"{row[0]:<35} {row[1]:>8} {row[2]:>10} {row[3]:>12.2f}")
logger.info("%-35s %8d %10d %12.2f", row[0], row[1], row[2], row[3])
db.close()

View File

@@ -7,10 +7,14 @@ Images are read from filesystem, labels are dynamically generated from DB.
"""
import argparse
import logging
import sys
from pathlib import Path
from shared.config import DEFAULT_DPI, PATHS
from shared.logging_config import setup_cli_logging
logger = logging.getLogger(__name__)
def main():
@@ -119,47 +123,50 @@ def main():
args = parser.parse_args()
# Configure logging for CLI
setup_cli_logging()
# Apply low-memory mode if specified
if args.low_memory:
print("🔧 Low memory mode enabled")
logger.info("Low memory mode enabled")
args.batch = min(args.batch, 8) # Reduce from 16 to 8
args.workers = min(args.workers, 4) # Reduce from 8 to 4
args.cache = False
print(f" Batch size: {args.batch}")
print(f" Workers: {args.workers}")
print(f" Cache: disabled")
logger.info(" Batch size: %d", args.batch)
logger.info(" Workers: %d", args.workers)
logger.info(" Cache: disabled")
# Validate dataset directory
dataset_dir = Path(args.dataset_dir)
temp_dir = dataset_dir / 'temp'
if not temp_dir.exists():
print(f"Error: Temp directory not found: {temp_dir}")
print("Run autolabel first to generate images.")
logger.error("Temp directory not found: %s", temp_dir)
logger.error("Run autolabel first to generate images.")
sys.exit(1)
print("=" * 60)
print("YOLO Training with Database Labels")
print("=" * 60)
print(f"Dataset dir: {dataset_dir}")
print(f"Model: {args.model}")
print(f"Epochs: {args.epochs}")
print(f"Batch size: {args.batch}")
print(f"Image size: {args.imgsz}")
print(f"Split ratio: {args.train_ratio}/{args.val_ratio}/{1-args.train_ratio-args.val_ratio:.1f}")
logger.info("=" * 60)
logger.info("YOLO Training with Database Labels")
logger.info("=" * 60)
logger.info("Dataset dir: %s", dataset_dir)
logger.info("Model: %s", args.model)
logger.info("Epochs: %d", args.epochs)
logger.info("Batch size: %d", args.batch)
logger.info("Image size: %d", args.imgsz)
logger.info("Split ratio: %s/%s/%.1f", args.train_ratio, args.val_ratio, 1-args.train_ratio-args.val_ratio)
if args.limit:
print(f"Document limit: {args.limit}")
logger.info("Document limit: %d", args.limit)
# Connect to database
from shared.data.db import DocumentDB
print("\nConnecting to database...")
logger.info("Connecting to database...")
db = DocumentDB()
db.connect()
# Create datasets from database
from training.yolo.db_dataset import create_datasets
print("Loading dataset from database...")
logger.info("Loading dataset from database...")
datasets = create_datasets(
images_dir=dataset_dir,
db=db,
@@ -170,39 +177,39 @@ def main():
limit=args.limit
)
print(f"\nDataset splits:")
print(f" Train: {len(datasets['train'])} items")
print(f" Val: {len(datasets['val'])} items")
print(f" Test: {len(datasets['test'])} items")
logger.info("Dataset splits:")
logger.info(" Train: %d items", len(datasets['train']))
logger.info(" Val: %d items", len(datasets['val']))
logger.info(" Test: %d items", len(datasets['test']))
if len(datasets['train']) == 0:
print("\nError: No training data found!")
print("Make sure autolabel has been run and images exist in temp directory.")
logger.error("No training data found!")
logger.error("Make sure autolabel has been run and images exist in temp directory.")
db.close()
sys.exit(1)
# Export to YOLO format (required for Ultralytics training)
print("\nExporting dataset to YOLO format...")
logger.info("Exporting dataset to YOLO format...")
for split_name, dataset in datasets.items():
count = dataset.export_to_yolo_format(dataset_dir, split_name)
print(f" {split_name}: {count} items exported")
logger.info(" %s: %d items exported", split_name, count)
# Generate YOLO config files
from training.yolo.annotation_generator import AnnotationGenerator
AnnotationGenerator.generate_classes_file(dataset_dir / 'classes.txt')
AnnotationGenerator.generate_yaml_config(dataset_dir / 'dataset.yaml')
print(f"\nGenerated dataset.yaml at: {dataset_dir / 'dataset.yaml'}")
logger.info("Generated dataset.yaml at: %s", dataset_dir / 'dataset.yaml')
if args.export_only:
print("\nExport complete (--export-only specified, skipping training)")
logger.info("Export complete (--export-only specified, skipping training)")
db.close()
return
# Start training using shared trainer
print("\n" + "=" * 60)
print("Starting YOLO Training")
print("=" * 60)
logger.info("=" * 60)
logger.info("Starting YOLO Training")
logger.info("=" * 60)
from shared.training import YOLOTrainer, TrainingConfig
@@ -232,30 +239,30 @@ def main():
result = trainer.train()
if not result.success:
print(f"\nError: Training failed - {result.error}")
logger.error("Training failed - %s", result.error)
db.close()
sys.exit(1)
# Print results
print("\n" + "=" * 60)
print("Training Complete")
print("=" * 60)
print(f"Best model: {result.model_path}")
print(f"Save directory: {result.save_dir}")
logger.info("=" * 60)
logger.info("Training Complete")
logger.info("=" * 60)
logger.info("Best model: %s", result.model_path)
logger.info("Save directory: %s", result.save_dir)
if result.metrics:
print(f"mAP@0.5: {result.metrics.get('mAP50', 'N/A')}")
print(f"mAP@0.5-0.95: {result.metrics.get('mAP50-95', 'N/A')}")
logger.info("mAP@0.5: %s", result.metrics.get('mAP50', 'N/A'))
logger.info("mAP@0.5-0.95: %s", result.metrics.get('mAP50-95', 'N/A'))
# Validate on test set
print("\nRunning validation on test set...")
logger.info("Running validation on test set...")
if result.model_path:
config.model_path = result.model_path
config.data_yaml = str(data_yaml)
test_trainer = YOLOTrainer(config=config)
test_metrics = test_trainer.validate(split='test')
if test_metrics:
print(f"mAP50: {test_metrics.get('mAP50', 0):.4f}")
print(f"mAP50-95: {test_metrics.get('mAP50-95', 0):.4f}")
logger.info("mAP50: %.4f", test_metrics.get('mAP50', 0))
logger.info("mAP50-95: %.4f", test_metrics.get('mAP50-95', 0))
# Close database
db.close()

View File

@@ -7,9 +7,14 @@ and comparing the extraction results.
"""
import argparse
import logging
import sys
from pathlib import Path
from shared.logging_config import setup_cli_logging
logger = logging.getLogger(__name__)
def main():
@@ -73,6 +78,9 @@ def main():
parser.print_help()
return
# Configure logging for CLI
setup_cli_logging()
from backend.validation import LLMValidator
validator = LLMValidator()
@@ -104,60 +112,58 @@ def show_stats(validator):
"""Show statistics about failed matches."""
stats = validator.get_failed_match_stats()
print("\n" + "=" * 50)
print("Failed Match Statistics")
print("=" * 50)
print(f"\nDocuments with failures: {stats['documents_with_failures']}")
print(f"Already validated: {stats['already_validated']}")
print(f"Remaining to validate: {stats['remaining']}")
print("\nFailures by field:")
logger.info("=" * 50)
logger.info("Failed Match Statistics")
logger.info("=" * 50)
logger.info("Documents with failures: %d", stats['documents_with_failures'])
logger.info("Already validated: %d", stats['already_validated'])
logger.info("Remaining to validate: %d", stats['remaining'])
logger.info("Failures by field:")
for field, count in sorted(stats['failures_by_field'].items(), key=lambda x: -x[1]):
print(f" {field}: {count}")
logger.info(" %s: %d", field, count)
def validate_single(validator, doc_id: str, provider: str, model: str):
"""Validate a single document."""
print(f"\nValidating document: {doc_id}")
print(f"Provider: {provider}, Model: {model or 'default'}")
print()
logger.info("Validating document: %s", doc_id)
logger.info("Provider: %s, Model: %s", provider, model or 'default')
result = validator.validate_document(doc_id, provider, model)
if result.error:
print(f"ERROR: {result.error}")
logger.error("ERROR: %s", result.error)
return
print(f"Processing time: {result.processing_time_ms:.0f}ms")
print(f"Model used: {result.model_used}")
print("\nExtracted fields:")
print(f" Invoice Number: {result.invoice_number}")
print(f" Invoice Date: {result.invoice_date}")
print(f" Due Date: {result.invoice_due_date}")
print(f" OCR: {result.ocr_number}")
print(f" Bankgiro: {result.bankgiro}")
print(f" Plusgiro: {result.plusgiro}")
print(f" Amount: {result.amount}")
print(f" Org Number: {result.supplier_organisation_number}")
logger.info("Processing time: %.0fms", result.processing_time_ms)
logger.info("Model used: %s", result.model_used)
logger.info("Extracted fields:")
logger.info(" Invoice Number: %s", result.invoice_number)
logger.info(" Invoice Date: %s", result.invoice_date)
logger.info(" Due Date: %s", result.invoice_due_date)
logger.info(" OCR: %s", result.ocr_number)
logger.info(" Bankgiro: %s", result.bankgiro)
logger.info(" Plusgiro: %s", result.plusgiro)
logger.info(" Amount: %s", result.amount)
logger.info(" Org Number: %s", result.supplier_organisation_number)
# Show comparison
print("\n" + "-" * 50)
print("Comparison with autolabel:")
logger.info("-" * 50)
logger.info("Comparison with autolabel:")
comparison = validator.compare_results(doc_id)
for field, data in comparison.items():
if data.get('csv_value'):
status = "" if data['agreement'] else ""
status = "[OK]" if data['agreement'] else "[FAIL]"
auto_status = "matched" if data['autolabel_matched'] else "FAILED"
print(f" {status} {field}:")
print(f" CSV: {data['csv_value']}")
print(f" Autolabel: {data['autolabel_text']} ({auto_status})")
print(f" LLM: {data['llm_value']}")
logger.info(" %s %s:", status, field)
logger.info(" CSV: %s", data['csv_value'])
logger.info(" Autolabel: %s (%s)", data['autolabel_text'], auto_status)
logger.info(" LLM: %s", data['llm_value'])
def validate_batch(validator, limit: int, provider: str, model: str):
"""Validate a batch of documents."""
print(f"\nValidating up to {limit} documents with failed matches")
print(f"Provider: {provider}, Model: {model or 'default'}")
print()
logger.info("Validating up to %d documents with failed matches", limit)
logger.info("Provider: %s, Model: %s", provider, model or 'default')
results = validator.validate_batch(
limit=limit,
@@ -171,15 +177,15 @@ def validate_batch(validator, limit: int, provider: str, model: str):
failed = len(results) - success
total_time = sum(r.processing_time_ms or 0 for r in results)
print("\n" + "=" * 50)
print("Validation Complete")
print("=" * 50)
print(f"Total: {len(results)}")
print(f"Success: {success}")
print(f"Failed: {failed}")
print(f"Total time: {total_time/1000:.1f}s")
logger.info("=" * 50)
logger.info("Validation Complete")
logger.info("=" * 50)
logger.info("Total: %d", len(results))
logger.info("Success: %d", success)
logger.info("Failed: %d", failed)
logger.info("Total time: %.1fs", total_time/1000)
if success > 0:
print(f"Avg time: {total_time/success:.0f}ms per document")
logger.info("Avg time: %.0fms per document", total_time/success)
def compare_single(validator, doc_id: str):
@@ -187,23 +193,23 @@ def compare_single(validator, doc_id: str):
comparison = validator.compare_results(doc_id)
if 'error' in comparison:
print(f"Error: {comparison['error']}")
logger.error("Error: %s", comparison['error'])
return
print(f"\nComparison for document: {doc_id}")
print("=" * 60)
logger.info("Comparison for document: %s", doc_id)
logger.info("=" * 60)
for field, data in comparison.items():
if data.get('csv_value') is None:
continue
status = "" if data['agreement'] else ""
status = "[OK]" if data['agreement'] else "[FAIL]"
auto_status = "matched" if data['autolabel_matched'] else "FAILED"
print(f"\n{status} {field}:")
print(f" CSV value: {data['csv_value']}")
print(f" Autolabel: {data['autolabel_text']} ({auto_status})")
print(f" LLM extracted: {data['llm_value']}")
logger.info("%s %s:", status, field)
logger.info(" CSV value: %s", data['csv_value'])
logger.info(" Autolabel: %s (%s)", data['autolabel_text'], auto_status)
logger.info(" LLM extracted: %s", data['llm_value'])
def compare_all(validator, limit: int):
@@ -220,11 +226,11 @@ def compare_all(validator, limit: int):
doc_ids = [row[0] for row in cursor.fetchall()]
if not doc_ids:
print("No validated documents found.")
logger.info("No validated documents found.")
return
print(f"\nComparison Summary ({len(doc_ids)} documents)")
print("=" * 80)
logger.info("Comparison Summary (%d documents)", len(doc_ids))
logger.info("=" * 80)
# Aggregate stats
field_stats = {}
@@ -259,12 +265,12 @@ def compare_all(validator, limit: int):
if not data['autolabel_matched'] and data['agreement']:
stats['llm_correct_auto_wrong'] += 1
print(f"\n{'Field':<30} {'Total':>6} {'Auto OK':>8} {'LLM Agrees':>10} {'LLM Found':>10}")
print("-" * 80)
logger.info("%-30s %6s %8s %10s %10s", 'Field', 'Total', 'Auto OK', 'LLM Agrees', 'LLM Found')
logger.info("-" * 80)
for field, stats in sorted(field_stats.items()):
print(f"{field:<30} {stats['total']:>6} {stats['autolabel_matched']:>8} "
f"{stats['llm_agrees']:>10} {stats['llm_correct_auto_wrong']:>10}")
logger.info("%-30s %6d %8d %10d %10d", field, stats['total'], stats['autolabel_matched'],
stats['llm_agrees'], stats['llm_correct_auto_wrong'])
def generate_report(validator, output_path: str):
@@ -328,8 +334,8 @@ def generate_report(validator, output_path: str):
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nReport generated: {output_path}")
print(f"Total validations: {len(validations)}")
logger.info("Report generated: %s", output_path)
logger.info("Total validations: %d", len(validations))
if __name__ == '__main__':

View File

@@ -18,7 +18,8 @@ import numpy as np
from PIL import Image
from shared.config import DEFAULT_DPI
from shared.fields import TRAINING_FIELD_CLASSES as FIELD_CLASSES
from shared.fields import TRAINING_FIELD_CLASSES as FIELD_CLASSES, CLASS_NAMES
from shared.bbox import expand_bbox
from .annotation_generator import YOLOAnnotation
logger = logging.getLogger(__name__)
@@ -156,7 +157,7 @@ class DBYOLODataset:
# Split items for this split
instance.items = instance._split_dataset_from_cache()
print(f"Split '{split}': {len(instance.items)} items")
logger.info("Split '%s': %d items", split, len(instance.items))
return instance
@@ -165,7 +166,7 @@ class DBYOLODataset:
# Find all document directories
temp_dir = self.images_dir / 'temp'
if not temp_dir.exists():
print(f"Temp directory not found: {temp_dir}")
logger.warning("Temp directory not found: %s", temp_dir)
return
# Collect all document IDs with images
@@ -182,13 +183,13 @@ class DBYOLODataset:
if images:
doc_image_map[doc_dir.name] = sorted(images)
print(f"Found {len(doc_image_map)} documents with images")
logger.info("Found %d documents with images", len(doc_image_map))
# Query database for all document labels
doc_ids = list(doc_image_map.keys())
doc_labels = self._load_labels_from_db(doc_ids)
print(f"Loaded labels for {len(doc_labels)} documents from database")
logger.info("Loaded labels for %d documents from database", len(doc_labels))
# Build dataset items
all_items: list[DatasetItem] = []
@@ -227,19 +228,19 @@ class DBYOLODataset:
else:
skipped_no_labels += 1
print(f"Total images found: {total_images}")
print(f"Images with labels: {len(all_items)}")
logger.info("Total images found: %d", total_images)
logger.info("Images with labels: %d", len(all_items))
if skipped_no_db_record > 0:
print(f"Skipped {skipped_no_db_record} images (document not in database)")
logger.info("Skipped %d images (document not in database)", skipped_no_db_record)
if skipped_no_labels > 0:
print(f"Skipped {skipped_no_labels} images (no labels for page)")
logger.info("Skipped %d images (no labels for page)", skipped_no_labels)
# Cache all items for sharing with other splits
self._all_items = all_items
# Split dataset
self.items, self._doc_ids_ordered = self._split_dataset(all_items)
print(f"Split '{self.split}': {len(self.items)} items")
logger.info("Split '%s': %d items", self.split, len(self.items))
def _load_labels_from_db(self, doc_ids: list[str]) -> dict[str, tuple[dict[int, list[YOLOAnnotation]], bool, str | None]]:
"""
@@ -374,7 +375,7 @@ class DBYOLODataset:
if has_csv_splits:
# Use CSV-defined splits
print("Using CSV-defined split field for train/val/test assignment")
logger.info("Using CSV-defined split field for train/val/test assignment")
# Map split values: 'train' -> train, 'test' -> test, None -> train (fallback)
# 'val' is taken from train set using val_ratio
@@ -411,11 +412,11 @@ class DBYOLODataset:
# Apply limit if specified
if self.limit is not None and self.limit < len(split_doc_ids):
split_doc_ids = split_doc_ids[:self.limit]
print(f"Limited to {self.limit} documents")
logger.info("Limited to %d documents", self.limit)
else:
# Fall back to random splitting (original behavior)
print("No CSV split field found, using random splitting")
logger.info("No CSV split field found, using random splitting")
random.seed(self.seed)
random.shuffle(doc_ids)
@@ -423,7 +424,7 @@ class DBYOLODataset:
# Apply limit if specified (before splitting)
if self.limit is not None and self.limit < len(doc_ids):
doc_ids = doc_ids[:self.limit]
print(f"Limited to {self.limit} documents")
logger.info("Limited to %d documents", self.limit)
# Calculate split indices
n_total = len(doc_ids)
@@ -549,6 +550,8 @@ class DBYOLODataset:
"""
Convert annotations to normalized YOLO format.
Uses field-specific bbox expansion strategies via expand_bbox.
Args:
annotations: List of annotations
img_width: Actual image width in pixels
@@ -568,26 +571,43 @@ class DBYOLODataset:
labels = []
for ann in annotations:
# Convert to pixels (if needed)
x_center_px = ann.x_center * scale
y_center_px = ann.y_center * scale
width_px = ann.width * scale
height_px = ann.height * scale
# Convert center+size to corner coords in PDF points
half_w = ann.width / 2
half_h = ann.height / 2
x0_pdf = ann.x_center - half_w
y0_pdf = ann.y_center - half_h
x1_pdf = ann.x_center + half_w
y1_pdf = ann.y_center + half_h
# Add padding
pad = self.bbox_padding_px
width_px += 2 * pad
height_px += 2 * pad
# Convert to pixels
x0_px = x0_pdf * scale
y0_px = y0_pdf * scale
x1_px = x1_pdf * scale
y1_px = y1_pdf * scale
# Get class name for field-specific expansion
class_name = CLASS_NAMES[ann.class_id]
# Apply field-specific bbox expansion
x0, y0, x1, y1 = expand_bbox(
bbox=(x0_px, y0_px, x1_px, y1_px),
image_width=img_width,
image_height=img_height,
field_type=class_name,
)
# Ensure minimum height
height_px = y1 - y0
if height_px < self.min_bbox_height_px:
# Compute the shortfall before expanding (overwriting height_px first would zero it out)
extra = (self.min_bbox_height_px - height_px) / 2
y0 = max(0, int(y0 - extra))
y1 = min(img_height, int(y1 + extra))
# Normalize to 0-1
x_center = x_center_px / img_width
y_center = y_center_px / img_height
width = width_px / img_width
height = height_px / img_height
# Convert to YOLO format (normalized center + size)
x_center = (x0 + x1) / 2 / img_width
y_center = (y0 + y1) / 2 / img_height
width = (x1 - x0) / img_width
height = (y1 - y0) / img_height
# Clamp to valid range
x_center = max(0, min(1, x_center))
@@ -675,7 +695,7 @@ class DBYOLODataset:
count += 1
print(f"Exported {count} items to {output_dir / split_name}")
logger.info("Exported %d items to %s", count, output_dir / split_name)
return count
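A worked check of the center/size normalization from the conversion hunk above, with illustrative numbers (2480x3508 is the A4-at-300-DPI fallback used elsewhere in this commit):

    img_width, img_height = 2480, 3508
    x0, y0, x1, y1 = 496.0, 350.8, 992.0, 701.6  # expanded pixel box (example values)

    x_center = (x0 + x1) / 2 / img_width    # 744.0 / 2480 = 0.30
    y_center = (y0 + y1) / 2 / img_height   # 526.2 / 3508 = 0.15
    width = (x1 - x0) / img_width           # 496.0 / 2480 = 0.20
    height = (y1 - y0) / img_height         # 350.8 / 3508 = 0.10

    assert abs(x_center - 0.30) < 1e-9 and abs(height - 0.10) < 1e-9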
@@ -706,7 +726,7 @@ def create_datasets(
Dict with 'train', 'val', 'test' datasets
"""
# Create first dataset which loads all data
print("Loading dataset (this may take a few minutes for large datasets)...")
logger.info("Loading dataset (this may take a few minutes for large datasets)...")
first_dataset = DBYOLODataset(
images_dir=images_dir,
db=db,