feat: add field-specific bbox expansion strategies for YOLO training

Implement center-point-based bbox scaling with directional compensation to capture field labels that typically appear above or to the left of field values. This improves YOLO training data quality by including contextual information around field values.

Key changes:
- Add shared.bbox module with ScaleStrategy dataclass and expand_bbox function
- Define field-specific strategies (ocr_number, bankgiro, invoice_date, etc.)
- Support manual_mode for minimal padding (no scaling)
- Integrate expand_bbox into AnnotationGenerator
- Add FIELD_TO_CLASS mapping for field_name to class_name lookup
- Comprehensive tests with 100% coverage (45 tests)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 22:56:52 +01:00
parent 8723ef4653
commit 0990239e9c
13 changed files with 1424 additions and 18 deletions
--- a/packages/training/training/yolo/annotation_generator.py
+++ b/packages/training/training/yolo/annotation_generator.py
@@ -2,6 +2,7 @@
 YOLO Annotation Generator

 Generates YOLO format annotations from matched fields.
+Uses field-specific bbox expansion strategies for optimal training data.
 """

 from dataclasses import dataclass
@@ -14,7 +15,9 @@ from shared.fields import (
    TRAINING_FIELD_CLASSES as FIELD_CLASSES,
    CLASS_NAMES,
    ACCOUNT_FIELD_MAPPING,
+    FIELD_TO_CLASS,
 )
+from shared.bbox import expand_bbox


@dataclass
@@ -38,19 +41,16 @@ class AnnotationGenerator:
    def __init__(
        self,
        min_confidence: float = 0.7,
-        bbox_padding_px: int = 20,  # Absolute padding in pixels
-        min_bbox_height_px: int = 30  # Minimum bbox height
+        min_bbox_height_px: int = 30,  # Minimum bbox height
    ):
        """
        Initialize annotation generator.

        Args:
            min_confidence: Minimum match score to include in training
-            bbox_padding_px: Absolute padding in pixels to add around bboxes
            min_bbox_height_px: Minimum bbox height in pixels
        """
        self.min_confidence = min_confidence
-        self.bbox_padding_px = bbox_padding_px
        self.min_bbox_height_px = min_bbox_height_px

    def generate_from_matches(
@@ -63,6 +63,10 @@ class AnnotationGenerator:
        """
        Generate YOLO annotations from field matches.

+        Uses field-specific bbox expansion strategies for optimal training data.
+        Each field type has customized scale factors and directional compensation
+        to capture field labels and context.
+
        Args:
            matches: Dict of field_name -> list of Match objects
            image_width: Width of the rendered image in pixels
@@ -82,6 +86,8 @@ class AnnotationGenerator:
                continue

            class_id = FIELD_CLASSES[field_name]
+            # Get class_name for bbox expansion strategy
+            class_name = FIELD_TO_CLASS.get(field_name, field_name)

            # Take only the best match per field
            if field_matches:
@@ -94,19 +100,20 @@ class AnnotationGenerator:
                x0, y0, x1, y1 = best_match.bbox
                x0, y0, x1, y1 = x0 * scale, y0 * scale, x1 * scale, y1 * scale

-                # Add absolute padding
-                pad = self.bbox_padding_px
-                x0 = max(0, x0 - pad)
-                y0 = max(0, y0 - pad)
-                x1 = min(image_width, x1 + pad)
-                y1 = min(image_height, y1 + pad)
+                # Apply field-specific bbox expansion strategy
+                x0, y0, x1, y1 = expand_bbox(
+                    bbox=(x0, y0, x1, y1),
+                    image_width=image_width,
+                    image_height=image_height,
+                    field_type=class_name,
+                )

                # Ensure minimum height
                current_height = y1 - y0
                if current_height < self.min_bbox_height_px:
                    extra = (self.min_bbox_height_px - current_height) / 2
-                    y0 = max(0, y0 - extra)
-                    y1 = min(image_height, y1 + extra)
+                    y0 = max(0, int(y0 - extra))
+                    y1 = min(int(image_height), int(y1 + extra))

                # Convert to YOLO format (normalized center + size)
                x_center = (x0 + x1) / 2 / image_width
@@ -143,6 +150,9 @@ class AnnotationGenerator:
        """
        Add payment_line annotation from machine code parser result.

+        Uses "payment_line" scale strategy for minimal expansion
+        (machine-readable code needs less context).
+
        Args:
            annotations: Existing list of annotations to append to
            payment_line_bbox: Bounding box (x0, y0, x1, y1) in PDF coordinates
@@ -163,12 +173,13 @@ class AnnotationGenerator:
        x0, y0, x1, y1 = payment_line_bbox
        x0, y0, x1, y1 = x0 * scale, y0 * scale, x1 * scale, y1 * scale

-        # Add absolute padding
-        pad = self.bbox_padding_px
-        x0 = max(0, x0 - pad)
-        y0 = max(0, y0 - pad)
-        x1 = min(image_width, x1 + pad)
-        y1 = min(image_height, y1 + pad)
+        # Apply field-specific bbox expansion strategy for payment_line
+        x0, y0, x1, y1 = expand_bbox(
+            bbox=(x0, y0, x1, y1),
+            image_width=image_width,
+            image_height=image_height,
+            field_type="payment_line",
+        )

        # Convert to YOLO format (normalized center + size)
        x_center = (x0 + x1) / 2 / image_width