feat: add field-specific bbox expansion strategies for YOLO training
Implement center-point-based bbox scaling with directional compensation to capture field labels that typically appear above or to the left of field values. This improves YOLO training data quality by including contextual information around field values.

Key changes:
- Add shared.bbox module with ScaleStrategy dataclass and expand_bbox function
- Define field-specific strategies (ocr_number, bankgiro, invoice_date, etc.)
- Support manual_mode for minimal padding (no scaling)
- Integrate expand_bbox into AnnotationGenerator
- Add FIELD_TO_CLASS mapping for field_name to class_name lookup
- Comprehensive tests with 100% coverage (45 tests)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
YOLO Annotation Generator
|
||||
|
||||
Generates YOLO format annotations from matched fields.
|
||||
Uses field-specific bbox expansion strategies for optimal training data.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
@@ -14,7 +15,9 @@ from shared.fields import (
|
||||
TRAINING_FIELD_CLASSES as FIELD_CLASSES,
|
||||
CLASS_NAMES,
|
||||
ACCOUNT_FIELD_MAPPING,
|
||||
FIELD_TO_CLASS,
|
||||
)
|
||||
from shared.bbox import expand_bbox
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -38,19 +41,16 @@ class AnnotationGenerator:
|
||||
def __init__(
|
||||
self,
|
||||
min_confidence: float = 0.7,
|
||||
bbox_padding_px: int = 20, # Absolute padding in pixels
|
||||
min_bbox_height_px: int = 30 # Minimum bbox height
|
||||
min_bbox_height_px: int = 30, # Minimum bbox height
|
||||
):
|
||||
"""
|
||||
Initialize annotation generator.
|
||||
|
||||
Args:
|
||||
min_confidence: Minimum match score to include in training
|
||||
bbox_padding_px: Absolute padding in pixels to add around bboxes
|
||||
min_bbox_height_px: Minimum bbox height in pixels
|
||||
"""
|
||||
self.min_confidence = min_confidence
|
||||
self.bbox_padding_px = bbox_padding_px
|
||||
self.min_bbox_height_px = min_bbox_height_px
|
||||
|
||||
def generate_from_matches(
|
||||
@@ -63,6 +63,10 @@ class AnnotationGenerator:
|
||||
"""
|
||||
Generate YOLO annotations from field matches.
|
||||
|
||||
Uses field-specific bbox expansion strategies for optimal training data.
|
||||
Each field type has customized scale factors and directional compensation
|
||||
to capture field labels and context.
|
||||
|
||||
Args:
|
||||
matches: Dict of field_name -> list of Match objects
|
||||
image_width: Width of the rendered image in pixels
|
||||
@@ -82,6 +86,8 @@ class AnnotationGenerator:
|
||||
continue
|
||||
|
||||
class_id = FIELD_CLASSES[field_name]
|
||||
# Get class_name for bbox expansion strategy
|
||||
class_name = FIELD_TO_CLASS.get(field_name, field_name)
|
||||
|
||||
# Take only the best match per field
|
||||
if field_matches:
|
||||
@@ -94,19 +100,20 @@ class AnnotationGenerator:
|
||||
x0, y0, x1, y1 = best_match.bbox
|
||||
x0, y0, x1, y1 = x0 * scale, y0 * scale, x1 * scale, y1 * scale
|
||||
|
||||
# Add absolute padding
|
||||
pad = self.bbox_padding_px
|
||||
x0 = max(0, x0 - pad)
|
||||
y0 = max(0, y0 - pad)
|
||||
x1 = min(image_width, x1 + pad)
|
||||
y1 = min(image_height, y1 + pad)
|
||||
# Apply field-specific bbox expansion strategy
|
||||
x0, y0, x1, y1 = expand_bbox(
|
||||
bbox=(x0, y0, x1, y1),
|
||||
image_width=image_width,
|
||||
image_height=image_height,
|
||||
field_type=class_name,
|
||||
)
|
||||
|
||||
# Ensure minimum height
|
||||
current_height = y1 - y0
|
||||
if current_height < self.min_bbox_height_px:
|
||||
extra = (self.min_bbox_height_px - current_height) / 2
|
||||
y0 = max(0, y0 - extra)
|
||||
y1 = min(image_height, y1 + extra)
|
||||
y0 = max(0, int(y0 - extra))
|
||||
y1 = min(int(image_height), int(y1 + extra))
|
||||
|
||||
# Convert to YOLO format (normalized center + size)
|
||||
x_center = (x0 + x1) / 2 / image_width
|
||||
@@ -143,6 +150,9 @@ class AnnotationGenerator:
|
||||
"""
|
||||
Add payment_line annotation from machine code parser result.
|
||||
|
||||
Uses "payment_line" scale strategy for minimal expansion
|
||||
(machine-readable code needs less context).
|
||||
|
||||
Args:
|
||||
annotations: Existing list of annotations to append to
|
||||
payment_line_bbox: Bounding box (x0, y0, x1, y1) in PDF coordinates
|
||||
@@ -163,12 +173,13 @@ class AnnotationGenerator:
|
||||
x0, y0, x1, y1 = payment_line_bbox
|
||||
x0, y0, x1, y1 = x0 * scale, y0 * scale, x1 * scale, y1 * scale
|
||||
|
||||
# Add absolute padding
|
||||
pad = self.bbox_padding_px
|
||||
x0 = max(0, x0 - pad)
|
||||
y0 = max(0, y0 - pad)
|
||||
x1 = min(image_width, x1 + pad)
|
||||
y1 = min(image_height, y1 + pad)
|
||||
# Apply field-specific bbox expansion strategy for payment_line
|
||||
x0, y0, x1, y1 = expand_bbox(
|
||||
bbox=(x0, y0, x1, y1),
|
||||
image_width=image_width,
|
||||
image_height=image_height,
|
||||
field_type="payment_line",
|
||||
)
|
||||
|
||||
# Convert to YOLO format (normalized center + size)
|
||||
x_center = (x0 + x1) / 2 / image_width
|
||||
|
||||
Reference in New Issue
Block a user