feat: add field-specific bbox expansion strategies for YOLO training
Implement center-point-based bbox scaling with directional compensation to capture field labels that typically appear above or to the left of field values. This improves YOLO training data quality by including contextual information around field values.

Key changes:
- Add shared.bbox module with ScaleStrategy dataclass and expand_bbox function
- Define field-specific strategies (ocr_number, bankgiro, invoice_date, etc.)
- Support manual_mode for minimal padding (no scaling)
- Integrate expand_bbox into AnnotationGenerator
- Add FIELD_TO_CLASS mapping for field_name-to-class_name lookup
- Add comprehensive tests with 100% coverage (45 tests)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
192
tests/shared/bbox/test_scale_strategy.py
Normal file
192
tests/shared/bbox/test_scale_strategy.py
Normal file
@@ -0,0 +1,192 @@
|
||||
"""
|
||||
Tests for ScaleStrategy configuration.
|
||||
|
||||
Tests verify that scale strategies are properly defined, immutable,
|
||||
and cover all required fields.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from shared.bbox import (
|
||||
ScaleStrategy,
|
||||
DEFAULT_STRATEGY,
|
||||
MANUAL_LABEL_STRATEGY,
|
||||
FIELD_SCALE_STRATEGIES,
|
||||
)
|
||||
from shared.fields import CLASS_NAMES
|
||||
|
||||
|
||||
class TestScaleStrategyDataclass:
    """Behavioral checks for the ScaleStrategy dataclass itself."""

    def test_default_strategy_values(self):
        """A ScaleStrategy built without arguments exposes the expected defaults."""
        defaults = ScaleStrategy()
        # Scale factors on both axes.
        assert (defaults.scale_x, defaults.scale_y) == (1.15, 1.15)
        # No directional compensation on any side.
        assert (
            defaults.extra_top_ratio,
            defaults.extra_bottom_ratio,
            defaults.extra_left_ratio,
            defaults.extra_right_ratio,
        ) == (0.0, 0.0, 0.0, 0.0)
        # Padding caps on both axes.
        assert (defaults.max_pad_x, defaults.max_pad_y) == (50, 50)

    def test_scale_strategy_immutability(self):
        """Assigning to a field of an existing ScaleStrategy must raise AttributeError."""
        frozen = ScaleStrategy()
        # NOTE(review): presumably a frozen dataclass, whose FrozenInstanceError
        # is an AttributeError subclass -- confirm against shared.bbox.
        with pytest.raises(AttributeError):
            frozen.scale_x = 2.0  # type: ignore

    def test_custom_strategy_values(self):
        """Keyword arguments passed to the constructor are stored unchanged."""
        overrides = {
            "scale_x": 1.5,
            "scale_y": 1.8,
            "extra_top_ratio": 0.6,
            "extra_left_ratio": 0.8,
            "max_pad_x": 100,
            "max_pad_y": 150,
        }
        custom = ScaleStrategy(**overrides)
        # Every overridden attribute must read back exactly as supplied.
        for attr_name, expected_value in overrides.items():
            assert getattr(custom, attr_name) == expected_value
|
||||
|
||||
|
||||
class TestDefaultStrategy:
    """Checks on the module-level DEFAULT_STRATEGY constant."""

    def test_default_strategy_is_scale_strategy(self):
        """DEFAULT_STRATEGY must be an instance of ScaleStrategy."""
        assert isinstance(DEFAULT_STRATEGY, ScaleStrategy)

    def test_default_strategy_matches_default_values(self):
        """DEFAULT_STRATEGY must compare equal to a no-argument ScaleStrategy()."""
        assert DEFAULT_STRATEGY == ScaleStrategy()
|
||||
|
||||
|
||||
class TestManualLabelStrategy:
    """Checks on the module-level MANUAL_LABEL_STRATEGY constant."""

    def test_manual_label_strategy_is_scale_strategy(self):
        """MANUAL_LABEL_STRATEGY must be an instance of ScaleStrategy."""
        assert isinstance(MANUAL_LABEL_STRATEGY, ScaleStrategy)

    def test_manual_label_strategy_has_no_scaling(self):
        """Both scale factors of MANUAL_LABEL_STRATEGY must be exactly 1.0."""
        for axis_attr in ("scale_x", "scale_y"):
            assert getattr(MANUAL_LABEL_STRATEGY, axis_attr) == 1.0

    def test_manual_label_strategy_has_no_directional_expansion(self):
        """All four extra_*_ratio values of MANUAL_LABEL_STRATEGY must be 0.0."""
        directional_attrs = (
            "extra_top_ratio",
            "extra_bottom_ratio",
            "extra_left_ratio",
            "extra_right_ratio",
        )
        for ratio_attr in directional_attrs:
            assert getattr(MANUAL_LABEL_STRATEGY, ratio_attr) == 0.0

    def test_manual_label_strategy_has_small_max_pad(self):
        """Both padding caps of MANUAL_LABEL_STRATEGY must not exceed 15."""
        for pad_attr in ("max_pad_x", "max_pad_y"):
            assert getattr(MANUAL_LABEL_STRATEGY, pad_attr) <= 15
|
||||
|
||||
|
||||
class TestFieldScaleStrategies:
    """Tests for the FIELD_SCALE_STRATEGIES dictionary.

    Most checks here iterate over FIELD_SCALE_STRATEGIES (or CLASS_NAMES).
    A loop over an empty collection asserts nothing, so explicit non-empty
    guards are included to keep these tests from passing vacuously.
    """

    def test_strategies_mapping_is_not_empty(self):
        """Verify FIELD_SCALE_STRATEGIES defines at least one strategy."""
        assert len(FIELD_SCALE_STRATEGIES) > 0, (
            "FIELD_SCALE_STRATEGIES is empty; per-field checks would be vacuous"
        )

    def test_all_class_names_have_strategies(self):
        """Verify all field class names have defined strategies."""
        # Guard: with no class names the loop below would check nothing.
        assert len(CLASS_NAMES) > 0, "CLASS_NAMES is empty; nothing to verify"
        for class_name in CLASS_NAMES:
            assert class_name in FIELD_SCALE_STRATEGIES, (
                f"Missing strategy for field: {class_name}"
            )

    def test_strategies_are_scale_strategy_instances(self):
        """Verify all strategies are ScaleStrategy instances."""
        for field_name, strategy in FIELD_SCALE_STRATEGIES.items():
            assert isinstance(strategy, ScaleStrategy), (
                f"Strategy for {field_name} is not a ScaleStrategy"
            )

    def test_scale_values_are_greater_than_one(self):
        """Verify all scale values are >= 1.0 (expansion, not contraction).

        Despite the test name, a factor of exactly 1.0 (no scaling) is
        accepted; only values below 1.0 fail.
        """
        for field_name, strategy in FIELD_SCALE_STRATEGIES.items():
            for attr in ("scale_x", "scale_y"):
                assert getattr(strategy, attr) >= 1.0, (
                    f"{field_name} {attr} should be >= 1.0"
                )

    def test_extra_ratios_are_non_negative(self):
        """Verify all extra ratios are >= 0."""
        ratio_attrs = (
            "extra_top_ratio",
            "extra_bottom_ratio",
            "extra_left_ratio",
            "extra_right_ratio",
        )
        for field_name, strategy in FIELD_SCALE_STRATEGIES.items():
            for attr in ratio_attrs:
                assert getattr(strategy, attr) >= 0, (
                    f"{field_name} {attr} should be >= 0"
                )

    def test_max_pad_values_are_positive(self):
        """Verify all max_pad values are > 0."""
        for field_name, strategy in FIELD_SCALE_STRATEGIES.items():
            for attr in ("max_pad_x", "max_pad_y"):
                assert getattr(strategy, attr) > 0, (
                    f"{field_name} {attr} should be > 0"
                )
|
||||
|
||||
|
||||
class TestSpecificFieldStrategies:
    """Per-field expectations for individual FIELD_SCALE_STRATEGIES entries."""

    def test_ocr_number_expands_upward(self):
        """ocr_number must expand upward (to capture the label) by at least 0.5."""
        top_ratio = FIELD_SCALE_STRATEGIES["ocr_number"].extra_top_ratio
        assert top_ratio > 0.0
        # The upward expansion must be significant, not merely positive.
        assert top_ratio >= 0.5

    def test_bankgiro_expands_leftward(self):
        """bankgiro must expand leftward (to capture the prefix) by at least 0.5."""
        left_ratio = FIELD_SCALE_STRATEGIES["bankgiro"].extra_left_ratio
        assert left_ratio > 0.0
        # The leftward expansion must be significant, not merely positive.
        assert left_ratio >= 0.5

    def test_plusgiro_expands_leftward(self):
        """plusgiro must expand leftward (to capture the prefix) by at least 0.5."""
        left_ratio = FIELD_SCALE_STRATEGIES["plusgiro"].extra_left_ratio
        assert left_ratio > 0.0
        assert left_ratio >= 0.5

    def test_amount_expands_rightward(self):
        """amount must have a positive rightward expansion (currency symbol)."""
        right_ratio = FIELD_SCALE_STRATEGIES["amount"].extra_right_ratio
        assert right_ratio > 0.0

    def test_invoice_date_expands_upward(self):
        """invoice_date must have a positive upward expansion (label above)."""
        top_ratio = FIELD_SCALE_STRATEGIES["invoice_date"].extra_top_ratio
        assert top_ratio > 0.0

    def test_invoice_due_date_expands_upward_and_leftward(self):
        """invoice_due_date must expand both upward and leftward."""
        due_date = FIELD_SCALE_STRATEGIES["invoice_due_date"]
        assert due_date.extra_top_ratio > 0.0
        assert due_date.extra_left_ratio > 0.0

    def test_payment_line_has_minimal_expansion(self):
        """payment_line (machine code) must keep its scale factors conservative."""
        payment_line = FIELD_SCALE_STRATEGIES["payment_line"]
        # Machine-readable line: scaling is capped at 1.2 (x) and 1.3 (y).
        assert payment_line.scale_x <= 1.2
        assert payment_line.scale_y <= 1.3
|
||||
Reference in New Issue
Block a user