WIP

2026-02-07 13:56:00 +01:00
parent 0990239e9c
commit f1a7bfe6b7
16 changed files with 1121 additions and 307 deletions
--- a/tests/training/yolo/test_db_dataset.py
+++ b/tests/training/yolo/test_db_dataset.py
@@ -0,0 +1,251 @@
+"""Tests for db_dataset.py expand_bbox integration."""
+
+import numpy as np
+import pytest
+from unittest.mock import MagicMock, patch
+from pathlib import Path
+
+from training.yolo.db_dataset import DBYOLODataset
+from training.yolo.annotation_generator import YOLOAnnotation
+from shared.bbox import FIELD_SCALE_STRATEGIES, DEFAULT_STRATEGY
+from shared.fields import CLASS_NAMES
+
+
+class TestConvertLabelsWithExpandBbox:
+    """Tests for _convert_labels using expand_bbox instead of fixed padding."""
+
+    def test_convert_labels_uses_expand_bbox(self):
+        """Verify _convert_labels widens a bankgiro box (output check; no call spy)."""
+        # Bare instance: object.__new__ skips __init__ (presumably to avoid DB loading)
+        dataset = object.__new__(DBYOLODataset)
+        dataset.dpi = 300
+        dataset.min_bbox_height_px = 30
+
+        # Create annotation for bankgiro (has extra_left_ratio)
+        # bbox in PDF points: x0=100, y0=200, x1=200, y1=250
+        # center: (150, 225), width: 100, height: 50
+        annotations = [
+            YOLOAnnotation(
+                class_id=4,  # bankgiro
+                x_center=150,  # in PDF points
+                y_center=225,
+                width=100,
+                height=50,
+                confidence=0.9
+            )
+        ]
+
+        # Image size in pixels (at 300 DPI)
+        img_width = 2480  # A4 width at 300 DPI
+        img_height = 3508  # A4 height at 300 DPI
+
+        # Convert labels
+        labels = dataset._convert_labels(annotations, img_width, img_height, is_scanned=False)
+
+        # Should have one label with 5 columns: class_id, x_center, y_center, width, height
+        assert labels.shape == (1, 5)
+
+        # Check class_id
+        assert labels[0, 0] == 4
+
+        # The bbox should be expanded using bankgiro strategy (extra_left_ratio=0.80)
+        # Original bbox at 300 DPI:
+        # x0 = 100 * (300/72) = 416.67
+        # y0 = 200 * (300/72) = 833.33
+        # x1 = 200 * (300/72) = 833.33
+        # y1 = 250 * (300/72) = 1041.67
+        # width_px = 416.67, height_px = 208.33
+
+        # After expand_bbox with bankgiro strategy:
+        # scale_x=1.45, scale_y=1.35, extra_left_ratio=0.80
+        # The x_center should shift left due to extra_left_ratio (not asserted below)
+        x_center = labels[0, 1]
+        y_center = labels[0, 2]
+        width = labels[0, 3]
+        height = labels[0, 4]
+
+        # Verify normalized values are in valid range
+        assert 0 <= x_center <= 1
+        assert 0 <= y_center <= 1
+        assert 0 < width <= 1
+        assert 0 < height <= 1
+
+        # Width should be larger than original due to scaling and extra_left
+        # Original normalized width: 416.67 / 2480 = 0.168
+        # After bankgiro expansion it should be wider
+        assert width > 0.168
+
+    def test_convert_labels_different_field_types(self):
+        """Verify different field types use their specific strategies."""
+        dataset = object.__new__(DBYOLODataset)
+        dataset.dpi = 300
+        dataset.min_bbox_height_px = 30
+
+        img_width = 2480
+        img_height = 3508
+
+        # Same bbox for different field types
+        base_annotation = {
+            'x_center': 150,
+            'y_center': 225,
+            'width': 100,
+            'height': 50,
+            'confidence': 0.9
+        }
+
+        # OCR number (class_id=3) - has extra_top_ratio=0.60
+        ocr_annotations = [YOLOAnnotation(class_id=3, **base_annotation)]
+        ocr_labels = dataset._convert_labels(ocr_annotations, img_width, img_height, is_scanned=False)
+
+        # Bankgiro (class_id=4) - has extra_left_ratio=0.80
+        bankgiro_annotations = [YOLOAnnotation(class_id=4, **base_annotation)]
+        bankgiro_labels = dataset._convert_labels(bankgiro_annotations, img_width, img_height, is_scanned=False)
+
+        # Amount (class_id=6) - has extra_right_ratio=0.30
+        amount_annotations = [YOLOAnnotation(class_id=6, **base_annotation)]
+        amount_labels = dataset._convert_labels(amount_annotations, img_width, img_height, is_scanned=False)
+
+        # Each field type should have different expansion
+        # OCR should expand more vertically (extra_top)
+        # Bankgiro should expand more to the left
+        # Amount should expand more to the right (not asserted below)
+
+        # OCR: extra_top shifts y_center up
+        # Bankgiro: extra_left shifts x_center left
+        # So bankgiro x_center < OCR x_center
+        assert bankgiro_labels[0, 1] < ocr_labels[0, 1]
+
+        # OCR has higher scale_y (1.80) than amount (1.35)
+        assert ocr_labels[0, 4] > amount_labels[0, 4]
+
+    def test_convert_labels_clamps_to_image_bounds(self):
+        """Verify labels near the image edge stay within the normalized [0, 1] range."""
+        dataset = object.__new__(DBYOLODataset)
+        dataset.dpi = 300
+        dataset.min_bbox_height_px = 30
+
+        # Annotation near edge of image (in PDF points)
+        annotations = [
+            YOLOAnnotation(
+                class_id=4,  # bankgiro - will expand left
+                x_center=30,  # Very close to left edge
+                y_center=50,
+                width=40,
+                height=30,
+                confidence=0.9
+            )
+        ]
+
+        img_width = 2480
+        img_height = 3508
+
+        labels = dataset._convert_labels(annotations, img_width, img_height, is_scanned=False)
+
+        # All values should be in valid range (range check only; exact clamped edges are not asserted)
+        assert 0 <= labels[0, 1] <= 1  # x_center
+        assert 0 <= labels[0, 2] <= 1  # y_center
+        assert 0 < labels[0, 3] <= 1   # width
+        assert 0 < labels[0, 4] <= 1   # height
+
+    def test_convert_labels_empty_annotations(self):
+        """Verify empty annotations return an empty (0, 5) float32 array."""
+        dataset = object.__new__(DBYOLODataset)
+        dataset.dpi = 300
+        dataset.min_bbox_height_px = 30
+
+        labels = dataset._convert_labels([], 2480, 3508, is_scanned=False)
+
+        assert labels.shape == (0, 5)
+        assert labels.dtype == np.float32
+
+    def test_convert_labels_minimum_height(self):
+        """Verify minimum height is enforced after expansion."""
+        dataset = object.__new__(DBYOLODataset)
+        dataset.dpi = 300
+        dataset.min_bbox_height_px = 50  # Higher minimum
+
+        # Very small annotation
+        annotations = [
+            YOLOAnnotation(
+                class_id=9,  # payment_line - minimal expansion
+                x_center=100,
+                y_center=100,
+                width=200,
+                height=5,  # Very small height
+                confidence=0.9
+            )
+        ]
+
+        labels = dataset._convert_labels(annotations, 2480, 3508, is_scanned=False)
+
+        # Height should be at least min_bbox_height_px / img_height
+        min_normalized_height = 50 / 3508
+        assert labels[0, 4] >= min_normalized_height
+
+
+class TestCreateAnnotationWithClassName:
+    """Tests for _create_annotation field-name to class_id mapping (class_name is not asserted)."""
+
+    def test_create_annotation_stores_class_name(self):
+        """Verify _create_annotation maps 'InvoiceNumber' to class_id 0."""
+        dataset = object.__new__(DBYOLODataset)
+
+        # Create annotation for invoice_number
+        annotation = dataset._create_annotation(
+            field_name="InvoiceNumber",
+            bbox=[100, 200, 200, 250],
+            score=0.9
+        )
+
+        assert annotation.class_id == 0  # invoice_number class_id; NOTE(review): class_name itself is never checked
+
+
+class TestLoadLabelsFromDbWithClassName:
+    """Tests for _load_labels_from_db preserving field_name for expansion."""
+
+    def test_load_labels_maps_field_names_correctly(self):
+        """Verify DB field names are mapped to the expected class_ids."""
+        dataset = object.__new__(DBYOLODataset)
+        dataset.min_confidence = 0.7
+
+        # Mock database: one successful text-PDF document with two matched fields on page 0
+        mock_db = MagicMock()
+        mock_db.get_documents_batch.return_value = {
+            'doc1': {
+                'success': True,
+                'pdf_type': 'text',
+                'split': 'train',
+                'field_results': [
+                    {
+                        'matched': True,
+                        'field_name': 'Bankgiro',
+                        'score': 0.9,
+                        'bbox': [100, 200, 200, 250],
+                        'page_no': 0
+                    },
+                    {
+                        'matched': True,
+                        'field_name': 'supplier_accounts(Plusgiro)',
+                        'score': 0.85,
+                        'bbox': [300, 400, 400, 450],
+                        'page_no': 0
+                    }
+                ]
+            }
+        }
+        dataset.db = mock_db
+
+        result = dataset._load_labels_from_db(['doc1'])
+
+        assert 'doc1' in result
+        page_labels, is_scanned, csv_split = result['doc1']  # only page_labels is checked below
+
+        # Should have 2 annotations on page 0
+        assert 0 in page_labels
+        assert len(page_labels[0]) == 2
+
+        # First annotation: Bankgiro (class_id=4)
+        assert page_labels[0][0].class_id == 4
+
+        # Second annotation: Plusgiro mapped from supplier_accounts(Plusgiro) (class_id=5)
+        assert page_labels[0][1].class_id == 5
--- a/tests/web/test_training_export.py
+++ b/tests/web/test_training_export.py
@@ -0,0 +1,367 @@
+"""
+Tests for Training Export with expand_bbox integration.
+
+Tests the export endpoint's integration with field-specific bbox expansion.
+"""
+
+import pytest
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+from shared.bbox import expand_bbox
+from shared.fields import CLASS_NAMES, FIELD_CLASS_IDS
+
+
+class TestExpandBboxForExport:
+    """Tests for expand_bbox integration in export workflow."""
+
+    def test_expand_bbox_converts_normalized_to_pixel_and_back(self):
+        """Verify expand_bbox output survives a normalized -> pixel -> normalized round trip."""
+        # Annotation stored as normalized coords
+        x_center_norm = 0.5
+        y_center_norm = 0.5
+        width_norm = 0.1
+        height_norm = 0.05
+
+        # Image dimensions
+        img_width = 2480  # A4 at 300 DPI
+        img_height = 3508
+
+        # Convert to pixel coords
+        x_center_px = x_center_norm * img_width
+        y_center_px = y_center_norm * img_height
+        width_px = width_norm * img_width
+        height_px = height_norm * img_height
+
+        # Convert to corner coords
+        x0 = x_center_px - width_px / 2
+        y0 = y_center_px - height_px / 2
+        x1 = x_center_px + width_px / 2
+        y1 = y_center_px + height_px / 2
+
+        # Apply expansion
+        class_name = "invoice_number"
+        ex0, ey0, ex1, ey1 = expand_bbox(
+            bbox=(x0, y0, x1, y1),
+            image_width=img_width,
+            image_height=img_height,
+            field_type=class_name,
+        )
+
+        # Verify expanded bbox is strictly larger on all four sides
+        assert ex0 < x0  # Left expanded
+        assert ey0 < y0  # Top expanded
+        assert ex1 > x1  # Right expanded
+        assert ey1 > y1  # Bottom expanded
+
+        # Convert back to normalized
+        new_x_center = (ex0 + ex1) / 2 / img_width
+        new_y_center = (ey0 + ey1) / 2 / img_height
+        new_width = (ex1 - ex0) / img_width
+        new_height = (ey1 - ey0) / img_height
+
+        # Verify valid normalized coords
+        assert 0 <= new_x_center <= 1
+        assert 0 <= new_y_center <= 1
+        assert 0 <= new_width <= 1
+        assert 0 <= new_height <= 1
+
+    def test_expand_bbox_manual_mode_minimal_expansion(self):
+        """Verify manual_mode=True expands less than manual_mode=False for the same box."""
+        # 100x50 px bbox
+        bbox = (100, 100, 200, 150)
+        img_width = 2480
+        img_height = 3508
+
+        # Auto mode (field-specific expansion)
+        auto_result = expand_bbox(
+            bbox=bbox,
+            image_width=img_width,
+            image_height=img_height,
+            field_type="invoice_number",
+            manual_mode=False,
+        )
+
+        # Manual mode (minimal expansion)
+        manual_result = expand_bbox(
+            bbox=bbox,
+            image_width=img_width,
+            image_height=img_height,
+            field_type="invoice_number",
+            manual_mode=True,
+        )
+
+        # Auto expansion should be larger than manual
+        auto_width = auto_result[2] - auto_result[0]
+        manual_width = manual_result[2] - manual_result[0]
+        assert auto_width > manual_width
+
+        auto_height = auto_result[3] - auto_result[1]
+        manual_height = manual_result[3] - manual_result[1]
+        assert auto_height > manual_height
+
+    def test_expand_bbox_different_sources_use_correct_mode(self):
+        """Verify manual_mode=True (manual/imported) yields less area than manual_mode=False (auto)."""
+        bbox = (100, 100, 200, 150)
+        img_width = 2480
+        img_height = 3508
+
+        # Test-local source -> manual_mode mapping (the export code itself is not called here)
+        source_mode_mapping = {
+            "manual": True,   # Manual annotations -> minimal expansion
+            "auto": False,    # Auto-labeled -> field-specific expansion
+            "imported": True, # Imported (from CSV) -> minimal expansion
+        }
+
+        results = {}
+        for source, manual_mode in source_mode_mapping.items():
+            result = expand_bbox(
+                bbox=bbox,
+                image_width=img_width,
+                image_height=img_height,
+                field_type="ocr_number",
+                manual_mode=manual_mode,
+            )
+            results[source] = result
+
+        # Auto should have largest expansion
+        auto_area = (results["auto"][2] - results["auto"][0]) * \
+                    (results["auto"][3] - results["auto"][1])
+        manual_area = (results["manual"][2] - results["manual"][0]) * \
+                      (results["manual"][3] - results["manual"][1])
+        imported_area = (results["imported"][2] - results["imported"][0]) * \
+                        (results["imported"][3] - results["imported"][1])
+
+        assert auto_area > manual_area
+        assert auto_area > imported_area
+        # Manual and imported should be the same (both use minimal mode)
+        assert manual_area == imported_area
+
+    def test_expand_bbox_all_field_types_work(self):
+        """Verify expand_bbox returns an in-bounds, non-degenerate box for every class name."""
+        bbox = (100, 100, 200, 150)
+        img_width = 2480
+        img_height = 3508
+
+        for class_name in CLASS_NAMES:
+            result = expand_bbox(
+                bbox=bbox,
+                image_width=img_width,
+                image_height=img_height,
+                field_type=class_name,
+            )
+
+            # Verify result is a valid bbox
+            assert len(result) == 4
+            x0, y0, x1, y1 = result
+            assert x0 >= 0
+            assert y0 >= 0
+            assert x1 <= img_width
+            assert y1 <= img_height
+            assert x1 > x0
+            assert y1 > y0
+
+
+class TestExportAnnotationExpansion:
+    """Tests for annotation expansion in export workflow."""
+
+    def test_annotation_bbox_conversion_workflow(self):
+        """Test the normalized -> pixel -> expand -> normalized steps for one auto annotation."""
+        # Simulate stored annotation (normalized coords)
+        class MockAnnotation:
+            class_id = FIELD_CLASS_IDS["invoice_number"]
+            class_name = "invoice_number"
+            x_center = 0.3
+            y_center = 0.2
+            width = 0.15
+            height = 0.03
+            source = "auto"
+
+        ann = MockAnnotation()
+        img_width = 2480
+        img_height = 3508
+
+        # Step 1: Convert normalized to pixel corner coords
+        half_w = (ann.width * img_width) / 2
+        half_h = (ann.height * img_height) / 2
+        x0 = ann.x_center * img_width - half_w
+        y0 = ann.y_center * img_height - half_h
+        x1 = ann.x_center * img_width + half_w
+        y1 = ann.y_center * img_height + half_h
+
+        # Step 2: Determine manual_mode based on source
+        manual_mode = ann.source in ("manual", "imported")
+
+        # Step 3: Apply expand_bbox
+        ex0, ey0, ex1, ey1 = expand_bbox(
+            bbox=(x0, y0, x1, y1),
+            image_width=img_width,
+            image_height=img_height,
+            field_type=ann.class_name,
+            manual_mode=manual_mode,
+        )
+
+        # Step 4: Convert back to normalized
+        new_x_center = (ex0 + ex1) / 2 / img_width
+        new_y_center = (ey0 + ey1) / 2 / img_height
+        new_width = (ex1 - ex0) / img_width
+        new_height = (ey1 - ey0) / img_height
+
+        # Verify expansion happened (auto mode)
+        assert new_width > ann.width
+        assert new_height > ann.height
+
+        # Verify valid YOLO format
+        assert 0 <= new_x_center <= 1
+        assert 0 <= new_y_center <= 1
+        assert 0 < new_width <= 1
+        assert 0 < new_height <= 1
+
+    def test_export_applies_expansion_to_each_annotation(self):
+        """Test the per-annotation expansion steps (re-implemented inline; export is not called)."""
+        # Simulate multiple annotations with different sources
+        # Use smaller bboxes so manual mode padding has visible effect
+        annotations = [
+            {"class_name": "invoice_number", "source": "auto", "x_center": 0.3, "y_center": 0.2, "width": 0.05, "height": 0.02},
+            {"class_name": "ocr_number", "source": "manual", "x_center": 0.5, "y_center": 0.8, "width": 0.05, "height": 0.02},
+            {"class_name": "amount", "source": "imported", "x_center": 0.7, "y_center": 0.5, "width": 0.05, "height": 0.02},
+        ]
+
+        img_width = 2480
+        img_height = 3508
+
+        expanded_annotations = []
+        for ann in annotations:
+            # Convert to pixel coords
+            half_w = (ann["width"] * img_width) / 2
+            half_h = (ann["height"] * img_height) / 2
+            x0 = ann["x_center"] * img_width - half_w
+            y0 = ann["y_center"] * img_height - half_h
+            x1 = ann["x_center"] * img_width + half_w
+            y1 = ann["y_center"] * img_height + half_h
+
+            # Determine manual_mode
+            manual_mode = ann["source"] in ("manual", "imported")
+
+            # Apply expansion
+            ex0, ey0, ex1, ey1 = expand_bbox(
+                bbox=(x0, y0, x1, y1),
+                image_width=img_width,
+                image_height=img_height,
+                field_type=ann["class_name"],
+                manual_mode=manual_mode,
+            )
+
+            # Convert back to normalized
+            expanded_annotations.append({
+                "class_name": ann["class_name"],
+                "source": ann["source"],
+                "x_center": (ex0 + ex1) / 2 / img_width,
+                "y_center": (ey0 + ey1) / 2 / img_height,
+                "width": (ex1 - ex0) / img_width,
+                "height": (ey1 - ey0) / img_height,
+            })
+
+        # Verify auto-labeled annotation expanded more than the manual one (imported is not compared)
+        auto_ann = next(a for a in expanded_annotations if a["source"] == "auto")
+        manual_ann = next(a for a in expanded_annotations if a["source"] == "manual")
+
+        # Auto mode should expand more than manual mode
+        # (presumably auto has larger scale factors and max_pad - confirm in shared.bbox)
+        assert auto_ann["width"] > manual_ann["width"]
+        assert auto_ann["height"] > manual_ann["height"]
+
+        # No annotation may end up smaller than its original beyond the tolerance below
+        # Allow small precision loss (< 1%) due to integer conversion in expand_bbox
+        for i, (orig, exp) in enumerate(zip(annotations, expanded_annotations)):
+            # Width and height should be >= original (expansion or equal, with small tolerance)
+            tolerance = 0.01  # 1% tolerance for integer rounding
+            assert exp["width"] >= orig["width"] * (1 - tolerance), \
+                f"Annotation {i} width unexpectedly smaller: {exp['width']} < {orig['width']}"
+            assert exp["height"] >= orig["height"] * (1 - tolerance), \
+                f"Annotation {i} height unexpectedly smaller: {exp['height']} < {orig['height']}"
+
+
+class TestExpandBboxEdgeCases:
+    """Tests for edge cases in export bbox expansion."""
+
+    def test_bbox_at_image_edge_left(self):
+        """Test bbox at left edge of image."""
+        bbox = (0, 100, 50, 150)
+        img_width = 2480
+        img_height = 3508
+
+        result = expand_bbox(
+            bbox=bbox,
+            image_width=img_width,
+            image_height=img_height,
+            field_type="invoice_number",
+        )
+
+        # Left edge must not go below 0
+        assert result[0] >= 0
+
+    def test_bbox_at_image_edge_right(self):
+        """Test bbox at right edge of image."""
+        bbox = (2400, 100, 2480, 150)
+        img_width = 2480
+        img_height = 3508
+
+        result = expand_bbox(
+            bbox=bbox,
+            image_width=img_width,
+            image_height=img_height,
+            field_type="invoice_number",
+        )
+
+        # Right edge must not exceed image width
+        assert result[2] <= img_width
+
+    def test_bbox_at_image_edge_top(self):
+        """Test bbox at top edge of image."""
+        bbox = (100, 0, 200, 50)
+        img_width = 2480
+        img_height = 3508
+
+        result = expand_bbox(
+            bbox=bbox,
+            image_width=img_width,
+            image_height=img_height,
+            field_type="invoice_number",
+        )
+
+        # Top edge must not go below 0
+        assert result[1] >= 0
+
+    def test_bbox_at_image_edge_bottom(self):
+        """Test bbox at bottom edge of image."""
+        bbox = (100, 3400, 200, 3508)
+        img_width = 2480
+        img_height = 3508
+
+        result = expand_bbox(
+            bbox=bbox,
+            image_width=img_width,
+            image_height=img_height,
+            field_type="invoice_number",
+        )
+
+        # Bottom edge must not exceed image height
+        assert result[3] <= img_height
+
+    def test_very_small_bbox(self):
+        """Test very small bbox still yields a non-degenerate box."""
+        bbox = (100, 100, 105, 105)  # 5x5 pixel bbox
+        img_width = 2480
+        img_height = 3508
+
+        result = expand_bbox(
+            bbox=bbox,
+            image_width=img_width,
+            image_height=img_height,
+            field_type="invoice_number",
+        )
+
+        # Result must have positive width and height (growth itself is not asserted)
+        assert result[2] > result[0]
+        assert result[3] > result[1]