refactor: split line_items_extractor into smaller modules with comprehensive tests

- Extract models.py (LineItem, LineItemsResult dataclasses)
- Extract html_table_parser.py (ColumnMapper, HtmlTableParser)
- Extract merged_cell_handler.py (MergedCellHandler for PP-StructureV3 merged cells)
- Reduce line_items_extractor.py from 971 to 396 lines
- Add constants for magic numbers (MIN_AMOUNT_THRESHOLD, ROW_GROUPING_THRESHOLD, etc.)
- Fix row grouping algorithm in text_line_items_extractor.py
- Demote INFO logs to DEBUG level in structure_detector.py
- Add 209 tests achieving 85%+ coverage on main modules

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 23:02:00 +01:00
parent c2c8f2dd04
commit 8723ef4653
11 changed files with 2230 additions and 841 deletions
--- a/tests/table/test_models.py
+++ b/tests/table/test_models.py
@@ -0,0 +1,157 @@
+"""
+Tests for Line Items Data Models
+
+Tests for LineItem and LineItemsResult dataclasses.
+"""
+
+import pytest
+from backend.table.models import LineItem, LineItemsResult
+
+
+class TestLineItem:
+    """Unit tests covering construction of the LineItem dataclass."""
+
+    def test_default_values(self):
+        """Fields not passed to the constructor take their defaults."""
+        bare = LineItem(row_index=0)
+
+        assert bare.row_index == 0
+        assert bare.description is None
+        assert bare.quantity is None
+        assert bare.unit is None
+        assert bare.unit_price is None
+        assert bare.amount is None
+        assert bare.article_number is None
+        assert bare.vat_rate is None
+        assert bare.is_deduction is False
+        assert bare.confidence == 0.9
+
+    def test_custom_confidence(self):
+        """An explicitly supplied confidence replaces the default."""
+        scored = LineItem(confidence=0.7, row_index=0)
+        assert scored.confidence == 0.7
+
+    def test_is_deduction_true(self):
+        """The is_deduction flag is stored when passed as True."""
+        flagged = LineItem(is_deduction=True, row_index=0)
+        assert flagged.is_deduction is True
+
+
+class TestLineItemsResult:
+    """Behavioural checks for the LineItemsResult dataclass."""
+
+    def test_total_amount_empty_items(self):
+        """With no items, total_amount is None."""
+        empty = LineItemsResult(raw_html="", header_row=[], items=[])
+        assert empty.total_amount is None
+
+    def test_total_amount_single_item(self):
+        """A lone item's amount is reported as the total."""
+        rows = [LineItem(amount="100,00", row_index=0)]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        assert summary.total_amount == "100,00"
+
+    def test_total_amount_multiple_items(self):
+        """Amounts of several items are added together."""
+        rows = [
+            LineItem(amount="100,00", row_index=0),
+            LineItem(amount="200,50", row_index=1),
+        ]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        assert summary.total_amount == "300,50"
+
+    def test_total_amount_with_deduction(self):
+        """A negative (deduction) amount lowers the total."""
+        rows = [
+            LineItem(amount="1000,00", row_index=0),
+            LineItem(amount="-200,00", row_index=1, is_deduction=True),
+        ]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        assert summary.total_amount == "800,00"
+
+    def test_total_amount_swedish_format_with_spaces(self):
+        """Swedish-style amounts with space separators are summed."""
+        rows = [
+            LineItem(amount="1 234,56", row_index=0),
+            LineItem(amount="2 000,00", row_index=1),
+        ]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        assert summary.total_amount == "3 234,56"
+
+    def test_total_amount_invalid_amount_skipped(self):
+        """An unparseable amount is left out of the total."""
+        rows = [
+            LineItem(amount="100,00", row_index=0),
+            LineItem(amount="invalid", row_index=1),
+            LineItem(amount="200,00", row_index=2),
+        ]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        # Only the two parseable amounts contribute
+        assert summary.total_amount == "300,00"
+
+    def test_total_amount_none_amount_skipped(self):
+        """An item whose amount is None is left out of the total."""
+        rows = [
+            LineItem(amount="100,00", row_index=0),
+            LineItem(amount=None, row_index=1),
+        ]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        assert summary.total_amount == "100,00"
+
+    def test_total_amount_all_invalid_returns_none(self):
+        """If no amount can be parsed, total_amount is None."""
+        rows = [
+            LineItem(amount="invalid", row_index=0),
+            LineItem(amount="also invalid", row_index=1),
+        ]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        assert summary.total_amount is None
+
+    def test_total_amount_large_numbers(self):
+        """A seven-digit sum is formatted with space separators."""
+        rows = [
+            LineItem(amount="123 456,78", row_index=0),
+            LineItem(amount="876 543,22", row_index=1),
+        ]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        assert summary.total_amount == "1 000 000,00"
+
+    def test_total_amount_decimal_precision(self):
+        """Small two-decimal amounts add up exactly."""
+        rows = [
+            LineItem(amount="0,01", row_index=0),
+            LineItem(amount="0,02", row_index=1),
+        ]
+        summary = LineItemsResult(raw_html="", header_row=[], items=rows)
+
+        assert summary.total_amount == "0,03"
+
+    def test_is_reversed_default_false(self):
+        """is_reversed is False unless the caller sets it."""
+        plain = LineItemsResult(raw_html="", header_row=[], items=[])
+        assert plain.is_reversed is False
+
+    def test_is_reversed_can_be_set(self):
+        """Passing is_reversed=True is stored on the result."""
+        flipped = LineItemsResult(is_reversed=True, raw_html="", header_row=[], items=[])
+        assert flipped.is_reversed is True
+
+    def test_header_row_preserved(self):
+        """The header_row passed in is kept on the result."""
+        columns = ["Beskrivning", "Antal", "Belopp"]
+        parsed = LineItemsResult(raw_html="", header_row=columns, items=[])
+        assert parsed.header_row == columns
+
+    def test_raw_html_preserved(self):
+        """The raw_html passed in is kept on the result."""
+        markup = "<table><tr><td>Test</td></tr></table>"
+        parsed = LineItemsResult(raw_html=markup, header_row=[], items=[])
+        assert parsed.raw_html == markup