refactor: split line_items_extractor into smaller modules with comprehensive tests
- Extract models.py (LineItem, LineItemsResult dataclasses)
- Extract html_table_parser.py (ColumnMapper, HtmlTableParser)
- Extract merged_cell_handler.py (MergedCellHandler for PP-StructureV3 merged cells)
- Reduce line_items_extractor.py from 971 to 396 lines
- Add constants for magic numbers (MIN_AMOUNT_THRESHOLD, ROW_GROUPING_THRESHOLD, etc.)
- Fix row grouping algorithm in text_line_items_extractor.py
- Demote INFO logs to DEBUG level in structure_detector.py
- Add 209 tests achieving 85%+ coverage on main modules

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
157
tests/table/test_models.py
Normal file
157
tests/table/test_models.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
Tests for Line Items Data Models
|
||||
|
||||
Tests for LineItem and LineItemsResult dataclasses.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from backend.table.models import LineItem, LineItemsResult
|
||||
|
||||
|
||||
class TestLineItem:
    """Checks on the LineItem dataclass: field defaults and explicit overrides."""

    def test_default_values(self):
        """A LineItem built from row_index alone exposes the expected defaults."""
        line = LineItem(row_index=0)

        assert line.row_index == 0

        # Every optional field is expected to start out unset.
        optional_fields = (
            "description",
            "quantity",
            "unit",
            "unit_price",
            "amount",
            "article_number",
            "vat_rate",
        )
        for field_name in optional_fields:
            assert getattr(line, field_name) is None, field_name

        assert line.is_deduction is False
        assert line.confidence == 0.9

    def test_custom_confidence(self):
        """An explicit confidence value is stored as given."""
        line = LineItem(row_index=0, confidence=0.7)
        assert line.confidence == 0.7

    def test_is_deduction_true(self):
        """The is_deduction flag can be switched on at construction."""
        line = LineItem(row_index=0, is_deduction=True)
        assert line.is_deduction is True
|
||||
|
||||
|
||||
class TestLineItemsResult:
    """Checks on the LineItemsResult dataclass: total_amount and stored fields."""

    @staticmethod
    def _result_for(line_items):
        """Wrap line_items in a LineItemsResult with an empty header and empty HTML."""
        return LineItemsResult(items=line_items, header_row=[], raw_html="")

    @staticmethod
    def _lines(*amounts):
        """Build one LineItem per amount, numbering row_index from 0."""
        return [
            LineItem(row_index=position, amount=value)
            for position, value in enumerate(amounts)
        ]

    def test_total_amount_empty_items(self):
        """total_amount is None when there are no items."""
        outcome = self._result_for([])
        assert outcome.total_amount is None

    def test_total_amount_single_item(self):
        """total_amount with a single item equals that item's amount."""
        outcome = self._result_for(self._lines("100,00"))
        assert outcome.total_amount == "100,00"

    def test_total_amount_multiple_items(self):
        """total_amount sums the amounts of several items."""
        outcome = self._result_for(self._lines("100,00", "200,50"))
        assert outcome.total_amount == "300,50"

    def test_total_amount_with_deduction(self):
        """total_amount includes negative amounts (deductions)."""
        charge = LineItem(row_index=0, amount="1000,00")
        rebate = LineItem(row_index=1, amount="-200,00", is_deduction=True)
        outcome = self._result_for([charge, rebate])
        assert outcome.total_amount == "800,00"

    def test_total_amount_swedish_format_with_spaces(self):
        """total_amount handles Swedish-format amounts containing spaces."""
        outcome = self._result_for(self._lines("1 234,56", "2 000,00"))
        assert outcome.total_amount == "3 234,56"

    def test_total_amount_invalid_amount_skipped(self):
        """An amount that cannot be parsed is left out of the total."""
        outcome = self._result_for(self._lines("100,00", "invalid", "200,00"))
        # Only the two valid amounts contribute.
        assert outcome.total_amount == "300,00"

    def test_total_amount_none_amount_skipped(self):
        """An item whose amount is None is left out of the total."""
        outcome = self._result_for(self._lines("100,00", None))
        assert outcome.total_amount == "100,00"

    def test_total_amount_all_invalid_returns_none(self):
        """total_amount is None when no item has a valid amount."""
        outcome = self._result_for(self._lines("invalid", "also invalid"))
        assert outcome.total_amount is None

    def test_total_amount_large_numbers(self):
        """total_amount handles large numbers."""
        outcome = self._result_for(self._lines("123 456,78", "876 543,22"))
        assert outcome.total_amount == "1 000 000,00"

    def test_total_amount_decimal_precision(self):
        """total_amount keeps decimal precision for small values."""
        outcome = self._result_for(self._lines("0,01", "0,02"))
        assert outcome.total_amount == "0,03"

    def test_is_reversed_default_false(self):
        """is_reversed is False unless specified."""
        outcome = self._result_for([])
        assert outcome.is_reversed is False

    def test_is_reversed_can_be_set(self):
        """is_reversed can be set to True at construction."""
        outcome = LineItemsResult(
            items=[],
            header_row=[],
            raw_html="",
            is_reversed=True,
        )
        assert outcome.is_reversed is True

    def test_header_row_preserved(self):
        """The header_row passed in is what the result exposes."""
        columns = ["Beskrivning", "Antal", "Belopp"]
        outcome = LineItemsResult(items=[], header_row=columns, raw_html="")
        assert outcome.header_row == columns

    def test_raw_html_preserved(self):
        """The raw_html passed in is what the result exposes."""
        markup = "<table><tr><td>Test</td></tr></table>"
        outcome = LineItemsResult(items=[], header_row=[], raw_html=markup)
        assert outcome.raw_html == markup
|
||||
Reference in New Issue
Block a user