invoice-master-poc-v2/tests/table/test_line_items_extractor.py

"""
Tests for Line Items Extractor

Tests extraction of structured line items from HTML tables.
"""

import pytest
from backend.table.line_items_extractor import (
    LineItem,
    LineItemsResult,
    LineItemsExtractor,
    ColumnMapper,
    HTMLTableParser,
)


class TestLineItem:
    """Construction checks for the LineItem dataclass."""

    def test_create_line_item_with_all_fields(self):
        """A fully specified line item exposes the values it was built with."""
        fields = dict(
            row_index=0,
            description="Samfällighetsavgift",
            quantity="1",
            unit="st",
            unit_price="6888,00",
            amount="6888,00",
            article_number="3035",
            vat_rate="25",
            confidence=0.95,
        )

        line = LineItem(**fields)

        # Spot-check a subset of the attributes against the inputs.
        for name in ("description", "quantity", "amount", "article_number"):
            assert getattr(line, name) == fields[name]

    def test_create_line_item_with_minimal_fields(self):
        """Only row_index, description and amount are supplied here."""
        line = LineItem(row_index=0, description="Test item", amount="100,00")

        assert (line.description, line.amount) == ("Test item", "100,00")
        # Fields that were not passed in are expected to be None.
        assert line.quantity is None
        assert line.unit_price is None


class TestHTMLTableParser:
    """Checks how HTMLTableParser turns table markup into header and rows."""

    @staticmethod
    def _parse(markup):
        """Run a fresh HTMLTableParser over ``markup`` and return its output."""
        return HTMLTableParser().parse(markup)

    def test_parse_simple_table(self):
        """A table without thead yields an empty header and all rows as data."""
        markup = """
        <html><body><table>
            <tr><td>A</td><td>B</td></tr>
            <tr><td>1</td><td>2</td></tr>
        </table></body></html>
        """
        header, rows = self._parse(markup)

        assert header == []  # No thead
        assert len(rows) == 2
        first, second = rows[0], rows[1]
        assert first == ["A", "B"]
        assert second == ["1", "2"]

    def test_parse_table_with_thead(self):
        """An explicit thead is returned as the header, tbody rows as data."""
        markup = """
        <html><body><table>
            <thead><tr><th>Name</th><th>Price</th></tr></thead>
            <tbody><tr><td>Item 1</td><td>100</td></tr></tbody>
        </table></body></html>
        """
        header, rows = self._parse(markup)

        assert header == ["Name", "Price"]
        assert len(rows) == 1
        assert rows[0] == ["Item 1", "100"]

    def test_parse_empty_table(self):
        """A table element with no rows yields an empty header and no rows."""
        header, rows = self._parse("<html><body><table></table></body></html>")

        assert header == []
        assert rows == []

    def test_parse_table_with_empty_cells(self):
        """Empty cells are kept as empty strings in their original position."""
        markup = """
        <html><body><table>
            <tr><td></td><td>Value</td><td></td></tr>
        </table></body></html>
        """
        _header, rows = self._parse(markup)

        assert rows[0] == ["", "Value", ""]


class TestColumnMapper:
    """Checks ColumnMapper.map against several header layouts."""

    def test_map_swedish_headers(self):
        """Each Swedish header maps to its field name, keyed by column index."""
        expected_fields = [
            "article_number",
            "description",
            "quantity",
            "unit",
            "unit_price",
            "amount",
        ]

        mapping = ColumnMapper().map(
            ["Art nummer", "Produktbeskrivning", "Antal", "Enhet", "A-pris", "Belopp"]
        )

        for column, field in enumerate(expected_fields):
            assert mapping[column] == field

    def test_map_merged_headers(self):
        """Merged header cells (e.g. 'Moms A-pris') leave clean columns mapped."""
        mapping = ColumnMapper().map(
            ["Belopp", "Moms A-pris", "Enhet Antal", "Vara/tjänst", "Art.nr"]
        )

        # Only the unambiguous columns are checked; 1 and 2 hold merged text.
        assert mapping.get(0) == "amount"
        assert mapping.get(3) == "description"  # Vara/tjänst -> description
        assert mapping.get(4) == "article_number"  # Art.nr -> article_number

    def test_map_empty_headers(self):
        """Blank header cells produce an empty mapping."""
        assert ColumnMapper().map(["", "", ""]) == {}

    def test_map_unknown_headers(self):
        """Headers matching no known field produce an empty mapping."""
        assert ColumnMapper().map(["Foo", "Bar", "Baz"]) == {}


class TestLineItemsExtractor:
    """Behavioural checks for LineItemsExtractor on HTML input."""

    def test_extract_from_simple_html(self):
        """Two tbody rows under a thead become two line items, in order."""
        markup = """
        <html><body><table>
            <thead><tr><th>Beskrivning</th><th>Antal</th><th>Pris</th><th>Belopp</th></tr></thead>
            <tbody>
                <tr><td>Product A</td><td>2</td><td>50,00</td><td>100,00</td></tr>
                <tr><td>Product B</td><td>1</td><td>75,00</td><td>75,00</td></tr>
            </tbody>
        </table></body></html>
        """
        items = LineItemsExtractor().extract(markup).items

        assert len(items) == 2
        first, second = items[0], items[1]
        assert first.description == "Product A"
        assert first.quantity == "2"
        assert first.amount == "100,00"
        assert second.description == "Product B"

    def test_extract_from_reversed_table(self):
        """The header row is the last row (PP-StructureV3 quirk); data still maps."""
        markup = """
        <html><body><table>
            <tr><td>6 888,00</td><td>6 888,00</td><td>1</td><td>Samfällighetsavgift</td><td>3035</td></tr>
            <tr><td>4 811,44</td><td>4 811,44</td><td>1</td><td>GA:1 Avgift</td><td>303501</td></tr>
            <tr><td>Belopp</td><td>Moms A-pris</td><td>Enhet Antal</td><td>Vara/tjänst</td><td>Art.nr</td></tr>
        </table></body></html>
        """
        items = LineItemsExtractor().extract(markup).items

        assert len(items) == 2
        assert items[0].amount == "6 888,00"
        assert items[0].description == "Samfällighetsavgift"
        assert items[1].description == "GA:1 Avgift"

    def test_extract_from_empty_html(self):
        """A table with no rows yields an empty item list."""
        outcome = LineItemsExtractor().extract(
            "<html><body><table></table></body></html>"
        )

        assert outcome.items == []

    def test_extract_returns_result_with_metadata(self):
        """extract() returns a LineItemsResult carrying the input HTML and header."""
        markup = """
        <html><body><table>
            <thead><tr><th>Beskrivning</th><th>Belopp</th></tr></thead>
            <tbody><tr><td>Test</td><td>100</td></tr></tbody>
        </table></body></html>
        """
        outcome = LineItemsExtractor().extract(markup)

        assert isinstance(outcome, LineItemsResult)
        assert outcome.raw_html == markup
        assert outcome.header_row == ["Beskrivning", "Belopp"]

    def test_extract_skips_empty_rows(self):
        """Rows whose cells are all empty do not produce line items."""
        markup = """
        <html><body><table>
            <thead><tr><th>Beskrivning</th><th>Belopp</th></tr></thead>
            <tbody>
                <tr><td></td><td></td></tr>
                <tr><td>Real item</td><td>100</td></tr>
                <tr><td></td><td></td></tr>
            </tbody>
        </table></body></html>
        """
        items = LineItemsExtractor().extract(markup).items

        assert len(items) == 1
        assert items[0].description == "Real item"

    def test_is_line_items_table(self):
        """is_line_items_table accepts item headers, rejects summary/payment ones."""
        cases = [
            # Line items table
            (["Art nummer", "Produktbeskrivning", "Antal", "Belopp"], True),
            # Summary table
            (["Frakt", "Faktura.avg", "Exkl.moms", "Moms", "Belopp att betala"], False),
            # Payment table
            (["Bankgiro", "OCR", "Belopp"], False),
        ]
        extractor = LineItemsExtractor()

        for headers, expected in cases:
            assert extractor.is_line_items_table(headers) is expected


class TestLineItemsExtractorFromPdf:
    """Checks extract_from_pdf with table detection replaced by a mock."""

    def test_extract_from_pdf_no_tables(self):
        """With no tables and no parsing results, extract_from_pdf returns None."""
        from unittest.mock import patch

        extractor = LineItemsExtractor()

        # Detection is stubbed to report nothing at all for the document.
        with patch.object(
            extractor, "_detect_tables_with_parsing", return_value=([], [])
        ):
            outcome = extractor.extract_from_pdf("fake.pdf")

        assert outcome is None

    def test_extract_from_pdf_with_tables(self):
        """A detected table with thead/tbody markup yields at least one item."""
        from unittest.mock import MagicMock, patch
        from backend.table.structure_detector import TableDetectionResult

        # Stand-in detection result whose html attribute holds a small table.
        detected = MagicMock(spec=TableDetectionResult)
        detected.html = """
        <table>
            <thead><tr><th>Beskrivning</th><th>Antal</th><th>Pris</th><th>Belopp</th></tr></thead>
            <tbody><tr><td>Product A</td><td>2</td><td>100,00</td><td>200,00</td></tr></tbody>
        </table>
        """

        extractor = LineItemsExtractor()

        with patch.object(
            extractor, "_detect_tables_with_parsing", return_value=([detected], [])
        ):
            outcome = extractor.extract_from_pdf("fake.pdf")

        assert outcome is not None
        assert len(outcome.items) >= 1


class TestPdfPathValidation:
    """Tests for PDF path validation in ``_detect_tables_with_parsing``."""

    def test_detect_tables_with_nonexistent_path(self, tmp_path):
        """Test that non-existent PDF path returns empty results."""
        from unittest.mock import MagicMock
        from backend.table.structure_detector import TableDetector

        extractor = LineItemsExtractor()
        mock_detector = MagicMock(spec=TableDetector)

        # Build the missing path under tmp_path so the outcome does not depend
        # on whatever happens to be in the current working directory.
        missing_pdf = tmp_path / "nonexistent.pdf"

        tables, parsing_res = extractor._detect_tables_with_parsing(
            mock_detector, str(missing_pdf)
        )

        assert tables == []
        assert parsing_res == []

    def test_detect_tables_with_directory_path(self, tmp_path):
        """Test that directory path (not file) returns empty results."""
        from unittest.mock import MagicMock
        from backend.table.structure_detector import TableDetector

        extractor = LineItemsExtractor()
        mock_detector = MagicMock(spec=TableDetector)

        # tmp_path is a directory, not a file
        tables, parsing_res = extractor._detect_tables_with_parsing(
            mock_detector, str(tmp_path)
        )

        assert tables == []
        assert parsing_res == []

    def test_detect_tables_validates_file_exists(self, tmp_path):
        """Test path validation for file existence.

        This test verifies that the method correctly validates the path exists
        and is a file before attempting to process it.
        """
        # Import before patching so that a first-time import of the detector
        # module cannot pick up the patched renderer.
        from unittest.mock import MagicMock, patch
        from backend.table.structure_detector import TableDetector

        extractor = LineItemsExtractor()

        # Create a real file path that exists
        fake_pdf = tmp_path / "test.pdf"
        fake_pdf.write_bytes(b"not a real pdf")

        mock_detector = MagicMock(spec=TableDetector)
        mock_detector._ensure_initialized = MagicMock()
        mock_detector._pipeline = MagicMock()

        # Mock render_pdf_to_images to avoid actual PDF processing
        with patch("shared.pdf.renderer.render_pdf_to_images") as mock_render:
            # Return empty iterator - simulates file exists but no pages
            mock_render.return_value = iter([])

            tables, parsing_res = extractor._detect_tables_with_parsing(
                mock_detector, str(fake_pdf)
            )

        # render_pdf_to_images was called (path validation passed)
        mock_render.assert_called_once()
        assert tables == []
        assert parsing_res == []


class TestLineItemsResult:
    """Checks the LineItemsResult dataclass and its total_amount value."""

    def test_create_result(self):
        """The result keeps the items, header row and raw HTML it was given."""
        outcome = LineItemsResult(
            items=[
                LineItem(row_index=0, description="Item 1", amount="100"),
                LineItem(row_index=1, description="Item 2", amount="200"),
            ],
            header_row=["Beskrivning", "Belopp"],
            raw_html="<table>...</table>",
        )

        assert len(outcome.items) == 2
        assert outcome.header_row == ["Beskrivning", "Belopp"]
        assert outcome.raw_html == "<table>...</table>"

    def test_total_amount_calculation(self):
        """Comma-decimal amounts are summed and reported in the same format."""
        lines = [
            LineItem(row_index=0, description="Item 1", amount="100,00"),
            LineItem(row_index=1, description="Item 2", amount="200,50"),
        ]

        outcome = LineItemsResult(items=lines, header_row=[], raw_html="")

        # 100,00 + 200,50
        assert outcome.total_amount == "300,50"

    def test_total_amount_with_deduction(self):
        """A negative deduction row is included in the sum."""
        lines = [
            LineItem(row_index=0, description="Rent", amount="8159", is_deduction=False),
            LineItem(row_index=1, description="Avdrag", amount="-2000", is_deduction=True),
        ]

        outcome = LineItemsResult(items=lines, header_row=[], raw_html="")

        # 8159 + (-2000) = 6159, expected with a space thousands separator.
        assert outcome.total_amount == "6 159,00"

    def test_empty_result(self):
        """With no items, total_amount is None."""
        outcome = LineItemsResult(items=[], header_row=[], raw_html="")

        assert outcome.items == []
        assert outcome.total_amount is None


class TestMergedCellExtraction:
    """Checks merged-header detection and extraction (rental invoices)."""

    # Header text in which several columns were merged into one cell.
    MERGED_HEADER = "Specifikation 0218103-1201 2 rum och kök Hyra Avdrag"

    def test_has_merged_header_single_cell_with_keywords(self):
        """A single cell holding several keywords counts as a merged header."""
        extractor = LineItemsExtractor()

        assert extractor._has_merged_header([self.MERGED_HEADER]) is True

    def test_has_merged_header_normal_header(self):
        """Separate one-keyword cells are not reported as merged."""
        extractor = LineItemsExtractor()

        assert extractor._has_merged_header(["Beskrivning", "Antal", "Belopp"]) is False

    def test_has_merged_header_empty(self):
        """Both an empty list and None are reported as not merged."""
        extractor = LineItemsExtractor()

        for header in ([], None):
            assert extractor._has_merged_header(header) is False

    def test_has_merged_header_with_empty_trailing_cells(self):
        """Empty cells around the merged cell do not prevent detection."""
        extractor = LineItemsExtractor()

        # PP-StructureV3 may produce headers with empty trailing cells
        trailing_blanks = [self.MERGED_HEADER, "", "", ""]
        # The merged cell may also be preceded by empty cells
        leading_blanks = ["", "", self.MERGED_HEADER, ""]

        assert extractor._has_merged_header(trailing_blanks) is True
        assert extractor._has_merged_header(leading_blanks) is True

    def test_extract_from_merged_cells_rental_invoice(self):
        """Amount and deduction sharing one cell become two separate items.

        The negative amount is expected to be flagged with is_deduction=True.
        """
        extractor = LineItemsExtractor()
        data_rows = [
            ["", "", "", "8159 -2000"],
            ["", "", "", ""],
        ]

        items = extractor._extract_from_merged_cells([self.MERGED_HEADER], data_rows)

        # One item for the amount, one for the deduction
        assert len(items) == 2
        charge, deduction = items[0], items[1]

        assert charge.amount == "8159"
        assert charge.is_deduction is False
        assert charge.article_number == "0218103-1201"
        assert charge.description == "2 rum och kök"

        assert deduction.amount == "-2000"
        assert deduction.is_deduction is True
        assert deduction.description == "Avdrag"

    def test_extract_from_merged_cells_separate_rows(self):
        """Amount and deduction on separate rows also become two items."""
        extractor = LineItemsExtractor()
        data_rows = [
            ["", "", "", "8159"],      # Amount in row 1
            ["", "", "", "-2000"],     # Deduction in row 2
        ]

        items = extractor._extract_from_merged_cells([self.MERGED_HEADER], data_rows)

        # One item for the amount, one for the deduction
        assert len(items) == 2
        charge, deduction = items[0], items[1]

        assert charge.amount == "8159"
        assert charge.is_deduction is False
        assert charge.article_number == "0218103-1201"
        assert charge.description == "2 rum och kök"

        assert deduction.amount == "-2000"
        assert deduction.is_deduction is True

    def test_extract_from_merged_cells_swedish_format(self):
        """Amounts written with a space thousands separator come back cleaned."""
        extractor = LineItemsExtractor()
        data_rows = [
            ["", "", "", "8 159"],      # Swedish format with space
            ["", "", "", "-2 000"],     # Swedish format with space
        ]

        items = extractor._extract_from_merged_cells([self.MERGED_HEADER], data_rows)

        assert len(items) == 2
        charge, deduction = items[0], items[1]
        # The expected amounts have the spaces removed
        assert charge.amount == "8159"
        assert charge.is_deduction is False
        assert deduction.amount == "-2000"
        assert deduction.is_deduction is True

    def test_extract_merged_cells_via_extract(self):
        """extract() on a colspan header table yields the two merged-cell items."""
        markup = """
        <html><body><table>
            <tr><td colspan="4">Specifikation 0218103-1201 2 rum och kök Hyra Avdrag</td></tr>
            <tr><td></td><td></td><td></td><td>8159 -2000</td></tr>
        </table></body></html>
        """

        items = LineItemsExtractor().extract(markup).items

        # Two items are expected: the amount and the deduction
        assert len(items) == 2
        charge, deduction = items[0], items[1]
        assert charge.amount == "8159"
        assert charge.is_deduction is False
        assert deduction.amount == "-2000"
        assert deduction.is_deduction is True


class TestTextFallbackExtraction:
    """Tests for text-based fallback extraction."""

    def test_text_fallback_can_be_disabled(self):
        """Test text fallback can be disabled via the constructor flag."""
        extractor = LineItemsExtractor(enable_text_fallback=False)
        assert extractor.enable_text_fallback is False

    def test_text_fallback_enabled_by_default(self):
        """Test text fallback is enabled by default."""
        extractor = LineItemsExtractor()
        assert extractor.enable_text_fallback is True

    def test_try_text_fallback_with_valid_parsing_res(self):
        """Test text fallback with valid parsing results."""
        from unittest.mock import patch
        from backend.table.text_line_items_extractor import (
            TextLineItemsExtractor,
            TextLineItem,
            TextLineItemsResult,
        )

        extractor = LineItemsExtractor()

        # Mock parsing_res_list with text elements
        parsing_res = [
            {"label": "text", "bbox": [0, 100, 200, 120], "text": "Product A"},
            {"label": "text", "bbox": [250, 100, 350, 120], "text": "1 234,56"},
            {"label": "text", "bbox": [0, 150, 200, 170], "text": "Product B"},
            {"label": "text", "bbox": [250, 150, 350, 170], "text": "2 345,67"},
        ]

        # Canned result returned by the patched text extractor
        mock_text_result = TextLineItemsResult(
            items=[
                TextLineItem(row_index=0, description="Product A", amount="1 234,56"),
                TextLineItem(row_index=1, description="Product B", amount="2 345,67"),
            ],
            header_row=[],
        )

        with patch.object(
            TextLineItemsExtractor,
            'extract_from_parsing_res',
            return_value=mock_text_result,
        ):
            result = extractor._try_text_fallback(parsing_res)

        assert result is not None
        assert len(result.items) == 2
        assert result.items[0].description == "Product A"
        assert result.items[1].description == "Product B"

    def test_try_text_fallback_returns_none_on_failure(self):
        """Test text fallback returns None when extraction fails."""
        from unittest.mock import patch

        extractor = LineItemsExtractor()

        # The underlying text extractor is stubbed to produce no result.
        with patch(
            'backend.table.text_line_items_extractor.TextLineItemsExtractor.extract_from_parsing_res',
            return_value=None,
        ):
            result = extractor._try_text_fallback([])

        assert result is None

    def test_extract_from_pdf_uses_text_fallback(self):
        """Test extract_from_pdf uses text fallback when no tables found."""
        from unittest.mock import patch, MagicMock

        extractor = LineItemsExtractor(enable_text_fallback=True)

        # Detection reports no tables but does provide parsing results, and the
        # fallback itself is replaced by a mock so only the call is observed.
        with patch.object(extractor, '_detect_tables_with_parsing') as mock_detect:
            mock_detect.return_value = ([], [{"label": "text", "text": "test"}])

            with patch.object(
                extractor,
                '_try_text_fallback',
                return_value=MagicMock(items=[MagicMock()]),
            ) as mock_fallback:
                extractor.extract_from_pdf("fake.pdf")

        # Text fallback should be called
        mock_fallback.assert_called_once()

    def test_extract_from_pdf_skips_fallback_when_disabled(self):
        """Test extract_from_pdf skips text fallback when disabled."""
        from unittest.mock import patch

        extractor = LineItemsExtractor(enable_text_fallback=False)

        with patch.object(extractor, '_detect_tables_with_parsing') as mock_detect:
            mock_detect.return_value = ([], [{"label": "text", "text": "test"}])

            result = extractor.extract_from_pdf("fake.pdf")

        # Should return None, not use text fallback
        assert result is None


class TestVerticallyMergedCellExtraction:
    """Checks handling of rows whose cells were merged vertically."""

    def test_detects_vertically_merged_cells(self):
        """A cell holding several product numbers is flagged as merged."""
        merged_rows = [["Produktnr 1457280 1457281 1060381 merged text here"]]

        detected = LineItemsExtractor()._has_vertically_merged_cells(merged_rows)

        assert detected is True

    def test_splits_vertically_merged_rows(self):
        """_split_merged_rows returns a (header, data) pair of lists."""
        merged_rows = [
            ["Produktnr 1234567 1234568", "Antal 2ST 3ST"],
        ]

        header, data = LineItemsExtractor()._split_merged_rows(merged_rows)

        # Only the types of the two parts are checked here.
        for part in (header, data):
            assert isinstance(part, list)


class TestDeductionDetection:
    """Checks the is_deduction flag on items extracted from HTML tables."""

    @staticmethod
    def _sole_item(markup):
        """Extract ``markup`` and return its single line item.

        Asserts that exactly one item was produced.
        """
        extracted = LineItemsExtractor().extract(markup)
        assert len(extracted.items) == 1
        return extracted.items[0]

    def test_detects_deduction_by_keyword_avdrag(self):
        """A description containing 'avdrag' is flagged as a deduction."""
        markup = """
        <html><body><table>
            <thead><tr><th>Beskrivning</th><th>Belopp</th></tr></thead>
            <tbody>
                <tr><td>Hyresavdrag januari</td><td>-500,00</td></tr>
            </tbody>
        </table></body></html>
        """

        assert self._sole_item(markup).is_deduction is True

    def test_detects_deduction_by_keyword_rabatt(self):
        """A description containing 'rabatt' is flagged as a deduction."""
        markup = """
        <html><body><table>
            <thead><tr><th>Beskrivning</th><th>Belopp</th></tr></thead>
            <tbody>
                <tr><td>Rabatt 10%</td><td>-100,00</td></tr>
            </tbody>
        </table></body></html>
        """

        assert self._sole_item(markup).is_deduction is True

    def test_detects_deduction_by_negative_amount(self):
        """A negative amount without a deduction keyword is still flagged."""
        markup = """
        <html><body><table>
            <thead><tr><th>Beskrivning</th><th>Belopp</th></tr></thead>
            <tbody>
                <tr><td>Some credit</td><td>-250,00</td></tr>
            </tbody>
        </table></body></html>
        """

        assert self._sole_item(markup).is_deduction is True

    def test_normal_item_not_deduction(self):
        """A positive amount with a plain description is not a deduction."""
        markup = """
        <html><body><table>
            <thead><tr><th>Beskrivning</th><th>Belopp</th></tr></thead>
            <tbody>
                <tr><td>Normal product</td><td>500,00</td></tr>
            </tbody>
        </table></body></html>
        """

        assert self._sole_item(markup).is_deduction is False


class TestHeaderDetection:
    """Checks _detect_header_row for headers at the top, bottom, or absent."""

    def test_detect_header_at_bottom(self):
        """A header in the last row is found and reported as being at the end."""
        table = [
            ["100,00", "Product A", "1"],
            ["200,00", "Product B", "2"],
            ["Belopp", "Beskrivning", "Antal"],  # Header at bottom
        ]

        index, cells, at_end = LineItemsExtractor()._detect_header_row(table)

        assert index == 2
        assert at_end is True
        assert "Belopp" in cells

    def test_detect_header_at_top(self):
        """A header in the first row is found and not reported as at the end."""
        table = [
            ["Belopp", "Beskrivning", "Antal"],  # Header at top
            ["100,00", "Product A", "1"],
            ["200,00", "Product B", "2"],
        ]

        index, cells, at_end = LineItemsExtractor()._detect_header_row(table)

        assert index == 0
        assert at_end is False
        assert "Belopp" in cells

    def test_no_header_detected(self):
        """With data rows only, the result is index -1 and an empty header."""
        table = [
            ["100,00", "Product A", "1"],
            ["200,00", "Product B", "2"],
        ]

        index, cells, at_end = LineItemsExtractor()._detect_header_row(table)

        assert (index, cells) == (-1, [])
        assert at_end is False