Update paddle, and support invoice line item

This commit is contained in:
Yaojia Wang
2026-02-03 21:28:06 +01:00
parent c4e3773df1
commit 35988b1ebf
41 changed files with 6832 additions and 48 deletions

View File

@@ -301,3 +301,227 @@ class TestInferenceServiceImports:
assert YOLODetector is not None
assert render_pdf_to_images is not None
assert InferenceService is not None
class TestBusinessFeaturesAPI:
    """Tests for business features (line items, VAT) in API.

    Covers the ``/api/v1/infer`` endpoint with and without the
    ``extract_line_items`` form field, plus smoke tests for the schema and
    service-result types that carry the business feature payloads.
    """

    # NOTE: stacked ``@patch`` decorators are applied bottom-up, so the
    # YOLODetector mock is injected first and the InferencePipeline mock second.
    @patch('backend.pipeline.pipeline.InferencePipeline')
    @patch('backend.pipeline.yolo_detector.YOLODetector')
    def test_infer_with_extract_line_items_false_by_default(
        self,
        mock_yolo_detector,
        mock_pipeline,
        client,
        sample_png_bytes,
    ):
        """Test that extract_line_items defaults to False.

        Posts a PNG without the ``extract_line_items`` field and expects all
        three business feature entries of the response to be ``None``.
        ``client`` and ``sample_png_bytes`` are not supplied by ``@patch``;
        presumably they are pytest fixtures defined elsewhere in the suite.
        """
        # Setup mocks
        mock_detector_instance = Mock()
        mock_pipeline_instance = Mock()
        mock_yolo_detector.return_value = mock_detector_instance
        mock_pipeline.return_value = mock_pipeline_instance

        # Mock pipeline result
        # NOTE(review): line_items / vat_summary / vat_validation are not set
        # here, so on a bare Mock they auto-create as truthy child Mocks. The
        # assertions below therefore rely on the service ignoring them when
        # the feature is not requested -- confirm against the service code.
        mock_result = Mock()
        mock_result.fields = {"InvoiceNumber": "12345"}
        mock_result.confidence = {"InvoiceNumber": 0.95}
        mock_result.success = True
        mock_result.errors = []
        mock_result.raw_detections = []
        mock_result.document_id = "test123"
        mock_result.document_type = "invoice"
        mock_result.processing_time_ms = 100.0
        mock_result.visualization_path = None
        mock_result.detections = []
        mock_pipeline_instance.process_image.return_value = mock_result

        # Make request without extract_line_items parameter
        response = client.post(
            "/api/v1/infer",
            files={"file": ("test.png", sample_png_bytes, "image/png")},
        )
        assert response.status_code == 200
        data = response.json()

        # Business features should be None when not requested
        assert data["result"]["line_items"] is None
        assert data["result"]["vat_summary"] is None
        assert data["result"]["vat_validation"] is None

    @patch('backend.pipeline.pipeline.InferencePipeline')
    @patch('backend.pipeline.yolo_detector.YOLODetector')
    def test_infer_with_extract_line_items_returns_business_features(
        self,
        mock_yolo_detector,
        mock_pipeline,
        client,
        tmp_path,
    ):
        """Test that extract_line_items=True returns business features.

        Uploads a fake PDF with ``extract_line_items=true`` while the mocked
        pipeline's ``process_pdf`` returns a result whose ``_*_to_json``
        helpers yield canned payloads, then checks that those payloads appear
        in the JSON response.
        """
        # Setup mocks
        mock_detector_instance = Mock()
        mock_pipeline_instance = Mock()
        mock_yolo_detector.return_value = mock_detector_instance
        mock_pipeline.return_value = mock_pipeline_instance

        # Create a test PDF file
        pdf_path = tmp_path / "test.pdf"
        pdf_path.write_bytes(b'%PDF-1.4 fake pdf content')

        # Mock pipeline result with business features
        mock_result = Mock()
        mock_result.fields = {"Amount": "12500,00"}
        mock_result.confidence = {"Amount": 0.95}
        mock_result.success = True
        mock_result.errors = []
        mock_result.raw_detections = []
        mock_result.document_id = "test123"
        mock_result.document_type = "invoice"
        mock_result.processing_time_ms = 150.0
        mock_result.visualization_path = None
        mock_result.detections = []

        # Mock line items
        mock_result.line_items = Mock()
        mock_result._line_items_to_json.return_value = {
            "items": [
                {
                    "row_index": 0,
                    "description": "Product A",
                    "quantity": "2",
                    "unit": "st",
                    "unit_price": "5000,00",
                    "amount": "10000,00",
                    "article_number": "ART001",
                    "vat_rate": "25",
                    "confidence": 0.9,
                }
            ],
            "header_row": ["Beskrivning", "Antal", "Pris", "Belopp"],
            "total_amount": "10000,00",
        }

        # Mock VAT summary
        mock_result.vat_summary = Mock()
        mock_result._vat_summary_to_json.return_value = {
            "breakdowns": [
                {
                    "rate": 25.0,
                    "base_amount": "10000,00",
                    "vat_amount": "2500,00",
                    "source": "regex",
                }
            ],
            "total_excl_vat": "10000,00",
            "total_vat": "2500,00",
            "total_incl_vat": "12500,00",
            "confidence": 0.9,
        }

        # Mock VAT validation
        mock_result.vat_validation = Mock()
        mock_result._vat_validation_to_json.return_value = {
            "is_valid": True,
            "confidence_score": 0.95,
            "math_checks": [
                {
                    "rate": 25.0,
                    "base_amount": 10000.0,
                    "expected_vat": 2500.0,
                    "actual_vat": 2500.0,
                    "is_valid": True,
                    "tolerance": 0.5,
                }
            ],
            "total_check": True,
            "line_items_vs_summary": True,
            "amount_consistency": True,
            "needs_review": False,
            "review_reasons": [],
        }
        mock_pipeline_instance.process_pdf.return_value = mock_result

        # Make request with extract_line_items=true.
        # The upload handle is opened in a ``with`` block so it is closed
        # deterministically once the request returns; leaving it open leaks a
        # descriptor and can block tmp_path cleanup on some platforms.
        with pdf_path.open("rb") as pdf_file:
            response = client.post(
                "/api/v1/infer",
                files={"file": ("test.pdf", pdf_file, "application/pdf")},
                data={"extract_line_items": "true"},
            )
        assert response.status_code == 200
        data = response.json()

        # Verify business features are included
        assert data["result"]["line_items"] is not None
        assert len(data["result"]["line_items"]["items"]) == 1
        assert data["result"]["line_items"]["items"][0]["description"] == "Product A"
        assert data["result"]["line_items"]["items"][0]["amount"] == "10000,00"

        assert data["result"]["vat_summary"] is not None
        assert len(data["result"]["vat_summary"]["breakdowns"]) == 1
        assert data["result"]["vat_summary"]["breakdowns"][0]["rate"] == 25.0
        assert data["result"]["vat_summary"]["total_incl_vat"] == "12500,00"

        assert data["result"]["vat_validation"] is not None
        assert data["result"]["vat_validation"]["is_valid"] is True
        assert data["result"]["vat_validation"]["confidence_score"] == 0.95

    def test_schema_imports_work_correctly(self):
        """Test that all business feature schemas can be imported.

        Also instantiates a line item, a VAT breakdown and an
        ``InferenceResult`` to check that the business feature fields of the
        latter default to ``None``.
        """
        # Importing inside the test makes a missing schema fail this test
        # only, rather than breaking collection of the whole module.
        from backend.web.schemas.inference import (
            LineItemSchema,
            LineItemsResultSchema,
            VATBreakdownSchema,
            VATSummarySchema,
            MathCheckResultSchema,
            VATValidationResultSchema,
            InferenceResult,
        )

        # Verify schemas can be instantiated
        line_item = LineItemSchema(
            row_index=0,
            description="Test",
            amount="100",
        )
        assert line_item.description == "Test"

        vat_breakdown = VATBreakdownSchema(
            rate=25.0,
            base_amount="100",
            vat_amount="25",
        )
        assert vat_breakdown.rate == 25.0

        # Verify InferenceResult includes business feature fields
        result = InferenceResult(
            document_id="test",
            success=True,
            processing_time_ms=100.0,
        )
        assert result.line_items is None
        assert result.vat_summary is None
        assert result.vat_validation is None

    def test_service_result_has_business_feature_fields(self):
        """Test that ServiceResult includes business feature fields.

        The three fields must default to ``None`` and accept assignment.
        """
        from backend.web.services.inference import ServiceResult

        result = ServiceResult(document_id="test123")

        # Verify business feature fields exist and default to None
        assert result.line_items is None
        assert result.vat_summary is None
        assert result.vat_validation is None

        # Verify they can be set
        result.line_items = {"items": []}
        result.vat_summary = {"breakdowns": []}
        result.vat_validation = {"is_valid": True}
        assert result.line_items == {"items": []}
        assert result.vat_summary == {"breakdowns": []}
        assert result.vat_validation == {"is_valid": True}

View File

@@ -133,6 +133,7 @@ class TestInferenceServiceInitialization:
use_gpu=False,
dpi=150,
enable_fallback=True,
enable_business_features=False,
)
@patch('backend.pipeline.pipeline.InferencePipeline')