Update Paddle and add support for invoice line items
This commit is contained in:
@@ -42,6 +42,11 @@ class ServiceResult:
|
||||
visualization_path: Path | None = None
|
||||
errors: list[str] = field(default_factory=list)
|
||||
|
||||
# Business features (optional, populated when extract_line_items=True)
|
||||
line_items: dict | None = None
|
||||
vat_summary: dict | None = None
|
||||
vat_validation: dict | None = None
|
||||
|
||||
|
||||
class InferenceService:
|
||||
"""
|
||||
@@ -74,6 +79,7 @@ class InferenceService:
|
||||
self._detector = None
|
||||
self._is_initialized = False
|
||||
self._current_model_path: Path | None = None
|
||||
self._business_features_enabled = False
|
||||
|
||||
def _resolve_model_path(self) -> Path:
|
||||
"""Resolve the model path to use for inference.
|
||||
@@ -95,12 +101,16 @@ class InferenceService:
|
||||
|
||||
return self.model_config.model_path
|
||||
|
||||
def initialize(self) -> None:
|
||||
"""Initialize the inference pipeline (lazy loading)."""
|
||||
def initialize(self, enable_business_features: bool = False) -> None:
|
||||
"""Initialize the inference pipeline (lazy loading).
|
||||
|
||||
Args:
|
||||
enable_business_features: Whether to enable line items and VAT extraction
|
||||
"""
|
||||
if self._is_initialized:
|
||||
return
|
||||
|
||||
logger.info("Initializing inference service...")
|
||||
logger.info(f"Initializing inference service (business_features={enable_business_features})...")
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
@@ -118,16 +128,18 @@ class InferenceService:
|
||||
device="cuda" if self.model_config.use_gpu else "cpu",
|
||||
)
|
||||
|
||||
# Initialize full pipeline
|
||||
# Initialize full pipeline with optional business features
|
||||
self._pipeline = InferencePipeline(
|
||||
model_path=str(model_path),
|
||||
confidence_threshold=self.model_config.confidence_threshold,
|
||||
use_gpu=self.model_config.use_gpu,
|
||||
dpi=self.model_config.dpi,
|
||||
enable_fallback=True,
|
||||
enable_business_features=enable_business_features,
|
||||
)
|
||||
|
||||
self._is_initialized = True
|
||||
self._business_features_enabled = enable_business_features
|
||||
elapsed = time.time() - start_time
|
||||
logger.info(f"Inference service initialized in {elapsed:.2f}s with model: {model_path}")
|
||||
|
||||
@@ -242,6 +254,7 @@ class InferenceService:
|
||||
pdf_path: Path,
|
||||
document_id: str | None = None,
|
||||
save_visualization: bool = True,
|
||||
extract_line_items: bool = False,
|
||||
) -> ServiceResult:
|
||||
"""
|
||||
Process a PDF file and extract invoice fields.
|
||||
@@ -250,12 +263,17 @@ class InferenceService:
|
||||
pdf_path: Path to PDF file
|
||||
document_id: Optional document ID
|
||||
save_visualization: Whether to save visualization
|
||||
extract_line_items: Whether to extract line items and VAT info
|
||||
|
||||
Returns:
|
||||
ServiceResult with extracted fields
|
||||
"""
|
||||
if not self._is_initialized:
|
||||
self.initialize()
|
||||
self.initialize(enable_business_features=extract_line_items)
|
||||
elif extract_line_items and not self._business_features_enabled:
|
||||
# Reinitialize with business features if needed
|
||||
self._is_initialized = False
|
||||
self.initialize(enable_business_features=True)
|
||||
|
||||
doc_id = document_id or str(uuid.uuid4())[:8]
|
||||
start_time = time.time()
|
||||
@@ -263,8 +281,12 @@ class InferenceService:
|
||||
result = ServiceResult(document_id=doc_id)
|
||||
|
||||
try:
|
||||
# Run inference pipeline
|
||||
pipeline_result = self._pipeline.process_pdf(pdf_path, document_id=doc_id)
|
||||
# Run inference pipeline with optional business features
|
||||
pipeline_result = self._pipeline.process_pdf(
|
||||
pdf_path,
|
||||
document_id=doc_id,
|
||||
extract_line_items=extract_line_items,
|
||||
)
|
||||
|
||||
result.fields = pipeline_result.fields
|
||||
result.confidence = pipeline_result.confidence
|
||||
@@ -288,6 +310,12 @@ class InferenceService:
|
||||
for d in pipeline_result.raw_detections
|
||||
]
|
||||
|
||||
# Include business features if extracted
|
||||
if extract_line_items:
|
||||
result.line_items = pipeline_result._line_items_to_json() if pipeline_result.line_items else None
|
||||
result.vat_summary = pipeline_result._vat_summary_to_json() if pipeline_result.vat_summary else None
|
||||
result.vat_validation = pipeline_result._vat_validation_to_json() if pipeline_result.vat_validation else None
|
||||
|
||||
# Save visualization (render first page)
|
||||
if save_visualization and pipeline_result.raw_detections:
|
||||
viz_path = self._save_pdf_visualization(pdf_path, doc_id)
|
||||
|
||||
Reference in New Issue
Block a user