Add payment line parser and fix OCR override from payment_line
- Add MachineCodeParser for Swedish invoice payment line parsing
- Fix OCR Reference extraction by normalizing account number spaces
- Add cross-validation tests for pipeline and field_extractor
- Update UI layout for compact upload and full-width results

Key changes:
- machine_code_parser.py: Handle spaces in Bankgiro numbers (e.g. "78 2 1 713")
- pipeline.py: OCR and Amount override from payment_line, BG/PG comparison only
- field_extractor.py: Improved invoice number normalization
- app.py: Responsive UI layout changes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -22,6 +22,7 @@ FIELD_CLASSES = {
|
||||
'Amount': 6,
|
||||
'supplier_organisation_number': 7,
|
||||
'customer_number': 8,
|
||||
'payment_line': 9, # Machine code payment line at bottom of invoice
|
||||
}
|
||||
|
||||
# Fields that need matching but map to other YOLO classes
|
||||
@@ -43,6 +44,7 @@ CLASS_NAMES = [
|
||||
'amount',
|
||||
'supplier_org_number',
|
||||
'customer_number',
|
||||
'payment_line', # Machine code payment line at bottom of invoice
|
||||
]
|
||||
|
||||
|
||||
@@ -160,6 +162,68 @@ class AnnotationGenerator:
|
||||
|
||||
return annotations
|
||||
|
||||
def add_payment_line_annotation(
    self,
    annotations: list[YOLOAnnotation],
    payment_line_bbox: tuple[float, float, float, float],
    confidence: float,
    image_width: float,
    image_height: float,
    dpi: int = 300
) -> list[YOLOAnnotation]:
    """
    Add payment_line annotation from machine code parser result.

    The bounding box is scaled from PDF points (72 per inch) to rendered
    pixels, padded by ``self.bbox_padding_px``, clipped to the image, and
    appended to ``annotations`` in YOLO format (normalized center + size).

    Args:
        annotations: Existing list of annotations to append to
        payment_line_bbox: Bounding box (x0, y0, x1, y1) in PDF coordinates
        confidence: Confidence score from machine code parser
        image_width: Width of the rendered image in pixels
        image_height: Height of the rendered image in pixels
        dpi: DPI used for rendering

    Returns:
        The same ``annotations`` list (mutated in place). It is returned
        unchanged when the bbox is empty, ``confidence`` is below
        ``self.min_confidence``, an image dimension is not positive, or
        the clipped box has no area.
    """
    if not payment_line_bbox or confidence < self.min_confidence:
        return annotations

    # Normalizing divides by the image size; a non-positive dimension would
    # raise ZeroDivisionError (or yield meaningless values), so skip it.
    if image_width <= 0 or image_height <= 0:
        return annotations

    # Scale factor to convert PDF points (72 DPI) to rendered pixels
    scale = dpi / 72.0
    x0, y0, x1, y1 = (coord * scale for coord in payment_line_bbox)

    # Add absolute padding, keeping the box inside the image
    pad = self.bbox_padding_px
    x0 = max(0, x0 - pad)
    y0 = max(0, y0 - pad)
    x1 = min(image_width, x1 + pad)
    y1 = min(image_height, y1 + pad)

    # A box that lies outside the image (or is inverted) clips to nothing;
    # emitting it would produce a zero-area label pinned to the image edge.
    if x1 <= x0 or y1 <= y0:
        return annotations

    # Convert to YOLO format (normalized center + size)
    x_center = (x0 + x1) / 2 / image_width
    y_center = (y0 + y1) / 2 / image_height
    width = (x1 - x0) / image_width
    height = (y1 - y0) / image_height

    # Clamp values to 0-1 to absorb floating-point rounding at the edges
    x_center = max(0, min(1, x_center))
    y_center = max(0, min(1, y_center))
    width = max(0, min(1, width))
    height = max(0, min(1, height))

    annotations.append(YOLOAnnotation(
        class_id=FIELD_CLASSES['payment_line'],
        x_center=x_center,
        y_center=y_center,
        width=width,
        height=height,
        confidence=confidence
    ))

    return annotations
|
||||
|
||||
def save_annotations(
|
||||
self,
|
||||
annotations: list[YOLOAnnotation],
|
||||
|
||||
Reference in New Issue
Block a user