Files
invoice-master-poc-v2/packages/backend/backend/pipeline/normalizers/__init__.py
Yaojia Wang b602d0a340 re-structure
2026-02-01 22:55:31 +01:00

61 lines
1.9 KiB
Python

"""
Normalizers Package
Provides field-specific normalizers for invoice data extraction.
Each normalizer handles a specific field type's normalization and validation.
"""
from .base import BaseNormalizer, NormalizationResult
from .invoice_number import InvoiceNumberNormalizer
from .ocr_number import OcrNumberNormalizer
from .bankgiro import BankgiroNormalizer
from .plusgiro import PlusgiroNormalizer
from .amount import AmountNormalizer, EnhancedAmountNormalizer
from .date import DateNormalizer, EnhancedDateNormalizer
from .supplier_org_number import SupplierOrgNumberNormalizer
# Public API of the normalizers package: these are the names exported by a
# star-import of this package. Every entry is imported at the top of this file.
__all__ = [
# Base: names imported from .base
"BaseNormalizer",
"NormalizationResult",
# Normalizers: one module per field type; the amount and date modules
# each also provide an "Enhanced" variant.
"InvoiceNumberNormalizer",
"OcrNumberNormalizer",
"BankgiroNormalizer",
"PlusgiroNormalizer",
"AmountNormalizer",
"EnhancedAmountNormalizer",
"DateNormalizer",
"EnhancedDateNormalizer",
"SupplierOrgNumberNormalizer",
]
# Factory that builds a registry of normalizer instances keyed by field name
def create_normalizer_registry(
    use_enhanced: bool = False,
) -> dict[str, BaseNormalizer]:
    """
    Build a new mapping from field name to the normalizer that handles it.

    Every call constructs fresh normalizer instances; nothing is cached
    between calls.

    Args:
        use_enhanced: When true, the amount and date fields are served by
            EnhancedAmountNormalizer / EnhancedDateNormalizer instead of
            AmountNormalizer / DateNormalizer.

    Returns:
        Dictionary keyed by field name. "InvoiceDate" and "InvoiceDueDate"
        map to the same date normalizer instance.
    """
    # Pick the implementation classes first, then instantiate each once.
    if use_enhanced:
        amount_cls, date_cls = EnhancedAmountNormalizer, EnhancedDateNormalizer
    else:
        amount_cls, date_cls = AmountNormalizer, DateNormalizer

    amount = amount_cls()
    # A single date normalizer instance is shared by both date fields.
    shared_date = date_cls()

    registry: dict[str, BaseNormalizer] = {
        "InvoiceNumber": InvoiceNumberNormalizer(),
        "OCR": OcrNumberNormalizer(),
        "Bankgiro": BankgiroNormalizer(),
        "Plusgiro": PlusgiroNormalizer(),
        "Amount": amount,
        "InvoiceDate": shared_date,
        "InvoiceDueDate": shared_date,
        # Note: unlike the keys above, this field name is
        # "supplier_organisation_number" (from the CLASS_TO_FIELD mapping).
        "supplier_organisation_number": SupplierOrgNumberNormalizer(),
    }
    return registry