""" Field Normalization Module Normalizes field values to generate multiple candidate forms for matching. This module now delegates to individual normalizer modules for each field type. Each normalizer is a separate, reusable module that can be used independently. """ from dataclasses import dataclass from typing import Callable from src.utils.text_cleaner import TextCleaner # Import individual normalizers from .normalizers import ( InvoiceNumberNormalizer, OCRNormalizer, BankgiroNormalizer, PlusgiroNormalizer, AmountNormalizer, DateNormalizer, OrganisationNumberNormalizer, SupplierAccountsNormalizer, CustomerNumberNormalizer, ) @dataclass class NormalizedValue: """Represents a normalized value with its variants.""" original: str variants: list[str] field_type: str class FieldNormalizer: """ Handles normalization of different invoice field types. This class now acts as a facade that delegates to individual normalizer modules. Each field type has its own specialized normalizer for better modularity and reusability. """ # Instantiate individual normalizers _invoice_number = InvoiceNumberNormalizer() _ocr_number = OCRNormalizer() _bankgiro = BankgiroNormalizer() _plusgiro = PlusgiroNormalizer() _amount = AmountNormalizer() _date = DateNormalizer() _organisation_number = OrganisationNumberNormalizer() _supplier_accounts = SupplierAccountsNormalizer() _customer_number = CustomerNumberNormalizer() # Common Swedish month names for backward compatibility SWEDISH_MONTHS = DateNormalizer.SWEDISH_MONTHS @staticmethod def clean_text(text: str) -> str: """ Remove invisible characters and normalize whitespace and dashes. Delegates to shared TextCleaner for consistency. """ return TextCleaner.clean_text(text) @staticmethod def normalize_invoice_number(value: str) -> list[str]: """ Normalize invoice number. Delegates to InvoiceNumberNormalizer. """ return FieldNormalizer._invoice_number.normalize(value) @staticmethod def normalize_ocr_number(value: str) -> list[str]: """ Normalize OCR number (Swedish payment reference). Delegates to OCRNormalizer. """ return FieldNormalizer._ocr_number.normalize(value) @staticmethod def normalize_bankgiro(value: str) -> list[str]: """ Normalize Bankgiro number. Delegates to BankgiroNormalizer. """ return FieldNormalizer._bankgiro.normalize(value) @staticmethod def normalize_plusgiro(value: str) -> list[str]: """ Normalize Plusgiro number. Delegates to PlusgiroNormalizer. """ return FieldNormalizer._plusgiro.normalize(value) @staticmethod def normalize_organisation_number(value: str) -> list[str]: """ Normalize Swedish organisation number and generate VAT number variants. Delegates to OrganisationNumberNormalizer. """ return FieldNormalizer._organisation_number.normalize(value) @staticmethod def normalize_supplier_accounts(value: str) -> list[str]: """ Normalize supplier accounts field. Delegates to SupplierAccountsNormalizer. """ return FieldNormalizer._supplier_accounts.normalize(value) @staticmethod def normalize_customer_number(value: str) -> list[str]: """ Normalize customer number. Delegates to CustomerNumberNormalizer. """ return FieldNormalizer._customer_number.normalize(value) @staticmethod def normalize_amount(value: str) -> list[str]: """ Normalize monetary amount. Delegates to AmountNormalizer. """ return FieldNormalizer._amount.normalize(value) @staticmethod def normalize_date(value: str) -> list[str]: """ Normalize date to YYYY-MM-DD and generate variants. Delegates to DateNormalizer. """ return FieldNormalizer._date.normalize(value) # Field type to normalizer mapping NORMALIZERS: dict[str, Callable[[str], list[str]]] = { 'InvoiceNumber': FieldNormalizer.normalize_invoice_number, 'OCR': FieldNormalizer.normalize_ocr_number, 'Bankgiro': FieldNormalizer.normalize_bankgiro, 'Plusgiro': FieldNormalizer.normalize_plusgiro, 'Amount': FieldNormalizer.normalize_amount, 'InvoiceDate': FieldNormalizer.normalize_date, 'InvoiceDueDate': FieldNormalizer.normalize_date, 'supplier_organisation_number': FieldNormalizer.normalize_organisation_number, 'supplier_accounts': FieldNormalizer.normalize_supplier_accounts, 'customer_number': FieldNormalizer.normalize_customer_number, } def normalize_field(field_name: str, value: str) -> list[str]: """ Normalize a field value based on its type. Args: field_name: Name of the field (e.g., 'InvoiceNumber', 'Amount') value: Raw value to normalize Returns: List of normalized variants """ if value is None or (isinstance(value, str) and not value.strip()): return [] value = str(value) normalizer = NORMALIZERS.get(field_name) if normalizer: return normalizer(value) # Default: just clean the text return [FieldNormalizer.clean_text(value)]