Files
invoice-master-poc-v2/packages/inference/inference/pipeline/normalizers/base.py
Yaojia Wang a564ac9d70 WIP
2026-02-01 18:51:54 +01:00

72 lines
2.0 KiB
Python

"""
Base Normalizer Interface
Defines the contract for all field normalizers.
Each normalizer handles a specific field type's normalization and validation.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
@dataclass(frozen=True)
class NormalizationResult:
"""Result of a normalization operation."""
value: str | None
is_valid: bool
error: str | None = None
@classmethod
def success(cls, value: str) -> "NormalizationResult":
"""Create a successful result."""
return cls(value=value, is_valid=True, error=None)
@classmethod
def success_with_warning(cls, value: str, warning: str) -> "NormalizationResult":
"""Create a successful result with a warning."""
return cls(value=value, is_valid=True, error=warning)
@classmethod
def failure(cls, error: str) -> "NormalizationResult":
"""Create a failed result."""
return cls(value=None, is_valid=False, error=error)
def to_tuple(self) -> tuple[str | None, bool, str | None]:
"""Convert to legacy tuple format for backward compatibility."""
return (self.value, self.is_valid, self.error)
class BaseNormalizer(ABC):
    """Abstract base class for field normalizers.

    Each normalizer is responsible for:

    1. Cleaning and normalizing raw text
    2. Validating the normalized value
    3. Returning a standardized result

    Subclasses must provide ``field_name`` and ``normalize``; instances are
    also callable, which forwards to ``normalize``.
    """

    @property
    @abstractmethod
    def field_name(self) -> str:
        """The field name this normalizer handles."""

    @abstractmethod
    def normalize(self, text: str) -> NormalizationResult:
        """Normalize and validate the input text.

        Args:
            text: Raw text to normalize.

        Returns:
            NormalizationResult with normalized value or error.
        """

    def __call__(self, text: str) -> NormalizationResult:
        """Allow using the normalizer as a callable; delegates to ``normalize``."""
        return self.normalize(text)