142 lines
4.0 KiB
Python
142 lines
4.0 KiB
Python
"""
|
|
Invoice Validator

Business logic for validating extracted invoice fields.
Checks for required fields, payment references, and confidence thresholds.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from backend.domain.utils import has_value
|
|
|
|
|
|
@dataclass(frozen=True)
class ValidationIssue:
    """
    One problem detected while validating an invoice.

    Instances are frozen: attributes cannot be reassigned after creation.

    Attributes:
        field: Name of the field the issue refers to
        severity: Severity label; expected to be "error", "warning" or
            "info" (the value is not enforced by this class)
        message: Human-readable explanation of the problem
    """

    field: str
    severity: str
    message: str
|
|
|
|
|
|
@dataclass(frozen=True)
class ValidationResult:
    """
    Outcome of validating one invoice.

    Instances are frozen: attributes cannot be reassigned after creation.

    Attributes:
        is_valid: True when no issue of severity "error" was found
            (warnings do not invalidate the result)
        issues: Tuple holding every validation issue that was found
        confidence: Average confidence score reported for the validation
    """

    is_valid: bool
    issues: tuple[ValidationIssue, ...]
    confidence: float
|
|
|
|
|
|
class InvoiceValidator:
    """
    Checks extracted invoice fields for completeness and confidence.

    Rules applied by ``validate``:
        1. Every field in ``REQUIRED_FIELDS`` must have a value
           (severity "error" when it does not).
        2. At least one field in ``PAYMENT_REF_FIELDS`` should have a value
           (severity "warning" when none does).
        3. Every supplied confidence score should reach the minimum
           threshold (severity "warning" for each score below it).

    Required fields:
        - Amount

    Payment reference fields (at least one expected):
        - OCR
        - Bankgiro
        - Plusgiro
        - payment_line
    """

    REQUIRED_FIELDS: tuple[str, ...] = ("Amount",)
    PAYMENT_REF_FIELDS: tuple[str, ...] = (
        "OCR",
        "Bankgiro",
        "Plusgiro",
        "payment_line",
    )
    DEFAULT_MIN_CONFIDENCE: float = 0.5

    def __init__(self, min_confidence: float = DEFAULT_MIN_CONFIDENCE) -> None:
        """
        Create a validator.

        Args:
            min_confidence: Threshold a confidence score must reach;
                scores strictly below it produce warnings.
        """
        self._min_confidence = min_confidence

    def validate(
        self,
        fields: dict[str, str | None],
        confidence: dict[str, float],
    ) -> ValidationResult:
        """
        Validate extracted invoice fields.

        Issues are reported in a fixed order: missing required fields,
        then the missing-payment-reference warning, then low-confidence
        warnings in the iteration order of ``confidence``.

        Args:
            fields: Mapping of field name to extracted value
            confidence: Mapping of field name to confidence score

        Returns:
            ValidationResult whose ``is_valid`` is False only when an
            "error" issue exists, and whose ``confidence`` is the mean of
            all supplied scores (0.0 when ``confidence`` is empty)
        """
        # Errors: each required field for which has_value() is falsy.
        found: list[ValidationIssue] = [
            ValidationIssue(
                field=field,
                severity="error",
                message=f"Required field '{field}' is missing",
            )
            for field in self.REQUIRED_FIELDS
            if not has_value(fields.get(field))
        ]

        # Warning: stop at the first payment reference that has a value;
        # the else branch runs only when the loop finishes without one.
        for ref_name in self.PAYMENT_REF_FIELDS:
            if has_value(fields.get(ref_name)):
                break
        else:
            found.append(
                ValidationIssue(
                    field="payment_reference",
                    severity="warning",
                    message="No payment reference (OCR, Bankgiro, Plusgiro, or payment_line)",
                )
            )

        # Warnings: one per score strictly below the configured threshold.
        found.extend(
            ValidationIssue(
                field=field,
                severity="warning",
                message=f"Low confidence ({conf:.2f}) for field '{field}'",
            )
            for field, conf in confidence.items()
            if conf < self._min_confidence
        )

        # Only "error" issues invalidate the result; warnings are allowed.
        no_errors = all(issue.severity != "error" for issue in found)

        # Guard the division: an empty mapping yields 0.0.
        if confidence:
            mean_confidence = sum(confidence.values()) / len(confidence)
        else:
            mean_confidence = 0.0

        return ValidationResult(
            is_valid=no_errors,
            issues=tuple(found),
            confidence=mean_confidence,
        )
|