Files
invoice-master-poc-v2/packages/backend/backend/domain/invoice_validator.py
Yaojia Wang c2c8f2dd04 WIP
2026-02-03 22:29:53 +01:00

142 lines
4.0 KiB
Python

"""
Invoice Validator
Business logic for validating extracted invoice fields.
Checks for required fields, presence of a payment reference, and confidence thresholds.
"""
from __future__ import annotations
from dataclasses import dataclass
from backend.domain.utils import has_value
@dataclass(frozen=True)
class ValidationIssue:
    """
    One problem reported for an invoice during validation.

    The dataclass is frozen, so an instance cannot be modified after it
    has been created.

    Attributes:
        field: Name of the field the issue refers to.
        severity: Level of the issue; expected to be one of "error",
            "warning" or "info" (the class itself does not enforce this).
        message: Human-readable explanation of the problem.
    """

    field: str
    severity: str
    message: str
@dataclass(frozen=True)
class ValidationResult:
    """
    Frozen outcome of validating one set of invoice fields.

    Attributes:
        is_valid: True when validation produced no issue of severity
            "error"; warnings do not make a result invalid.
        issues: All validation issues found, as a tuple.
        confidence: Average of the confidence scores supplied to the
            validator.
    """

    is_valid: bool
    issues: tuple[ValidationIssue, ...]
    confidence: float
class InvoiceValidator:
    """
    Checks extracted invoice fields for completeness and confidence.

    Rules applied by ``validate``, in this order:
        1. Every name in ``REQUIRED_FIELDS`` must have a value; a missing
           one yields an "error" issue.
        2. At least one name in ``PAYMENT_REF_FIELDS`` should have a
           value; if none does, a single "warning" issue is added.
        3. Every supplied confidence score should reach the configured
           minimum; each lower score yields a "warning" issue.

    Whether a field "has a value" is decided by the imported
    ``has_value`` helper.

    Required fields:
        - Amount

    Payment reference fields (at least one expected):
        - OCR
        - Bankgiro
        - Plusgiro
        - payment_line
    """

    REQUIRED_FIELDS: tuple[str, ...] = ("Amount",)
    PAYMENT_REF_FIELDS: tuple[str, ...] = ("OCR", "Bankgiro", "Plusgiro", "payment_line")
    DEFAULT_MIN_CONFIDENCE: float = 0.5

    def __init__(self, min_confidence: float = DEFAULT_MIN_CONFIDENCE) -> None:
        """
        Create a validator.

        Args:
            min_confidence: Threshold below which a field's confidence
                score is reported as a warning.
        """
        self._min_confidence = min_confidence

    def validate(
        self,
        fields: dict[str, str | None],
        confidence: dict[str, float],
    ) -> ValidationResult:
        """
        Run all validation rules against one set of extracted fields.

        Args:
            fields: Mapping of field name to extracted value.
            confidence: Mapping of field name to confidence score. Every
                entry is checked against the threshold, whether or not
                the same name appears in ``fields``.

        Returns:
            A ValidationResult whose ``is_valid`` is False only when at
            least one "error" issue was found, and whose ``confidence``
            is the mean of all supplied scores (0.0 when none are given).
        """
        # Rule 1: one error per required field that has no value.
        found: list[ValidationIssue] = [
            ValidationIssue(
                field=field,
                severity="error",
                message=f"Required field '{field}' is missing",
            )
            for field in self.REQUIRED_FIELDS
            if not has_value(fields.get(field))
        ]

        # Rule 2: stop at the first payment reference that has a value;
        # the else branch runs only when none of them does.
        for ref_name in self.PAYMENT_REF_FIELDS:
            if has_value(fields.get(ref_name)):
                break
        else:
            found.append(
                ValidationIssue(
                    field="payment_reference",
                    severity="warning",
                    message="No payment reference (OCR, Bankgiro, Plusgiro, or payment_line)",
                )
            )

        # Rule 3: one warning per score below the configured minimum.
        found.extend(
            ValidationIssue(
                field=field,
                severity="warning",
                message=f"Low confidence ({conf:.2f}) for field '{field}'",
            )
            for field, conf in confidence.items()
            if conf < self._min_confidence
        )

        # Only errors make the result invalid; warnings are tolerated.
        error_free = all(entry.severity != "error" for entry in found)

        # Guard against dividing by zero when no scores were supplied.
        if confidence:
            mean_confidence = sum(confidence.values()) / len(confidence)
        else:
            mean_confidence = 0.0

        return ValidationResult(
            is_valid=error_free,
            issues=tuple(found),
            confidence=mean_confidence,
        )