"""
Field Validators Module

Provides validation functions for Swedish invoice fields.
Used by both inference (to validate extracted values) and matching (to filter candidates).
"""

import re
from datetime import datetime
from typing import Optional

from .text_cleaner import TextCleaner


class FieldValidators:
    """
    Validators for Swedish invoice field values.

    Includes:
    - Luhn (Mod10) checksum validation
    - Format validation for specific field types
    - Range validation for dates and amounts
    """

    # =========================================================================
    # Luhn (Mod10) Checksum
    # =========================================================================

    @classmethod
    def luhn_checksum(cls, digits: str) -> bool:
        """
        Validate using Luhn (Mod10) algorithm.

        Used for:
        - Bankgiro numbers
        - Plusgiro numbers
        - OCR reference numbers
        - Swedish organization numbers

        The checksum is valid if the total modulo 10 equals 0.
        """
        # 只保留数字
        digits = TextCleaner.extract_digits(digits, apply_ocr_correction=False)

        if not digits or not digits.isdigit():
            return False

        total = 0
        for i, char in enumerate(reversed(digits)):
            digit = int(char)
            if i % 2 == 1:  # 从右往左，每隔一位加倍
                digit *= 2
                if digit > 9:
                    digit -= 9
            total += digit

        return total % 10 == 0

    @classmethod
    def calculate_luhn_check_digit(cls, digits: str) -> int:
        """
        Calculate the Luhn check digit for a number.

        Given a number without check digit, returns the digit that would make it valid.
        """
        digits = TextCleaner.extract_digits(digits, apply_ocr_correction=False)

        # 计算现有数字的 Luhn 和
        total = 0
        for i, char in enumerate(reversed(digits)):
            digit = int(char)
            if i % 2 == 0:  # 注意：因为还要加一位，所以偶数位置加倍
                digit *= 2
                if digit > 9:
                    digit -= 9
            total += digit

        # 计算需要的校验位
        check_digit = (10 - (total % 10)) % 10
        return check_digit

    # =========================================================================
    # Organisation Number Validation
    # =========================================================================

    @classmethod
    def is_valid_organisation_number(cls, value: str) -> bool:
        """
        Validate Swedish organisation number.

        Format: NNNNNN-NNNN (10 digits)
        - First digit: 1-9
        - Third digit: >= 2 (distinguishes from personal numbers)
        - Last digit: Luhn check digit
        """
        digits = TextCleaner.extract_digits(value, apply_ocr_correction=True)

        # 处理 VAT 格式
        if len(digits) == 12 and digits.endswith('01'):
            digits = digits[:10]
        elif len(digits) == 14 and digits.startswith('46') and digits.endswith('01'):
            digits = digits[2:12]

        if len(digits) != 10:
            return False

        # 第一位 1-9
        if digits[0] == '0':
            return False

        # 第三位 >= 2 (区分组织号和个人号)
        # 注意：有些特殊组织可能不符合此规则，所以这里放宽
        # if int(digits[2]) < 2:
        #     return False

        # Luhn 校验
        return cls.luhn_checksum(digits)

    # =========================================================================
    # Bankgiro Validation
    # =========================================================================

    @classmethod
    def is_valid_bankgiro(cls, value: str) -> bool:
        """
        Validate Swedish Bankgiro number.

        Format: 7 or 8 digits with Luhn checksum
        """
        digits = TextCleaner.extract_digits(value, apply_ocr_correction=True)

        if len(digits) < 7 or len(digits) > 8:
            return False

        return cls.luhn_checksum(digits)

    @classmethod
    def format_bankgiro(cls, value: str) -> Optional[str]:
        """
        Format Bankgiro number to standard format.

        Returns: XXX-XXXX (7 digits) or XXXX-XXXX (8 digits), or None if invalid
        """
        digits = TextCleaner.extract_digits(value, apply_ocr_correction=True)

        if len(digits) == 7:
            return f"{digits[:3]}-{digits[3:]}"
        elif len(digits) == 8:
            return f"{digits[:4]}-{digits[4:]}"
        else:
            return None

    # =========================================================================
    # Plusgiro Validation
    # =========================================================================

    @classmethod
    def is_valid_plusgiro(cls, value: str) -> bool:
        """
        Validate Swedish Plusgiro number.

        Format: 2-8 digits with Luhn checksum
        """
        digits = TextCleaner.extract_digits(value, apply_ocr_correction=True)

        if len(digits) < 2 or len(digits) > 8:
            return False

        return cls.luhn_checksum(digits)

    @classmethod
    def format_plusgiro(cls, value: str) -> Optional[str]:
        """
        Format Plusgiro number to standard format.

        Returns: XXXXXXX-X format, or None if invalid
        """
        digits = TextCleaner.extract_digits(value, apply_ocr_correction=True)

        if len(digits) < 2 or len(digits) > 8:
            return None

        return f"{digits[:-1]}-{digits[-1]}"

    # =========================================================================
    # OCR Number Validation
    # =========================================================================

    @classmethod
    def is_valid_ocr_number(cls, value: str, validate_checksum: bool = True) -> bool:
        """
        Validate Swedish OCR reference number.

        - Typically 10-25 digits
        - Usually has Luhn checksum (but not always enforced)
        """
        digits = TextCleaner.extract_digits(value, apply_ocr_correction=True)

        if len(digits) < 5 or len(digits) > 25:
            return False

        if validate_checksum:
            return cls.luhn_checksum(digits)

        return True

    # =========================================================================
    # Amount Validation
    # =========================================================================

    @classmethod
    def is_valid_amount(cls, value: str, min_amount: float = 0.0, max_amount: float = 10_000_000.0) -> bool:
        """
        Validate monetary amount.

        - Must be positive (or at least >= min_amount)
        - Should be within reasonable range
        """
        try:
            # 尝试解析
            text = TextCleaner.normalize_amount_text(value)
            # 统一为点作为小数分隔符
            text = text.replace(' ', '').replace(',', '.')
            # 如果有多个点，保留最后一个
            if text.count('.') > 1:
                parts = text.rsplit('.', 1)
                text = parts[0].replace('.', '') + '.' + parts[1]

            amount = float(text)
            return min_amount <= amount <= max_amount
        except (ValueError, TypeError):
            return False

    @classmethod
    def parse_amount(cls, value: str) -> Optional[float]:
        """
        Parse amount from string, handling various formats.

        Returns float or None if parsing fails.
        """
        try:
            text = TextCleaner.normalize_amount_text(value)
            text = text.replace(' ', '')

            # 检测格式并解析
            # 瑞典/德国格式: 逗号是小数点
            if re.match(r'^[\d.]+,\d{1,2}$', text):
                text = text.replace('.', '').replace(',', '.')
            # 美国格式: 点是小数点
            elif re.match(r'^[\d,]+\.\d{1,2}$', text):
                text = text.replace(',', '')
            else:
                # 简单格式
                text = text.replace(',', '.')
                if text.count('.') > 1:
                    parts = text.rsplit('.', 1)
                    text = parts[0].replace('.', '') + '.' + parts[1]

            return float(text)
        except (ValueError, TypeError):
            return None

    # =========================================================================
    # Date Validation
    # =========================================================================

    @classmethod
    def is_valid_date(cls, value: str, min_year: int = 2000, max_year: int = 2100) -> bool:
        """
        Validate date string.

        - Year should be within reasonable range
        - Month 1-12
        - Day 1-31 (basic check)
        """
        parsed = cls.parse_date(value)
        if parsed is None:
            return False

        year, month, day = parsed
        if not (min_year <= year <= max_year):
            return False
        if not (1 <= month <= 12):
            return False
        if not (1 <= day <= 31):
            return False

        # 更精确的日期验证
        try:
            datetime(year, month, day)
            return True
        except ValueError:
            return False

    @classmethod
    def parse_date(cls, value: str) -> Optional[tuple[int, int, int]]:
        """
        Parse date from string.

        Returns (year, month, day) tuple or None.
        """
        from .format_variants import FormatVariants
        return FormatVariants._parse_date(value)

    @classmethod
    def format_date_iso(cls, value: str) -> Optional[str]:
        """
        Format date to ISO format (YYYY-MM-DD).

        Returns formatted string or None if parsing fails.
        """
        parsed = cls.parse_date(value)
        if parsed is None:
            return None

        year, month, day = parsed
        return f"{year}-{month:02d}-{day:02d}"

    # =========================================================================
    # Invoice Number Validation
    # =========================================================================

    @classmethod
    def is_valid_invoice_number(cls, value: str, min_length: int = 1, max_length: int = 30) -> bool:
        """
        Validate invoice number.

        Basic validation - just length check since invoice numbers are highly variable.
        """
        clean = TextCleaner.clean_text(value)
        if not clean:
            return False

        # 提取有意义的字符（字母和数字）
        meaningful = re.sub(r'[^a-zA-Z0-9]', '', clean)
        return min_length <= len(meaningful) <= max_length

    # =========================================================================
    # Generic Validation
    # =========================================================================

    @classmethod
    def validate_field(cls, field_name: str, value: str) -> tuple[bool, Optional[str]]:
        """
        Validate a field by name.

        Returns (is_valid, error_message).
        """
        if not value:
            return False, "Empty value"

        field_lower = field_name.lower()

        if 'organisation' in field_lower or 'org' in field_lower:
            if cls.is_valid_organisation_number(value):
                return True, None
            return False, "Invalid organisation number format or checksum"

        elif 'bankgiro' in field_lower:
            if cls.is_valid_bankgiro(value):
                return True, None
            return False, "Invalid Bankgiro format or checksum"

        elif 'plusgiro' in field_lower:
            if cls.is_valid_plusgiro(value):
                return True, None
            return False, "Invalid Plusgiro format or checksum"

        elif 'ocr' in field_lower:
            if cls.is_valid_ocr_number(value, validate_checksum=False):
                return True, None
            return False, "Invalid OCR number length"

        elif 'amount' in field_lower:
            if cls.is_valid_amount(value):
                return True, None
            return False, "Invalid amount format"

        elif 'date' in field_lower:
            if cls.is_valid_date(value):
                return True, None
            return False, "Invalid date format"

        elif 'invoice' in field_lower and 'number' in field_lower:
            if cls.is_valid_invoice_number(value):
                return True, None
            return False, "Invalid invoice number"

        else:
            # 默认：只检查非空
            if TextCleaner.clean_text(value):
                return True, None
            return False, "Empty value after cleaning"