code issue fix
This commit is contained in:
@@ -260,6 +260,45 @@ class FieldNormalizer:
|
||||
|
||||
return list(set(v for v in variants if v))
|
||||
|
||||
@staticmethod
def normalize_customer_number(value: str) -> list[str]:
    """
    Build the set of spelling variants used to match a customer number.

    Customer numbers can have various formats:
    - Alphanumeric codes: 'EMM 256-6', 'ABC123', 'A-1234'
    - Pure numbers: '12345', '123-456'

    The value is first passed through ``FieldNormalizer.clean_text``.
    The following variants are then produced from the cleaned value:
    - the cleaned value itself
    - the value without spaces
    - the value without dashes
    - the value without spaces and dashes
    - the value upper-cased
    - the value lower-cased

    Case variants are derived from the cleaned value only, not from the
    space/dash-stripped forms.

    Args:
        value: Raw customer number text.

    Returns:
        The distinct, non-empty variants in a stable order, with the
        cleaned value first. An empty list is returned when the cleaned
        value is empty.

    Examples (assuming ``clean_text`` leaves these inputs unchanged):
        'EMM 256-6' -> ['EMM 256-6', 'EMM256-6', 'EMM 2566', 'EMM2566',
                        'emm 256-6']
        'ABC 123'   -> ['ABC 123', 'ABC123', 'abc 123']
    """
    value = FieldNormalizer.clean_text(value)

    candidates = [
        value,
        # Version without spaces
        value.replace(' ', ''),
        # Version without dashes
        value.replace('-', ''),
        # Version without spaces and dashes
        value.replace(' ', '').replace('-', ''),
        # Uppercase and lowercase versions
        value.upper(),
        value.lower(),
    ]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is identical on every run (a set of str has an arbitrary,
    # hash-seed dependent iteration order).
    return list(dict.fromkeys(v for v in candidates if v))
|
||||
|
||||
@staticmethod
|
||||
def normalize_amount(value: str) -> list[str]:
|
||||
"""
|
||||
@@ -414,7 +453,7 @@ class FieldNormalizer:
|
||||
]
|
||||
|
||||
# Ambiguous patterns - try both DD/MM and MM/DD interpretations
|
||||
ambiguous_patterns = [
|
||||
ambiguous_patterns_4digit_year = [
|
||||
# Format with / - could be DD/MM/YYYY (European) or MM/DD/YYYY (US)
|
||||
r'^(\d{1,2})/(\d{1,2})/(\d{4})$',
|
||||
# Format with . - typically European DD.MM.YYYY
|
||||
@@ -423,6 +462,16 @@ class FieldNormalizer:
|
||||
r'^(\d{1,2})-(\d{1,2})-(\d{4})$',
|
||||
]
|
||||
|
||||
# Patterns with 2-digit year (common in Swedish invoices)
|
||||
ambiguous_patterns_2digit_year = [
|
||||
# Format DD.MM.YY (e.g., 02.08.25 for 2025-08-02)
|
||||
r'^(\d{1,2})\.(\d{1,2})\.(\d{2})$',
|
||||
# Format DD/MM/YY
|
||||
r'^(\d{1,2})/(\d{1,2})/(\d{2})$',
|
||||
# Format DD-MM-YY
|
||||
r'^(\d{1,2})-(\d{1,2})-(\d{2})$',
|
||||
]
|
||||
|
||||
# Try unambiguous patterns first
|
||||
for pattern, extractor in date_patterns:
|
||||
match = re.match(pattern, value)
|
||||
@@ -434,9 +483,9 @@ class FieldNormalizer:
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# Try ambiguous patterns with both interpretations
|
||||
# Try ambiguous patterns with 4-digit year
|
||||
if not parsed_dates:
|
||||
for pattern in ambiguous_patterns:
|
||||
for pattern in ambiguous_patterns_4digit_year:
|
||||
match = re.match(pattern, value)
|
||||
if match:
|
||||
n1, n2, year = int(match[1]), int(match[2]), int(match[3])
|
||||
@@ -457,6 +506,31 @@ class FieldNormalizer:
|
||||
if parsed_dates:
|
||||
break
|
||||
|
||||
# Try ambiguous patterns with 2-digit year (e.g., 02.08.25)
|
||||
if not parsed_dates:
|
||||
for pattern in ambiguous_patterns_2digit_year:
|
||||
match = re.match(pattern, value)
|
||||
if match:
|
||||
n1, n2, yy = int(match[1]), int(match[2]), int(match[3])
|
||||
# Convert 2-digit year to 4-digit (00-49 -> 2000s, 50-99 -> 1900s)
|
||||
year = 2000 + yy if yy < 50 else 1900 + yy
|
||||
|
||||
# Try DD/MM/YY (European - day first, most common in Sweden)
|
||||
try:
|
||||
parsed_dates.append(datetime(year, n2, n1))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# Try MM/DD/YY (US - month first) if different and valid
|
||||
if n1 != n2:
|
||||
try:
|
||||
parsed_dates.append(datetime(year, n1, n2))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if parsed_dates:
|
||||
break
|
||||
|
||||
# Try Swedish month names
|
||||
if not parsed_dates:
|
||||
for month_name, month_num in FieldNormalizer.SWEDISH_MONTHS.items():
|
||||
@@ -527,6 +601,7 @@ NORMALIZERS: dict[str, Callable[[str], list[str]]] = {
|
||||
'InvoiceDueDate': FieldNormalizer.normalize_date,
|
||||
'supplier_organisation_number': FieldNormalizer.normalize_organisation_number,
|
||||
'supplier_accounts': FieldNormalizer.normalize_supplier_accounts,
|
||||
'customer_number': FieldNormalizer.normalize_customer_number,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user