This commit is contained in:
Yaojia Wang
2026-01-16 23:10:01 +01:00
parent 53d1e8db25
commit 425b8fdedf
10 changed files with 653 additions and 87 deletions

View File

@@ -25,6 +25,12 @@ class InvoiceRow:
Bankgiro: str | None = None
Plusgiro: str | None = None
Amount: Decimal | None = None
# New fields
split: str | None = None # train/test split indicator
customer_number: str | None = None # Customer number (no matching needed)
supplier_name: str | None = None # Supplier name (no matching needed)
supplier_organisation_number: str | None = None # Swedish org number (needs matching)
supplier_accounts: str | None = None # Supplier accounts (needs matching)
# Raw values for reference
raw_data: dict = field(default_factory=dict)
@@ -40,6 +46,8 @@ class InvoiceRow:
'Bankgiro': self.Bankgiro,
'Plusgiro': self.Plusgiro,
'Amount': str(self.Amount) if self.Amount else None,
'supplier_organisation_number': self.supplier_organisation_number,
'supplier_accounts': self.supplier_accounts,
}
def get_field_value(self, field_name: str) -> str | None:
@@ -68,6 +76,12 @@ class CSVLoader:
'Bankgiro': 'Bankgiro',
'Plusgiro': 'Plusgiro',
'Amount': 'Amount',
# New fields
'split': 'split',
'customer_number': 'customer_number',
'supplier_name': 'supplier_name',
'supplier_organisation_number': 'supplier_organisation_number',
'supplier_accounts': 'supplier_accounts',
}
def __init__(
@@ -200,6 +214,12 @@ class CSVLoader:
Bankgiro=self._parse_string(row.get('Bankgiro')),
Plusgiro=self._parse_string(row.get('Plusgiro')),
Amount=self._parse_amount(row.get('Amount')),
# New fields
split=self._parse_string(row.get('split')),
customer_number=self._parse_string(row.get('customer_number')),
supplier_name=self._parse_string(row.get('supplier_name')),
supplier_organisation_number=self._parse_string(row.get('supplier_organisation_number')),
supplier_accounts=self._parse_string(row.get('supplier_accounts')),
raw_data=dict(row)
)
@@ -318,14 +338,16 @@ class CSVLoader:
row.OCR,
row.Bankgiro,
row.Plusgiro,
row.Amount
row.Amount,
row.supplier_organisation_number,
row.supplier_accounts,
]
if not any(matchable_fields):
issues.append({
'row': i,
'doc_id': row.DocumentId,
'field': 'All',
'issue': 'No matchable fields (InvoiceNumber/OCR/Bankgiro/Plusgiro/Amount)'
'issue': 'No matchable fields (InvoiceNumber/OCR/Bankgiro/Plusgiro/Amount/supplier_organisation_number/supplier_accounts)'
})
return issues