Add payment line parser and fix OCR override from payment_line
- Add MachineCodeParser for Swedish invoice payment line parsing
- Fix OCR Reference extraction by normalizing account number spaces
- Add cross-validation tests for pipeline and field_extractor
- Update UI layout for compact upload and full-width results

Key changes:
- machine_code_parser.py: Handle spaces in Bankgiro numbers (e.g. "78 2 1 713")
- pipeline.py: OCR and Amount override from payment_line, BG/PG comparison only
- field_extractor.py: Improved invoice number normalization
- app.py: Responsive UI layout changes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
229
src/web/app.py
229
src/web/app.py
@@ -81,6 +81,9 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
|
||||
- Bankgiro
|
||||
- Plusgiro
|
||||
- Amount
|
||||
- supplier_org_number (Swedish organization number)
|
||||
- customer_number
|
||||
- payment_line (machine-readable payment code)
|
||||
""",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
@@ -161,17 +164,11 @@ def get_html_ui() -> str:
|
||||
}
|
||||
|
||||
.main-content {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 20px;
|
||||
}
|
||||
|
||||
@media (max-width: 900px) {
|
||||
.main-content {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
.card {
|
||||
background: white;
|
||||
border-radius: 16px;
|
||||
@@ -188,14 +185,28 @@ def get_html_ui() -> str:
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.upload-card {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 20px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.upload-card h2 {
|
||||
margin-bottom: 0;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.upload-area {
|
||||
border: 3px dashed #ddd;
|
||||
border-radius: 12px;
|
||||
padding: 40px;
|
||||
border: 2px dashed #ddd;
|
||||
border-radius: 10px;
|
||||
padding: 15px 25px;
|
||||
text-align: center;
|
||||
cursor: pointer;
|
||||
transition: all 0.3s;
|
||||
background: #fafafa;
|
||||
flex: 1;
|
||||
min-width: 200px;
|
||||
}
|
||||
|
||||
.upload-area:hover, .upload-area.dragover {
|
||||
@@ -209,17 +220,21 @@ def get_html_ui() -> str:
|
||||
}
|
||||
|
||||
.upload-icon {
|
||||
font-size: 48px;
|
||||
margin-bottom: 15px;
|
||||
font-size: 24px;
|
||||
display: inline;
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
.upload-area p {
|
||||
color: #666;
|
||||
margin-bottom: 10px;
|
||||
margin: 0;
|
||||
display: inline;
|
||||
}
|
||||
|
||||
.upload-area small {
|
||||
color: #999;
|
||||
display: block;
|
||||
margin-top: 5px;
|
||||
}
|
||||
|
||||
#file-input {
|
||||
@@ -237,10 +252,10 @@ def get_html_ui() -> str:
|
||||
|
||||
.btn {
|
||||
display: inline-block;
|
||||
padding: 14px 28px;
|
||||
padding: 12px 24px;
|
||||
border: none;
|
||||
border-radius: 10px;
|
||||
font-size: 1rem;
|
||||
font-size: 0.9rem;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: all 0.3s;
|
||||
@@ -251,8 +266,6 @@ def get_html_ui() -> str:
|
||||
.btn-primary {
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
width: 100%;
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.btn-primary:hover:not(:disabled) {
|
||||
@@ -267,22 +280,21 @@ def get_html_ui() -> str:
|
||||
|
||||
.loading {
|
||||
display: none;
|
||||
text-align: center;
|
||||
padding: 20px;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.loading.active {
|
||||
display: block;
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.spinner {
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
border: 4px solid #f3f3f3;
|
||||
border-top: 4px solid #667eea;
|
||||
width: 24px;
|
||||
height: 24px;
|
||||
border: 3px solid #f3f3f3;
|
||||
border-top: 3px solid #667eea;
|
||||
border-radius: 50%;
|
||||
animation: spin 1s linear infinite;
|
||||
margin: 0 auto 15px;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
@@ -331,7 +343,7 @@ def get_html_ui() -> str:
|
||||
|
||||
.fields-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
@@ -380,6 +392,84 @@ def get_html_ui() -> str:
|
||||
margin-top: 15px;
|
||||
}
|
||||
|
||||
.cross-validation {
|
||||
background: #f8fafc;
|
||||
border: 1px solid #e2e8f0;
|
||||
border-radius: 10px;
|
||||
padding: 15px;
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.cross-validation h3 {
|
||||
margin: 0 0 10px 0;
|
||||
color: #334155;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
.cv-status {
|
||||
font-weight: 600;
|
||||
padding: 8px 12px;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 10px;
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
.cv-status.valid {
|
||||
background: #dcfce7;
|
||||
color: #166534;
|
||||
}
|
||||
|
||||
.cv-status.invalid {
|
||||
background: #fef3c7;
|
||||
color: #92400e;
|
||||
}
|
||||
|
||||
.cv-details {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 8px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.cv-item {
|
||||
background: white;
|
||||
border: 1px solid #e2e8f0;
|
||||
border-radius: 6px;
|
||||
padding: 6px 12px;
|
||||
font-size: 0.85rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.cv-item.match {
|
||||
border-color: #86efac;
|
||||
background: #f0fdf4;
|
||||
}
|
||||
|
||||
.cv-item.mismatch {
|
||||
border-color: #fca5a5;
|
||||
background: #fef2f2;
|
||||
}
|
||||
|
||||
.cv-icon {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.cv-item.match .cv-icon {
|
||||
color: #16a34a;
|
||||
}
|
||||
|
||||
.cv-item.mismatch .cv-icon {
|
||||
color: #dc2626;
|
||||
}
|
||||
|
||||
.cv-summary {
|
||||
margin-top: 10px;
|
||||
font-size: 0.8rem;
|
||||
color: #64748b;
|
||||
}
|
||||
|
||||
.error-message {
|
||||
background: #fee2e2;
|
||||
color: #991b1b;
|
||||
@@ -405,33 +495,35 @@ def get_html_ui() -> str:
|
||||
</header>
|
||||
|
||||
<div class="main-content">
|
||||
<div class="card">
|
||||
<h2>📤 Upload Document</h2>
|
||||
<!-- Upload Section - Compact -->
|
||||
<div class="card upload-card">
|
||||
<h2>📤 Upload</h2>
|
||||
|
||||
<div class="upload-area" id="upload-area">
|
||||
<div class="upload-icon">📁</div>
|
||||
<p>Drag & drop your file here</p>
|
||||
<p>or <strong>click to browse</strong></p>
|
||||
<small>Supports PDF, PNG, JPG (max 50MB)</small>
|
||||
<span class="upload-icon">📁</span>
|
||||
<p>Drag & drop or <strong>click to browse</strong></p>
|
||||
<small>PDF, PNG, JPG (max 50MB)</small>
|
||||
<input type="file" id="file-input" accept=".pdf,.png,.jpg,.jpeg">
|
||||
<div class="file-name" id="file-name" style="display: none;"></div>
|
||||
</div>
|
||||
|
||||
<div class="file-name" id="file-name" style="display: none;"></div>
|
||||
|
||||
<button class="btn btn-primary" id="submit-btn" disabled>
|
||||
🚀 Extract Fields
|
||||
🚀 Extract
|
||||
</button>
|
||||
|
||||
<div class="loading" id="loading">
|
||||
<div class="spinner"></div>
|
||||
<p>Processing document...</p>
|
||||
<p>Processing...</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Results Section - Full Width -->
|
||||
<div class="card">
|
||||
<h2>📊 Extraction Results</h2>
|
||||
|
||||
<div id="placeholder" style="text-align: center; padding: 40px; color: #999;">
|
||||
<div style="font-size: 64px; margin-bottom: 15px;">🔍</div>
|
||||
<div id="placeholder" style="text-align: center; padding: 30px; color: #999;">
|
||||
<div style="font-size: 48px; margin-bottom: 10px;">🔍</div>
|
||||
<p>Upload a document to see extraction results</p>
|
||||
</div>
|
||||
|
||||
@@ -445,6 +537,8 @@ def get_html_ui() -> str:
|
||||
|
||||
<div class="processing-time" id="processing-time"></div>
|
||||
|
||||
<div class="cross-validation" id="cross-validation" style="display: none;"></div>
|
||||
|
||||
<div class="error-message" id="error-message" style="display: none;"></div>
|
||||
|
||||
<div class="visualization" id="visualization" style="display: none;">
|
||||
@@ -566,7 +660,11 @@ def get_html_ui() -> str:
|
||||
const fieldsGrid = document.getElementById('fields-grid');
|
||||
fieldsGrid.innerHTML = '';
|
||||
|
||||
const fieldOrder = ['InvoiceNumber', 'InvoiceDate', 'InvoiceDueDate', 'OCR', 'Amount', 'Bankgiro', 'Plusgiro'];
|
||||
const fieldOrder = [
|
||||
'InvoiceNumber', 'InvoiceDate', 'InvoiceDueDate', 'OCR',
|
||||
'Amount', 'Bankgiro', 'Plusgiro',
|
||||
'supplier_org_number', 'customer_number', 'payment_line'
|
||||
];
|
||||
|
||||
fieldOrder.forEach(field => {
|
||||
const value = result.fields[field];
|
||||
@@ -588,6 +686,45 @@ def get_html_ui() -> str:
|
||||
document.getElementById('processing-time').textContent =
|
||||
`⏱️ Processed in ${result.processing_time_ms.toFixed(0)}ms`;
|
||||
|
||||
// Cross-validation results
|
||||
const cvDiv = document.getElementById('cross-validation');
|
||||
if (result.cross_validation) {
|
||||
const cv = result.cross_validation;
|
||||
let cvHtml = '<h3>🔍 Cross-Validation (Payment Line)</h3>';
|
||||
cvHtml += `<div class="cv-status ${cv.is_valid ? 'valid' : 'invalid'}">`;
|
||||
cvHtml += cv.is_valid ? '✅ Valid' : '⚠️ Mismatch Detected';
|
||||
cvHtml += '</div>';
|
||||
|
||||
cvHtml += '<div class="cv-details">';
|
||||
if (cv.payment_line_ocr) {
|
||||
const matchIcon = cv.ocr_match === true ? '✓' : (cv.ocr_match === false ? '✗' : '—');
|
||||
cvHtml += `<div class="cv-item ${cv.ocr_match === true ? 'match' : (cv.ocr_match === false ? 'mismatch' : '')}">`;
|
||||
cvHtml += `<span class="cv-icon">${matchIcon}</span> OCR: ${cv.payment_line_ocr}</div>`;
|
||||
}
|
||||
if (cv.payment_line_amount) {
|
||||
const matchIcon = cv.amount_match === true ? '✓' : (cv.amount_match === false ? '✗' : '—');
|
||||
cvHtml += `<div class="cv-item ${cv.amount_match === true ? 'match' : (cv.amount_match === false ? 'mismatch' : '')}">`;
|
||||
cvHtml += `<span class="cv-icon">${matchIcon}</span> Amount: ${cv.payment_line_amount}</div>`;
|
||||
}
|
||||
if (cv.payment_line_account) {
|
||||
const accountType = cv.payment_line_account_type === 'bankgiro' ? 'Bankgiro' : 'Plusgiro';
|
||||
const matchField = cv.payment_line_account_type === 'bankgiro' ? cv.bankgiro_match : cv.plusgiro_match;
|
||||
const matchIcon = matchField === true ? '✓' : (matchField === false ? '✗' : '—');
|
||||
cvHtml += `<div class="cv-item ${matchField === true ? 'match' : (matchField === false ? 'mismatch' : '')}">`;
|
||||
cvHtml += `<span class="cv-icon">${matchIcon}</span> ${accountType}: ${cv.payment_line_account}</div>`;
|
||||
}
|
||||
cvHtml += '</div>';
|
||||
|
||||
if (cv.details && cv.details.length > 0) {
|
||||
cvHtml += '<div class="cv-summary">' + cv.details[cv.details.length - 1] + '</div>';
|
||||
}
|
||||
|
||||
cvDiv.innerHTML = cvHtml;
|
||||
cvDiv.style.display = 'block';
|
||||
} else {
|
||||
cvDiv.style.display = 'none';
|
||||
}
|
||||
|
||||
// Visualization
|
||||
if (result.visualization_url) {
|
||||
const vizDiv = document.getElementById('visualization');
|
||||
@@ -608,7 +745,19 @@ def get_html_ui() -> str:
|
||||
}
|
||||
|
||||
function formatFieldName(name) {
|
||||
return name.replace(/([A-Z])/g, ' $1').trim();
|
||||
const nameMap = {
|
||||
'InvoiceNumber': 'Invoice Number',
|
||||
'InvoiceDate': 'Invoice Date',
|
||||
'InvoiceDueDate': 'Due Date',
|
||||
'OCR': 'OCR Reference',
|
||||
'Amount': 'Amount',
|
||||
'Bankgiro': 'Bankgiro',
|
||||
'Plusgiro': 'Plusgiro',
|
||||
'supplier_org_number': 'Supplier Org Number',
|
||||
'customer_number': 'Customer Number',
|
||||
'payment_line': 'Payment Line'
|
||||
};
|
||||
return nameMap[name] || name.replace(/([A-Z])/g, ' $1').replace(/_/g, ' ').trim();
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
|
||||
Reference in New Issue
Block a user