#!/usr/bin/env python3
"""Render selected PDFs from v3 batch for visual comparison.

For every file name listed in ``SELECTED``, the first page of the PDF found
in ``PDF_DIR`` is rasterized and written as a PNG with the same base name
into ``OUTPUT_DIR``. PDFs that are missing or contain no pages are reported
on stdout and skipped so one bad file does not abort the batch.
"""

import os

import fitz  # PyMuPDF

PDF_DIR = "/mnt/c/Users/yaoji/git/Billo/Billo.Platform.Document/Billo.Platform.Document.AdminAPI/downloads/to_check"
OUTPUT_DIR = "/mnt/c/Users/yaoji/git/ColaCoder/invoice-master-poc-v2/scripts/pdf_renders_v3"

# Select 10 PDFs covering different scenarios:
SELECTED = [
    # Potentially wrong Amount (81648164.00 - too high?)
    "b84c7d70-821d-4a1a-9be7-d7bb2392bd91.pdf",
    # Only 2 fields extracted
    "072571e2-da5f-4268-b1a8-f0e5a85a3ec4.pdf",
    # InvoiceNumber=5085 (suspiciously short, same as BG prefix?)
    "6a83ba35-afdf-4c13-ade1-25513e213637.pdf",
    # InvoiceNumber=450 (very short, might be wrong)
    "8551b540-d93d-459d-b7eb-e9ee086f9f16.pdf",
    # InvoiceNumber=134 (very short, same as BG prefix)
    "cb1bd3b1-63d0-4140-930f-e4a7ae2b6cd5.pdf",
    # Large Amount=172904.52, InvoiceNumber=89902
    "d121a5ee-7382-41d8-8010-63880def1f96.pdf",
    # Good 9-field PDF for positive check
    "6cb90895-e52b-4831-b57b-7cb968bcdd54.pdf",
    # Amount=2026.00 (same as year - could be confused?)
    "d376c5b5-0dc5-4ccf-b787-0d481eef8577.pdf",
    # 8 fields, good coverage
    "f3f5da6f-7552-4ec6-8625-3629042fbfd0.pdf",
    # Low confidence Amount=596.49
    "5783e4af-eef3-411c-84b1-3a8f4694fed8.pdf",
]

# Target raster resolution. PDF page units are 1/72 inch, so scaling the page
# by RENDER_DPI / 72 produces an image at RENDER_DPI dots per inch.
RENDER_DPI = 150
ZOOM = RENDER_DPI / 72

os.makedirs(OUTPUT_DIR, exist_ok=True)

# The scaling matrix does not depend on the document, so build it once.
render_matrix = fitz.Matrix(ZOOM, ZOOM)

for pdf_name in SELECTED:
    pdf_path = os.path.join(PDF_DIR, pdf_name)
    if not os.path.exists(pdf_path):
        print(f"SKIP {pdf_name} - not found")
        continue

    # Swap only the file extension; str.replace would also rewrite any
    # ".pdf" occurring earlier in the name.
    out_name = os.path.splitext(pdf_name)[0] + ".png"
    out_path = os.path.join(OUTPUT_DIR, out_name)

    # The context manager closes the document even if rendering or saving
    # raises, so a failure never leaks the open file handle.
    with fitz.open(pdf_path) as doc:
        if len(doc) == 0:
            # Indexing page 0 of an empty document would raise and abort
            # the remaining renders.
            print(f"SKIP {pdf_name} - no pages")
            continue
        pix = doc[0].get_pixmap(matrix=render_matrix)
        pix.save(out_path)

    print(f"Rendered {pdf_name} -> {out_name} ({pix.width}x{pix.height})")

print(f"\nAll renders saved to {OUTPUT_DIR}")