#!/usr/bin/env python3
"""Analyze batch inference v3 results (Round 2 fixes).

Reads the JSON list written by the batch inference run and prints:

* a per-PDF summary table,
* per-field coverage with the mean confidence,
* how often the fallback path was used,
* low-confidence extractions,
* PDFs with very few extracted fields,
* the average processing time.

Each record is read through the keys ``filename``, ``status``,
``time_seconds`` and ``data.result`` (with ``fields``, ``detections``,
``confidence`` and ``fallback_used`` inside the result).
"""

import json
from collections import Counter

RESULTS_PATH = "scripts/inference_results_v3.json"

# Fields listed in the coverage table, in display order.
REPORT_FIELDS = (
    "InvoiceNumber",
    "InvoiceDate",
    "InvoiceDueDate",
    "OCR",
    "Amount",
    "Bankgiro",
    "Plusgiro",
    "supplier_organisation_number",
    "customer_number",
    "payment_line",
)

LOW_CONFIDENCE_THRESHOLD = 0.7
FEW_FIELDS_LIMIT = 2
HTTP_OK = 200


def load_results(path: str = RESULTS_PATH) -> list:
    """Load the list of inference records from the JSON file at *path*."""
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def extract_result(record: dict) -> dict:
    """Return the record's ``data.result`` mapping, or ``{}`` if absent or null."""
    data = record.get("data")
    if not isinstance(data, dict):
        # Failed requests may carry ``"data": null`` (or no data at all).
        return {}
    result = data.get("result")
    return result if isinstance(result, dict) else {}


def is_success(record: dict) -> bool:
    """Return True when the record's status is 200."""
    return record.get("status") == HTTP_OK


def shorten(value, limit: int = 30) -> str:
    """Return ``str(value)``, cut to *limit* characters with a ``...`` suffix."""
    text = str(value)
    if len(text) > limit:
        return text[: limit - 3] + "..."
    return text


def summary_row(record: dict) -> str:
    """Format one line of the per-PDF summary table."""
    result = extract_result(record)
    fields = result.get("fields") or {}
    detections = result.get("detections") or []
    field_text = ", ".join(f"{key}={shorten(val)}" for key, val in fields.items())
    name = record["filename"][:39]
    elapsed = record["time_seconds"]
    return (
        f"{name:<40} {len(detections):<4} {len(fields):<4} "
        f"{elapsed:<7} {field_text}"
    )


def field_coverage(results: list, fields=REPORT_FIELDS) -> list:
    """Compute coverage statistics for each name in *fields*.

    Returns a list of ``(field, present_count, rate_percent, mean_confidence)``
    tuples, one per requested field and in the same order.  Only successful
    records are considered.  The mean confidence is taken over the records
    where the field is present *and* has a confidence score, so a missing
    score does not pull the average towards zero.
    """
    present = Counter()
    conf_sums = Counter()
    conf_counts = Counter()
    ok_count = 0
    for record in results:
        if not is_success(record):
            continue
        ok_count += 1
        result = extract_result(record)
        confidence = result.get("confidence") or {}
        for name in result.get("fields") or {}:
            present[name] += 1
            score = confidence.get(name)
            if score is not None:
                conf_sums[name] += score
                conf_counts[name] += 1

    rows = []
    for name in fields:
        count = present[name]
        rate = count / ok_count * 100 if ok_count else 0.0
        scored = conf_counts[name]
        mean_conf = conf_sums[name] / scored if scored else 0.0
        rows.append((name, count, rate, mean_conf))
    return rows


def coverage_flag(rate: float) -> str:
    """Return the marker appended to a coverage row for the given rate (%)."""
    if rate < 30:
        return " <<<"
    if rate < 60:
        return " !!"
    return ""


def average_time(results: list) -> float:
    """Return the mean ``time_seconds`` over all records (0.0 when empty)."""
    if not results:
        return 0.0
    return sum(record["time_seconds"] for record in results) / len(results)


def build_report(results: list) -> list:
    """Return the full report as a list of output lines."""
    successes = [record for record in results if is_success(record)]
    ok_count = len(successes)
    lines = [f"Total PDFs: {len(results)}, Successful: {ok_count}", ""]

    # Summary table (all records, including failed ones).
    lines.append(f"{'PDF':<40} {'Det':<4} {'Fld':<4} {'Time':<7} Fields")
    lines.append("-" * 140)
    lines.extend(summary_row(record) for record in results)
    lines.append("")

    # Field coverage.
    lines.append(f"Field Coverage ({ok_count} successful PDFs):")
    lines.append(f"{'Field':<35} {'Present':<10} {'Rate':<10} {'Avg Conf':<10}")
    lines.append("-" * 65)
    for name, count, rate, mean_conf in field_coverage(results):
        lines.append(
            f"{name:<35} {count:<10} {rate:<10.1f} {mean_conf:<10.3f}"
            f"{coverage_flag(rate)}"
        )

    # Fallback count.
    fallback_count = sum(
        1 for record in successes if extract_result(record).get("fallback_used")
    )
    lines.append("")
    lines.append(f"Fallback used: {fallback_count}/{ok_count}")

    # Low-confidence fields.
    lines.append("")
    lines.append("Low-confidence extractions (< 0.7):")
    for record in successes:
        result = extract_result(record)
        extracted = result.get("fields") or {}
        for key, score in (result.get("confidence") or {}).items():
            if score is None or score >= LOW_CONFIDENCE_THRESHOLD:
                continue
            value = extracted.get(key, "?")
            lines.append(
                f" [{score:.3f}] {key:<25} = {str(value)[:40]:<40} "
                f"({record['filename'][:36]})"
            )

    # PDFs with very few fields (possible issues).
    lines.append("")
    lines.append("PDFs with <= 2 fields extracted:")
    for record in successes:
        extracted = extract_result(record).get("fields") or {}
        if len(extracted) <= FEW_FIELDS_LIMIT:
            lines.append(
                f" {record['filename']}: {len(extracted)} fields - "
                f"{list(extracted.keys())}"
            )

    # Average time over every record, successful or not.
    lines.append("")
    lines.append(f"Average processing time: {average_time(results):.2f}s")
    return lines


def main() -> None:
    """Load the results file and print the report to stdout."""
    print("\n".join(build_report(load_results())))


if __name__ == "__main__":
    main()