Files
invoice-master-poc-v2/packages/shared/shared/pdf/renderer.py
2026-01-27 23:58:17 +01:00

118 lines
2.8 KiB
Python

"""
PDF Rendering Module
Converts PDF pages to images for YOLO training.
"""
from pathlib import Path
from typing import Generator
import fitz # PyMuPDF
def render_pdf_to_images(
pdf_path: str | Path,
output_dir: str | Path | None = None,
dpi: int = 300,
image_format: str = "png"
) -> Generator[tuple[int, Path | bytes], None, None]:
"""
Render PDF pages to images.
Args:
pdf_path: Path to the PDF file
output_dir: Directory to save images (if None, returns bytes)
dpi: Resolution for rendering (default 300)
image_format: Output format ('png' or 'jpg')
Yields:
Tuple of (page_number, image_path or image_bytes)
"""
doc = fitz.open(pdf_path)
# Calculate zoom factor for desired DPI (72 is base DPI for PDF)
zoom = dpi / 72
matrix = fitz.Matrix(zoom, zoom)
if output_dir:
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
pdf_name = Path(pdf_path).stem
for page_no, page in enumerate(doc):
# Render page to pixmap
pix = page.get_pixmap(matrix=matrix)
if output_dir:
# Save to file
ext = "jpg" if image_format.lower() in ("jpg", "jpeg") else "png"
image_path = output_dir / f"{pdf_name}_page_{page_no:03d}.{ext}"
if ext == "jpg":
pix.save(str(image_path), "jpeg")
else:
pix.save(str(image_path))
yield page_no, image_path
else:
# Return bytes
if image_format.lower() in ("jpg", "jpeg"):
yield page_no, pix.tobytes("jpeg")
else:
yield page_no, pix.tobytes("png")
doc.close()
def render_page_to_image(
    pdf_path: str | Path,
    page_no: int,
    dpi: int = 300
) -> bytes:
    """
    Render a single page to image bytes.

    The document is always closed before returning or raising.

    Args:
        pdf_path: Path to the PDF file
        page_no: Page number (0-indexed). Only the upper bound is checked
            here; negative values are passed to the document's indexing
            unchanged.
        dpi: Resolution for rendering

    Returns:
        PNG image bytes

    Raises:
        ValueError: If page_no is greater than or equal to the page count.
    """
    doc = fitz.open(pdf_path)
    try:
        # Capture the page count while the document is still open so the
        # error message can report it; it must not be queried after close().
        page_count = len(doc)
        if page_no >= page_count:
            raise ValueError(f"Page {page_no} does not exist (PDF has {page_count} pages)")

        # 72 is the base DPI for PDF, so dpi / 72 is the scale factor.
        zoom = dpi / 72
        matrix = fitz.Matrix(zoom, zoom)

        pix = doc[page_no].get_pixmap(matrix=matrix)
        return pix.tobytes("png")
    finally:
        doc.close()
def get_render_dimensions(pdf_path: str | Path, page_no: int = 0, dpi: int = 300) -> tuple[int, int]:
    """
    Get the dimensions of a rendered page.

    The size is computed from the page rectangle scaled by dpi / 72; the page
    is not actually rendered. The document is always closed before returning
    or raising.

    Args:
        pdf_path: Path to the PDF file
        page_no: Page number (default 0). Passed to the document's indexing
            unchanged; an invalid index propagates the library's exception.
        dpi: Resolution the page would be rendered at (default 300)

    Returns:
        (width, height) in pixels
    """
    doc = fitz.open(pdf_path)
    try:
        rect = doc[page_no].rect
        # 72 is the base DPI for PDF, so dpi / 72 is the scale factor.
        zoom = dpi / 72
        # int() truncates toward zero.
        # NOTE(review): the rasterizer may round the scaled rectangle
        # differently, so this can differ by a pixel from an actual pixmap's
        # width/height — confirm if exact agreement matters.
        width = int(rect.width * zoom)
        height = int(rect.height * zoom)
        return width, height
    finally:
        doc.close()