Files
AmazingDoc/app/core/pdf_processor.py
2025-08-11 14:20:56 +02:00

24 lines
778 B
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# app/core/pdf_processor.py
from pdf2image import convert_from_bytes
from PIL import Image
from io import BytesIO
from typing import List
import base64
def convert_pdf_to_images(pdf_bytes: bytes) -> List[Image.Image]:
    """Convert an in-memory PDF document into a list of images.

    Delegates the conversion to ``convert_from_bytes`` and prints progress
    messages to stdout before and after the call.

    Args:
        pdf_bytes: Raw contents of the PDF document.

    Returns:
        The list produced by ``convert_from_bytes``; its length is reported
        in the log output as the page count.

    Raises:
        IOError: If any exception is raised while converting. The original
            exception is attached as ``__cause__``.
    """
    try:
        print("--- [Core PDF] Converting PDF to images...")
        images = convert_from_bytes(pdf_bytes)
        print(f"--- [Core PDF] converted PDF to imagestotal {len(images)} pages。")
        return images
    except Exception as e:
        print(f"--- [Core PDF] PDF conversion failed: {e}")
        # Chain explicitly so the traceback reports the underlying failure as
        # the direct cause instead of "during handling of the above
        # exception, another exception occurred".
        raise IOError(f"PDF to image conversion failed: {e}") from e
def image_to_base64_str(image: Image.Image) -> str:
    """Return *image* encoded as PNG and rendered as a base64 text string.

    Args:
        image: The image to serialize; it is saved in PNG format into an
            in-memory buffer.

    Returns:
        The base64 encoding of the PNG bytes, decoded to ``str``.
    """
    with BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode('utf-8')