Files
AmazingDoc/app/core/pdf_processor.py
Yaojia Wang 0a80400720 Init project
2025-08-11 00:07:41 +02:00

43 lines
1.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# app/core/pdf_processor.py
import base64
from io import BytesIO
from typing import List, Optional

from pdf2image import convert_from_bytes
from PIL import Image
# Note: Poppler must be installed for PDF conversion to work.
# - macOS: brew install poppler
# - Ubuntu/Debian: sudo apt-get install poppler-utils
# - Windows: download Poppler and add its bin directory to the system PATH.
def convert_pdf_to_images(
    pdf_bytes: bytes, *, poppler_path: Optional[str] = None
) -> List[Image.Image]:
    """Convert the raw bytes of a PDF file into a list of Pillow images.

    Args:
        pdf_bytes: Content of the PDF document.
        poppler_path: Optional directory containing the Poppler binaries,
            forwarded to ``convert_from_bytes``. With the default ``None``
            pdf2image looks Poppler up on its own (normally via the system
            PATH), which keeps this function portable across machines
            instead of depending on one hard-coded Windows install path.

    Returns:
        The list of images produced by ``convert_from_bytes``.

    Raises:
        IOError: If the conversion fails for any reason. The underlying
            exception is chained as ``__cause__``.
    """
    print("--- [Core PDF] 正在将PDF转换为图片...")
    try:
        # Only the conversion itself is guarded; everything else is plain
        # progress reporting.
        images = convert_from_bytes(pdf_bytes, poppler_path=poppler_path)
    except Exception as e:
        print(f"--- [Core PDF] PDF转换失败: {e}")
        # Friendlier hint pointing at the most common cause: a missing or
        # misconfigured Poppler installation.
        print("--- [Core PDF] 请确认您已在系统中正确安装Poppler并在上面的代码中指定了正确的poppler_path。")
        # Callers rely on IOError; chain the cause so the traceback keeps
        # the original failure.
        raise IOError(f"PDF to image conversion failed: {e}") from e
    print(f"--- [Core PDF] 转换成功,共 {len(images)} 页。")
    return images
def image_to_base64_str(image: Image.Image) -> str:
    """Return the Base64 text of *image* serialized as PNG.

    The image is saved in PNG format into an in-memory buffer, the buffer
    contents are Base64-encoded, and the resulting bytes are decoded as
    UTF-8 to produce a ``str``.
    """
    png_stream = BytesIO()
    image.save(png_stream, format="PNG")
    png_bytes = png_stream.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")