Init project
This commit is contained in:
27
app/core/ocr.py
Normal file
27
app/core/ocr.py
Normal file
@@ -0,0 +1,27 @@
|
||||
# app/core/ocr.py
|
||||
import pytesseract
|
||||
from PIL import Image
|
||||
|
||||
|
||||
# NOTE: Google's Tesseract OCR engine must be installed on your system before this module can be used.
|
||||
# See the installation instructions provided earlier for details.
|
||||
|
||||
def extract_text_from_image(image: Image.Image, *, lang: str = 'chi_sim+eng') -> str:
    """
    Extract text from a Pillow Image object using Tesseract OCR.

    Args:
        image: The Pillow Image object to run OCR on.
        lang: Tesseract language specification forwarded to
            ``pytesseract.image_to_string``. The default ``'chi_sim+eng'``
            recognizes Simplified Chinese and English at the same time.

    Returns:
        The string text extracted from the image.

    Raises:
        IOError: If OCR processing fails for any reason. The underlying
            exception is chained as ``__cause__`` so the original
            traceback is not lost.
    """
    print("--- [Core OCR] 正在从图片中提取文本用于分类...")
    try:
        text = pytesseract.image_to_string(image, lang=lang)
    except Exception as e:
        # Any OCR failure is reported to callers as IOError; chaining with
        # ``from e`` keeps the root cause visible when debugging.
        print(f"--- [Core OCR] OCR处理失败: {e}")
        raise IOError(f"OCR processing failed: {e}") from e
    else:
        print("--- [Core OCR] 文本提取成功。")
        return text
|
||||
Reference in New Issue
Block a user