Enable GPU by default for PaddleOCR

- Changed use_gpu default from False to True
- Forwarded the existing use_gpu argument to the PaddleOCR constructor and stored it as self.use_gpu
- Passed show_log=False to the PaddleOCR constructor to reduce log noise

GPU acceleration significantly improves OCR performance and
reduces memory pressure when processing scanned PDFs.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Yaojia Wang
2026-01-10 18:29:02 +01:00
parent dd69fbe9ed
commit 1b7c61cdd8

View File

@@ -45,7 +45,7 @@ class OCREngine:
def __init__( def __init__(
self, self,
lang: str = "en", lang: str = "en",
use_gpu: bool = False, use_gpu: bool = True, # Default to GPU for better performance
det_model_dir: str | None = None, det_model_dir: str | None = None,
rec_model_dir: str | None = None rec_model_dir: str | None = None
): ):
@@ -54,20 +54,25 @@ class OCREngine:
Args: Args:
lang: Language code ('en', 'sv', 'ch', etc.) lang: Language code ('en', 'sv', 'ch', etc.)
use_gpu: Whether to use GPU acceleration use_gpu: Whether to use GPU acceleration (default: True)
det_model_dir: Custom detection model directory det_model_dir: Custom detection model directory
rec_model_dir: Custom recognition model directory rec_model_dir: Custom recognition model directory
""" """
from paddleocr import PaddleOCR from paddleocr import PaddleOCR
# PaddleOCR 3.x API - simplified init # PaddleOCR init with GPU support
init_params = {'lang': lang} init_params = {
'lang': lang,
'use_gpu': use_gpu,
'show_log': False, # Reduce log noise
}
if det_model_dir: if det_model_dir:
init_params['text_detection_model_dir'] = det_model_dir init_params['text_detection_model_dir'] = det_model_dir
if rec_model_dir: if rec_model_dir:
init_params['text_recognition_model_dir'] = rec_model_dir init_params['text_recognition_model_dir'] = rec_model_dir
self.ocr = PaddleOCR(**init_params) self.ocr = PaddleOCR(**init_params)
self.use_gpu = use_gpu
def extract_from_image( def extract_from_image(
self, self,