From 1b7c61cdd892b517f3389a5fbe14f3a674267ae6 Mon Sep 17 00:00:00 2001
From: Yaojia Wang
Date: Sat, 10 Jan 2026 18:29:02 +0100
Subject: [PATCH] Enable GPU by default for PaddleOCR

- Changed use_gpu default from False to True
- Honor use_gpu via the PaddleOCR 3.x `device` argument: use_gpu=False
  forces device='cpu'; use_gpu=True leaves `device` unset so PaddleOCR
  prefers GPU 0 and falls back to CPU when no GPU is available
- Do not pass `use_gpu` / `show_log` to PaddleOCR: both were removed in
  PaddleOCR 3.x and are rejected with "Unknown argument"

GPU acceleration significantly improves OCR performance and reduces
memory pressure when processing scanned PDFs.

Co-Authored-By: Claude Opus 4.5
---
 src/ocr/paddle_ocr.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/ocr/paddle_ocr.py b/src/ocr/paddle_ocr.py
index 1973493..520d94b 100644
--- a/src/ocr/paddle_ocr.py
+++ b/src/ocr/paddle_ocr.py
@@ -45,7 +45,7 @@ class OCREngine:
     def __init__(
         self,
         lang: str = "en",
-        use_gpu: bool = False,
+        use_gpu: bool = True,  # Prefer GPU by default; False forces CPU
         det_model_dir: str | None = None,
         rec_model_dir: str | None = None
     ):
@@ -54,20 +54,26 @@ class OCREngine:
 
         Args:
             lang: Language code ('en', 'sv', 'ch', etc.)
-            use_gpu: Whether to use GPU acceleration
+            use_gpu: Prefer GPU acceleration; False forces CPU (default: True)
             det_model_dir: Custom detection model directory
             rec_model_dir: Custom recognition model directory
         """
         from paddleocr import PaddleOCR
 
-        # PaddleOCR 3.x API - simplified init
+        # PaddleOCR 3.x API: `use_gpu` and `show_log` are no longer accepted
+        # (unknown-argument error); the inference device is set via `device`.
         init_params = {'lang': lang}
+        if not use_gpu:
+            # Force CPU. When `device` is unset, PaddleOCR 3.x prefers GPU 0
+            # and falls back to CPU when no GPU is available.
+            init_params['device'] = 'cpu'
         if det_model_dir:
             init_params['text_detection_model_dir'] = det_model_dir
         if rec_model_dir:
             init_params['text_recognition_model_dir'] = rec_model_dir
 
         self.ocr = PaddleOCR(**init_params)
+        self.use_gpu = use_gpu
 
     def extract_from_image(
         self,