diff --git a/packages/backend/backend/cli/infer.py b/packages/backend/backend/cli/infer.py index 76957d2..04eb908 100644 --- a/packages/backend/backend/cli/infer.py +++ b/packages/backend/backend/cli/infer.py @@ -54,8 +54,8 @@ def main(): ) parser.add_argument( '--lang', - default='en', - help='OCR language (default: en)' + default='sv', + help='OCR language (default: sv)' ) parser.add_argument( '--gpu', diff --git a/packages/backend/backend/pipeline/field_extractor.py b/packages/backend/backend/pipeline/field_extractor.py index 7a7eb76..c30b100 100644 --- a/packages/backend/backend/pipeline/field_extractor.py +++ b/packages/backend/backend/pipeline/field_extractor.py @@ -85,7 +85,7 @@ class FieldExtractor: def __init__( self, - ocr_lang: str = 'en', + ocr_lang: str = 'sv', use_gpu: bool = False, bbox_padding: float = 0.1, dpi: int = 300, diff --git a/packages/backend/backend/pipeline/pipeline.py b/packages/backend/backend/pipeline/pipeline.py index 5b41a56..a0d6e58 100644 --- a/packages/backend/backend/pipeline/pipeline.py +++ b/packages/backend/backend/pipeline/pipeline.py @@ -209,7 +209,7 @@ class InferencePipeline: self, model_path: str | Path, confidence_threshold: float = 0.5, - ocr_lang: str = 'en', + ocr_lang: str = 'sv', use_gpu: bool = False, dpi: int = 300, enable_fallback: bool = True, diff --git a/packages/backend/backend/web/services/autolabel.py b/packages/backend/backend/web/services/autolabel.py index a88f5b1..6bfd13a 100644 --- a/packages/backend/backend/web/services/autolabel.py +++ b/packages/backend/backend/web/services/autolabel.py @@ -37,7 +37,7 @@ class AutoLabelService: def ocr_engine(self) -> OCREngine: """Lazy initialization of OCR engine.""" if self._ocr_engine is None: - self._ocr_engine = OCREngine(lang="en") + self._ocr_engine = OCREngine(lang="sv") return self._ocr_engine def auto_label_document( diff --git a/packages/shared/shared/ocr/paddle_ocr.py b/packages/shared/shared/ocr/paddle_ocr.py index 76c5775..eeba1b5 100644 --- a/packages/shared/shared/ocr/paddle_ocr.py +++ b/packages/shared/shared/ocr/paddle_ocr.py @@ -58,7 +58,7 @@ class OCREngine: def __init__( self, - lang: str = "en", + lang: str = "sv", det_model_dir: str | None = None, rec_model_dir: str | None = None, use_doc_orientation_classify: bool = True, @@ -387,7 +387,7 @@ class OCREngine: def extract_ocr_tokens( image_path: str | Path, - lang: str = "en", + lang: str = "sv", page_no: int = 0 ) -> list[OCRToken]: """ diff --git a/packages/training/training/processing/gpu_pool.py b/packages/training/training/processing/gpu_pool.py index 7f4286e..b23bf28 100644 --- a/packages/training/training/processing/gpu_pool.py +++ b/packages/training/training/processing/gpu_pool.py @@ -48,7 +48,7 @@ def _init_gpu_worker(gpu_id: int = 0) -> None: from paddleocr import PaddleOCR # PaddleOCR 3.x init - minimal params, GPU controlled via paddle.set_device - _ocr_instance = PaddleOCR(lang="en") + _ocr_instance = PaddleOCR(lang="sv") _gpu_initialized = True logger.info(f"GPU worker initialized on GPU {gpu_id} in process {os.getpid()}")