fix: change default OCR language from English to Swedish

The project targets Swedish invoice extraction. The PaddleOCR `sv` model provides
better recognition of Swedish-specific characters (å, ä, ö).
This commit is contained in:
Yaojia Wang
2026-02-12 23:19:51 +01:00
parent 58d36c8927
commit d8f2acb762
6 changed files with 8 additions and 8 deletions

View File

@@ -54,8 +54,8 @@ def main():
)
parser.add_argument(
'--lang',
default='en',
help='OCR language (default: en)'
default='sv',
help='OCR language (default: sv)'
)
parser.add_argument(
'--gpu',

View File

@@ -85,7 +85,7 @@ class FieldExtractor:
def __init__(
self,
ocr_lang: str = 'en',
ocr_lang: str = 'sv',
use_gpu: bool = False,
bbox_padding: float = 0.1,
dpi: int = 300,

View File

@@ -209,7 +209,7 @@ class InferencePipeline:
self,
model_path: str | Path,
confidence_threshold: float = 0.5,
ocr_lang: str = 'en',
ocr_lang: str = 'sv',
use_gpu: bool = False,
dpi: int = 300,
enable_fallback: bool = True,

View File

@@ -37,7 +37,7 @@ class AutoLabelService:
def ocr_engine(self) -> OCREngine:
"""Lazy initialization of OCR engine."""
if self._ocr_engine is None:
self._ocr_engine = OCREngine(lang="en")
self._ocr_engine = OCREngine(lang="sv")
return self._ocr_engine
def auto_label_document(