fix: change default OCR language from English to Swedish
The project targets Swedish invoice extraction. The PaddleOCR `sv` model provides better recognition of Swedish-specific characters (å, ä, ö).
This commit is contained in:
@@ -54,8 +54,8 @@ def main():
|
||||
)
|
||||
parser.add_argument(
|
||||
'--lang',
|
||||
default='en',
|
||||
help='OCR language (default: en)'
|
||||
default='sv',
|
||||
help='OCR language (default: sv)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--gpu',
|
||||
|
||||
@@ -85,7 +85,7 @@ class FieldExtractor:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ocr_lang: str = 'en',
|
||||
ocr_lang: str = 'sv',
|
||||
use_gpu: bool = False,
|
||||
bbox_padding: float = 0.1,
|
||||
dpi: int = 300,
|
||||
|
||||
@@ -209,7 +209,7 @@ class InferencePipeline:
|
||||
self,
|
||||
model_path: str | Path,
|
||||
confidence_threshold: float = 0.5,
|
||||
ocr_lang: str = 'en',
|
||||
ocr_lang: str = 'sv',
|
||||
use_gpu: bool = False,
|
||||
dpi: int = 300,
|
||||
enable_fallback: bool = True,
|
||||
|
||||
@@ -37,7 +37,7 @@ class AutoLabelService:
|
||||
def ocr_engine(self) -> OCREngine:
|
||||
"""Lazy initialization of OCR engine."""
|
||||
if self._ocr_engine is None:
|
||||
self._ocr_engine = OCREngine(lang="en")
|
||||
self._ocr_engine = OCREngine(lang="sv")
|
||||
return self._ocr_engine
|
||||
|
||||
def auto_label_document(
|
||||
|
||||
Reference in New Issue
Block a user