Initial commit: Invoice field extraction system using YOLO + OCR
Features:
- Auto-labeling pipeline: CSV values -> PDF search -> YOLO annotations
- Flexible date matching: year-month match, nearby date tolerance
- PDF text extraction with PyMuPDF
- OCR support for scanned documents (PaddleOCR)
- YOLO training and inference pipeline
- 7 field types: InvoiceNumber, InvoiceDate, InvoiceDueDate, OCR, Bankgiro, Plusgiro, Amount

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
59
configs/training.yaml
Normal file
59
configs/training.yaml
Normal file
@@ -0,0 +1,59 @@
|
||||
# YOLO Training Configuration
|
||||
# Use with (from the repository root): yolo train data=dataset.yaml cfg=configs/training.yaml
|
||||
|
||||
# Model
|
||||
model: yolov8s.pt
|
||||
|
||||
# Training hyperparameters
|
||||
epochs: 100
|
||||
patience: 20 # Stop early after this many epochs without validation improvement
|
||||
batch: 16
|
||||
imgsz: 1280
|
||||
|
||||
# Optimizer
|
||||
optimizer: AdamW
|
||||
lr0: 0.001 # Initial learning rate
|
||||
lrf: 0.01 # Final learning rate as a fraction of lr0 (final LR = lr0 * lrf)
|
||||
momentum: 0.937 # SGD momentum; used as beta1 for Adam-family optimizers such as AdamW
|
||||
weight_decay: 0.0005
|
||||
|
||||
# Warmup
|
||||
warmup_epochs: 3
|
||||
warmup_momentum: 0.8
|
||||
warmup_bias_lr: 0.1
|
||||
|
||||
# Loss weights
|
||||
box: 7.5 # Box loss gain
|
||||
cls: 0.5 # Class loss gain
|
||||
dfl: 1.5 # DFL loss gain
|
||||
|
||||
# Augmentation
|
||||
# Keep minimal for document images
|
||||
hsv_h: 0.0 # No hue augmentation
|
||||
hsv_s: 0.1 # Slight saturation
|
||||
hsv_v: 0.2 # Brightness variation
|
||||
degrees: 5.0 # Rotation ±5°
|
||||
translate: 0.05 # Translation ±5% of image size
|
||||
scale: 0.2 # Scale ±20%
|
||||
shear: 0.0 # No shear
|
||||
perspective: 0.0 # No perspective
|
||||
flipud: 0.0 # No vertical flip
|
||||
fliplr: 0.0 # No horizontal flip
|
||||
mosaic: 0.0 # Disable mosaic (not suitable for documents)
|
||||
mixup: 0.0 # Disable mixup
|
||||
copy_paste: 0.0 # Disable copy-paste
|
||||
|
||||
# Validation, checkpointing, and caching
|
||||
val: true
|
||||
save: true
|
||||
save_period: 10 # Save a checkpoint every 10 epochs
|
||||
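cache: true # Cache images in RAM for faster loading; memory-heavy at imgsz 1280, consider 'disk' or false if RAM is limited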
|
||||
|
||||
# Other
|
||||
device: 0 # GPU device (0, 1, etc.) or 'cpu'
|
||||
workers: 8
|
||||
project: runs/train
|
||||
name: invoice_fields
|
||||
exist_ok: true
|
||||
pretrained: true
|
||||
verbose: true
|
||||
Reference in New Issue
Block a user