Initial commit: Invoice field extraction system using YOLO + OCR
Features:
- Auto-labeling pipeline: CSV values -> PDF search -> YOLO annotations
- Flexible date matching: year-month match, nearby date tolerance
- PDF text extraction with PyMuPDF
- OCR support for scanned documents (PaddleOCR)
- YOLO training and inference pipeline
- 7 field types: InvoiceNumber, InvoiceDate, InvoiceDueDate, OCR, Bankgiro, Plusgiro, Amount

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
78
pyproject.toml
Normal file
78
pyproject.toml
Normal file
@@ -0,0 +1,78 @@
|
||||
# Build backend configuration (PEP 517/518): the package is built with setuptools.
[build-system]
# "wheel" is intentionally not listed: the setuptools build backend requests
# it on its own whenever a wheel build actually needs it.
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"
|
||||
|
||||
# Core package metadata (PEP 621).
[project]
name = "invoice-master"
version = "2.0.0"
description = "Automatic invoice information extraction using YOLO + OCR"
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.10"
authors = [{ name = "Invoice Master Team" }]
keywords = ["invoice", "ocr", "yolo", "document-processing", "pdf"]
# Trove classifiers. The per-version entries start at 3.10, matching the
# `requires-python` floor above.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
|
||||
|
||||
# Runtime requirements as PEP 508 strings, sorted case-insensitively by
# distribution name. Only lower bounds are pinned.
dependencies = [
    "numpy>=1.24.0",
    "opencv-python>=4.8.0",
    "paddleocr>=2.7.0",
    "paddlepaddle>=2.5.0",
    "Pillow>=10.0.0",
    "PyMuPDF>=1.23.0",
    "pyyaml>=6.0",
    "tqdm>=4.65.0",
    "ultralytics>=8.1.0",
]
|
||||
|
||||
# Optional extras, installed with e.g. `pip install "invoice-master[dev]"`.
[project.optional-dependencies]
# Test, formatting, lint and type-check tooling (configured in the [tool.*] tables).
dev = [
    "black>=23.0.0",
    "mypy>=1.0.0",
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "ruff>=0.1.0",
]
# NOTE(review): the base `dependencies` list already requires "paddlepaddle",
# so this extra installs "paddlepaddle-gpu" in addition to it rather than
# instead of it. Confirm the two distributions are meant to coexist.
gpu = ["paddlepaddle-gpu>=2.5.0"]
|
||||
|
||||
# Console commands created on install; each maps to a `main` callable in `src.cli`.
# NOTE(review): the import path starts with a package literally named `src`,
# which is what gets installed. Confirm that top-level name is intended.
[project.scripts]
invoice-autolabel = "src.cli.autolabel:main"
invoice-infer = "src.cli.infer:main"
invoice-train = "src.cli.train:main"
|
||||
|
||||
# Package discovery: search the project root and keep only packages whose
# dotted name matches the `src*` glob.
[tool.setuptools.packages.find]
include = ["src*"]
where = ["."]
|
||||
|
||||
# Black formatter settings. The line length matches the Ruff setting.
[tool.black]
target-version = [
    "py310",
    "py311",
    "py312",
]
line-length = 100
|
||||
|
||||
# Ruff settings shared by the linter and formatter. The line length matches Black.
[tool.ruff]
line-length = 100
target-version = "py310"

# Rule selection belongs under `lint`; the top-level `select` / `ignore` keys
# are deprecated in current Ruff releases.
[tool.ruff.lint]
# NOTE(review): "D" (pydocstyle) is enabled without a docstring convention, so
# Ruff must resolve the mutually exclusive D203/D211 and D212/D213 pairs on its
# own. Consider setting `convention` under [tool.ruff.lint.pydocstyle].
select = ["E", "F", "W", "I", "N", "D", "UP", "B", "C4", "SIM"]
# D100 / D104: do not require module-level and package-level docstrings.
ignore = ["D100", "D104"]
|
||||
|
||||
# mypy type-checking settings, checked against the lowest supported Python (3.10).
[tool.mypy]
python_version = "3.10"
# Every function definition must carry type annotations.
disallow_untyped_defs = true
warn_return_any = true
warn_unused_ignores = true
|
||||
|
||||
# pytest settings: collect `test_*.py` files from the `tests` directory.
[tool.pytest.ini_options]
python_files = ["test_*.py"]
testpaths = ["tests"]
# The --cov flags come from pytest-cov (listed in the `dev` extra), so every
# test run also reports coverage of `src` with the missing lines shown.
addopts = "-v --cov=src --cov-report=term-missing"
|
||||
Reference in New Issue
Block a user