Files
invoice-master-poc-v2/scripts/setup_wsl.sh
Yaojia Wang 8938661850 Initial commit: Invoice field extraction system using YOLO + OCR
Features:
- Auto-labeling pipeline: CSV values -> PDF search -> YOLO annotations
- Flexible date matching: year-month match, nearby date tolerance
- PDF text extraction with PyMuPDF
- OCR support for scanned documents (PaddleOCR)
- YOLO training and inference pipeline
- 7 field types: InvoiceNumber, InvoiceDate, InvoiceDueDate, OCR, Bankgiro, Plusgiro, Amount

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 17:44:14 +01:00

81 lines
2.1 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# WSL environment setup script.
# Usage: bash scripts/setup_wsl.sh

# Abort on the first command that exits non-zero.
set -e

printf '%s\n' \
  "==========================================" \
  "Invoice Master POC v2 - WSL 安装脚本" \
  "=========================================="

# Make sure we are running inside WSL: the script treats a case-insensitive
# "microsoft" in /proc/version as the marker. A missing or unreadable
# /proc/version is handled the same way as "not WSL".
grep -qi microsoft /proc/version 2>/dev/null || {
  printf '%s\n' \
    "警告: 未检测到 WSL 环境,请在 WSL 中运行此脚本" \
    "提示: 在 Windows 终端中输入 'wsl' 进入 WSL"
  exit 1
}
echo ""
echo "[1/5] 更新系统包..."
# apt-get is used instead of apt: apt prints a warning that its command-line
# interface is not stable and is meant for interactive use, not scripts.
sudo apt-get update
echo ""
echo "[2/5] 安装系统依赖..."
# System packages: the Python 3.10 interpreter with its venv module and pip,
# plus shared libraries (libGL, glib, X11 helpers, OpenMP runtime).
# libgl1 replaces libgl1-mesa-glx: the latter is only a transitional package
# and has no installation candidate on newer Debian/Ubuntu releases, which
# would make this step (and, with `set -e`, the whole script) fail.
sudo apt-get install -y \
  python3.10 \
  python3.10-venv \
  python3-pip \
  libgl1 \
  libglib2.0-0 \
  libsm6 \
  libxrender1 \
  libxext6 \
  libgomp1
echo ""
echo "[3/5] 创建 Python 虚拟环境..."
# Step 2 of this script installs python3.10 and python3.10-venv, so build the
# virtual environment with that interpreter when it is on PATH. Plain
# `python3` may resolve to a different version that has no venv module
# installed; it is kept only as a fallback.
if command -v python3.10 > /dev/null 2>&1; then
  venv_python="python3.10"
else
  venv_python="python3"
fi

# An existing ./venv directory is reused as-is.
if [ -d "venv" ]; then
  echo "虚拟环境已存在,跳过创建"
else
  "${venv_python}" -m venv venv
fi
# Step 4: enter the virtual environment, refresh pip, then install the
# packages listed in requirements.txt (resolved relative to the current
# working directory).
printf '\n%s\n' "[4/5] 激活虚拟环境并安装依赖..."
# shellcheck source=/dev/null
. venv/bin/activate
pip install --upgrade pip

printf '\n%s\n' "安装 Python 依赖包..."
pip install -r requirements.txt
# Step 5: smoke-test the installation by importing each key package in its
# own interpreter run. With `set -e` active, the first failing import stops
# the script.
printf '\n%s\n' "[5/5] 验证安装..."
import_checks=(
  "import fitz; print(f'PyMuPDF: {fitz.version}')"
  "from ultralytics import YOLO; print('Ultralytics: OK')"
  "from paddleocr import PaddleOCR; print('PaddleOCR: OK')"
)
for snippet in "${import_checks[@]}"; do
  python3 -c "${snippet}"
done
# Completion banner followed by usage hints. The here-doc delimiter is quoted
# so the text is emitted literally, with no expansion.
cat << 'EOF'

==========================================
安装完成!
==========================================

使用方法:
 1. 激活虚拟环境: source venv/bin/activate
 2. 运行自动标注: python -m src.cli.autolabel --help
 3. 训练模型: python -m src.cli.train --help
 4. 推理: python -m src.cli.infer --help

EOF
# GPU check.
#######################################
# Report whether a usable NVIDIA GPU is visible and, if so, print the command
# that enables GPU acceleration.
#
# This is an informational step, so it must never abort the script: the
# nvidia-smi query runs inside the `if` condition, where a non-zero exit
# status (e.g. the tool is installed but cannot reach the driver) does not
# trigger `set -e`. A missing tool, a failing query, or an empty result all
# fall back to the CPU-mode message.
# Globals:   None
# Arguments: None
# Outputs:   Status messages on stdout
# Returns:   0 when the reporting itself succeeds
#######################################
check_gpu() {
  local gpu_info=""

  echo "检查 GPU 支持..."
  # stderr of nvidia-smi is dropped on purpose: its failure is reported
  # through the CPU-mode fallback message instead.
  if command -v nvidia-smi > /dev/null 2>&1 \
    && gpu_info=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2> /dev/null) \
    && [[ -n "${gpu_info}" ]]; then
    echo "检测到 NVIDIA GPU:"
    printf '%s\n' "${gpu_info}"
    echo ""
    echo "提示: 运行以下命令启用 GPU 加速:"
    echo " pip install paddlepaddle-gpu"
  else
    echo "未检测到 GPU将使用 CPU 模式"
  fi
}

check_gpu