Features:
- Auto-labeling pipeline: CSV values -> PDF search -> YOLO annotations
- Flexible date matching: year-month match, nearby date tolerance
- PDF text extraction with PyMuPDF
- OCR support for scanned documents (PaddleOCR)
- YOLO training and inference pipeline
- 7 field types: InvoiceNumber, InvoiceDate, InvoiceDueDate, OCR, Bankgiro, Plusgiro, Amount

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
81 lines
2.1 KiB
Bash
81 lines
2.1 KiB
Bash
#!/bin/bash
# WSL environment setup script for Invoice Master POC v2.
#
# Usage (run from the project root): bash scripts/setup_wsl.sh
#
# Steps performed:
#   1. Refresh the apt package lists.
#   2. Install Python 3.10 and the system libraries listed below.
#   3. Create a Python virtual environment in ./venv (skipped if it exists).
#   4. Activate the venv, upgrade pip, and install requirements.txt.
#   5. Verify that PyMuPDF, Ultralytics and PaddleOCR can be imported.
# Finally, print usage hints and report whether an NVIDIA GPU is usable.

# -e: abort on the first failing command; -u: treat unset variables as
# errors; pipefail: a pipeline fails if any of its stages fails.
set -euo pipefail

echo "=========================================="
echo "Invoice Master POC v2 - WSL 安装脚本"
echo "=========================================="

# Refuse to run outside WSL; WSL is detected by the string "microsoft"
# (case-insensitive) appearing in /proc/version.
if ! grep -qi microsoft /proc/version 2>/dev/null; then
  echo "警告: 未检测到 WSL 环境,请在 WSL 中运行此脚本" >&2
  echo "提示: 在 Windows 终端中输入 'wsl' 进入 WSL" >&2
  exit 1
fi

# Fail fast, before touching the system: ./venv and requirements.txt are
# resolved relative to the current directory, so the script must be started
# from the directory that contains requirements.txt.
if [[ ! -f requirements.txt ]]; then
  echo "错误: 未找到 requirements.txt,请在项目根目录下运行: bash scripts/setup_wsl.sh" >&2
  exit 1
fi

echo ""
echo "[1/5] 更新系统包..."
sudo apt update

echo ""
echo "[2/5] 安装系统依赖..."
# libgl1 replaces libgl1-mesa-glx, which was only a transitional package
# and is no longer shipped by newer Ubuntu/Debian releases.
sudo apt install -y \
  python3.10 \
  python3.10-venv \
  python3-pip \
  libgl1 \
  libglib2.0-0 \
  libsm6 \
  libxrender1 \
  libxext6 \
  libgomp1

echo ""
echo "[3/5] 创建 Python 虚拟环境..."
if [[ -d "venv" ]]; then
  echo "虚拟环境已存在,跳过创建"
else
  # Use the interpreter installed above: python3.10-venv provides the venv
  # module for python3.10 only, and bare "python3" may be another version.
  python3.10 -m venv venv
fi

echo ""
echo "[4/5] 激活虚拟环境并安装依赖..."
# Activate scripts may read variables that are unset in a non-interactive
# shell, so relax -u only while sourcing.
set +u
# shellcheck disable=SC1091
source venv/bin/activate
set -u
# "python -m pip" guarantees the venv's pip is the one being used/upgraded.
python -m pip install --upgrade pip

echo ""
echo "安装 Python 依赖包..."
python -m pip install -r requirements.txt

echo ""
echo "[5/5] 验证安装..."
# Each import must succeed; under set -e a failing import aborts the script.
python3 -c "import fitz; print(f'PyMuPDF: {fitz.version}')"
python3 -c "from ultralytics import YOLO; print('Ultralytics: OK')"
python3 -c "from paddleocr import PaddleOCR; print('PaddleOCR: OK')"

echo ""
echo "=========================================="
echo "安装完成!"
echo "=========================================="
echo ""
echo "使用方法:"
echo " 1. 激活虚拟环境: source venv/bin/activate"
echo " 2. 运行自动标注: python -m src.cli.autolabel --help"
echo " 3. 训练模型: python -m src.cli.train --help"
echo " 4. 推理: python -m src.cli.infer --help"
echo ""

# GPU check (best-effort): a missing or failing nvidia-smi must not turn a
# successful installation into a failed script run, so the query is part of
# the condition and any failure falls through to the CPU-mode message.
echo "检查 GPU 支持..."
if command -v nvidia-smi &> /dev/null \
  && gpu_info=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2> /dev/null); then
  echo "检测到 NVIDIA GPU:"
  printf '%s\n' "$gpu_info"
  echo ""
  echo "提示: 运行以下命令启用 GPU 加速:"
  echo " pip install paddlepaddle-gpu"
else
  echo "未检测到 GPU,将使用 CPU 模式"
fi