Files
AmazingDoc/app/agents.py
Yaojia Wang 0a80400720 Init project
2025-08-11 00:07:41 +02:00

51 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# app/agents.py
import asyncio
import random
from .schemas import ReceiptInfo, InvoiceInfo, ReceiptItem
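# --- Core agent functions (placeholder / mock implementations) ---
# In a real application these functions would be replaced by real logic that calls LangChain and an LLM.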
async def agent_classify_document(text: str) -> str:
    """Agent 1: classify a document by keyword (mock implementation).

    Scans ``text`` for category keywords in a fixed priority order
    (invoice, receipt, contract, letter) and returns the first category
    whose keyword is present. Every category's own label is one of its
    keywords, so a text that literally contains a label is always classified
    as that label. When no keyword matches, a random category is returned.

    Args:
        text: Raw text of the document to classify.

    Returns:
        One of "信件", "收据", "发票" or "合约".
    """
    print("--- [Agent 1] 正在进行文档分类...")
    await asyncio.sleep(0.5)  # simulate network latency

    # Ordered by priority: the first rule with a keyword found in the text wins.
    keyword_rules = (
        ("发票", ("发票",)),
        ("收据", ("收据", "小票")),
        ("合约", ("合约", "合同", "协议")),
        ("信件", ("信件",)),
    )
    for category, keywords in keyword_rules:
        if any(keyword in text for keyword in keywords):
            return category

    # Nothing matched: this mock falls back to a random category.
    doc_types = ["信件", "收据", "发票", "合约"]
    return random.choice(doc_types)
async def agent_extract_receipt_info(text: str) -> ReceiptInfo:
    """Agent 2: extract receipt information (mock implementation).

    The ``text`` argument is not inspected: after a one-second pause the
    function always returns the same hard-coded sample receipt.

    Args:
        text: Raw text of the receipt (unused by this mock).

    Returns:
        A ``ReceiptInfo`` holding fixed sample data with a single line item.
    """
    print("--- [Agent 2] 正在提取收据信息...")
    await asyncio.sleep(1)  # simulate LLM processing time

    # Build the single sample line item first, then the receipt around it.
    sample_item = ReceiptItem(name="牛奶", quantity=2, price=11.5)
    sample_receipt = ReceiptInfo(
        merchant_name="模拟超市",
        transaction_date="2025-08-10",
        total_amount=198.50,
        items=[sample_item],
    )
    return sample_receipt
async def agent_extract_invoice_info(text: str) -> InvoiceInfo:
    """Agent 3: extract invoice information (mock implementation).

    The ``text`` argument is not inspected: after a one-second pause the
    function always returns the same hard-coded sample invoice.

    Args:
        text: Raw text of the invoice (unused by this mock).

    Returns:
        An ``InvoiceInfo`` populated with fixed sample data.
    """
    print("--- [Agent 3] 正在提取发票信息...")
    await asyncio.sleep(1)  # simulate LLM processing time

    # Fixed sample field values returned for every call.
    sample_fields = {
        "invoice_number": "INV123456789",
        "issue_date": "2025-08-09",
        "seller_name": "模拟科技有限公司",
        "total_amount_in_figures": 12000.00,
    }
    return InvoiceInfo(**sample_fields)
async def agent_vectorize_and_store(doc_id: str, text: str, category: str, vector_db: dict):
    """Agent 4: vectorize a document and store it (mock implementation).

    Splits ``text`` into consecutive 200-character chunks, generates 128
    random floats per chunk as stand-in vector data (kept as one flat list),
    and writes the resulting record into ``vector_db`` under ``doc_id``,
    overwriting any entry already stored under that key.

    Args:
        doc_id: Key under which the record is stored.
        text: Document text to split into chunks.
        category: Category label saved in the record's metadata.
        vector_db: Dict acting as the vector store; mutated in place.

    Returns:
        None. The result is the new ``vector_db[doc_id]`` entry.
    """
    chunk_size = 200
    floats_per_chunk = 128

    print(f"--- [Agent 4] 正在向量化文档 (ID: {doc_id})...")
    await asyncio.sleep(0.5)  # placeholder delay

    # Fixed-size slices; the final chunk may be shorter, and empty text yields no chunks.
    content_chunks = []
    for start in range(0, len(text), chunk_size):
        content_chunks.append(text[start:start + chunk_size])

    chunk_total = len(content_chunks)
    fake_vectors = [random.random() for _ in range(chunk_total * floats_per_chunk)]

    record = {
        "metadata": {"category": category, "chunk_count": chunk_total},
        "content_chunks": content_chunks,
        "vectors": fake_vectors,
    }
    vector_db[doc_id] = record
    print(f"--- [Agent 4] 文档 {doc_id} 已存入向量数据库。")