Init

2025-08-11 14:20:56 +02:00
parent 0a80400720
commit f077c6351d
17 changed files with 165 additions and 248 deletions
--- a/app/agents/receipt_agent.py
+++ b/app/agents/receipt_agent.py
@@ -4,15 +4,15 @@ from langchain_core.output_parsers import PydanticOutputParser
 from langchain.prompts import PromptTemplate
 from ..core.llm import llm
 from ..schemas import ReceiptInfo
+from typing import List

 parser = PydanticOutputParser(pydantic_object=ReceiptInfo)

-# 更新Prompt模板以包含语言信息
 receipt_template = """
 You are a highly accurate receipt information extraction robot.
 The document's primary language is '{language}'.
-Please extract all key information from the following receipt image.
-If some information is not present in the image, leave it as null.
+Please extract all key information from the following receipt images, which belong to a single document.
+If some information is not present in the images, leave it as null.
 Please strictly follow the JSON format below, without adding any extra explanations or comments.

 {format_instructions}
@@ -25,22 +25,21 @@ receipt_prompt = PromptTemplate(
 )


-async def agent_extract_receipt_info(image_base64: str, language: str) -> ReceiptInfo:
-    """Agent 2: Extracts receipt information from an image, aware of the document's language."""
+async def agent_extract_receipt_info(images_base64: List[str], language: str) -> ReceiptInfo:
+    """Agent 2: Extracts receipt information from a list of images belonging to a single document, passing the document's language into the prompt."""
    print(f"--- [Agent 2] Calling multimodal LLM to extract receipt info (Language: {language})...")

    prompt_text = await receipt_prompt.aformat(language=language)

-    msg = HumanMessage(
-        content=[
-            {"type": "text", "text": prompt_text},
-            {
-                "type": "image_url",
-                "image_url": f"data:image/png;base64,{image_base64}",
-            },
-        ]
-    )
+    content_parts = [{"type": "text", "text": prompt_text}]
+    for image_base64 in images_base64:
+        content_parts.append({
+            "type": "image_url",
+            "image_url": {"url": f"data:image/png;base64,{image_base64}"},
+        })
+
+    msg = HumanMessage(content=content_parts)

    chain = llm | parser
    receipt_info = await chain.ainvoke([msg])
-    return receipt_info
+    return receipt_info