# AmazingDoc/app/agents/receipt_agent.py

# app/agents/receipt_agent.py
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from ..core.llm import llm
from ..schemas import ReceiptInfo

# Output parser bound to the ReceiptInfo schema. It is used twice in this
# module: to generate the format instructions embedded in the prompt, and as
# the final step of the chain that turns the model's reply into a ReceiptInfo.
parser = PydanticOutputParser(pydantic_object=ReceiptInfo)

# Prompt template, updated to include the document's language.
# Placeholders: `{language}` is supplied on every call; `{format_instructions}`
# is pre-filled from the parser when the PromptTemplate is built.
receipt_template = """
You are a highly accurate receipt information extraction robot.
The document's primary language is '{language}'.
Please extract all key information from the following receipt image.
If some information is not present in the image, leave it as null.
Please strictly follow the JSON format below, without adding any extra explanations or comments.

{format_instructions}
"""

# Reusable prompt: the parser's format instructions are bound once at import
# time, so callers only have to provide `language` when formatting.
receipt_prompt = PromptTemplate(
    input_variables=["language"],
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    template=receipt_template,
)


async def agent_extract_receipt_info(
    image_base64: str,
    language: str,
    *,
    mime_type: str = "image/png",
) -> ReceiptInfo:
    """Agent 2: Extract structured receipt information from an image.

    Formats the receipt prompt for ``language``, sends it together with the
    image to the multimodal LLM in a single human message, and parses the
    reply with the module-level Pydantic parser.

    Args:
        image_base64: Base64-encoded image data *without* a ``data:`` prefix;
            the data URL is assembled here.
        language: Primary language of the document; substituted into the
            prompt text.
        mime_type: Media type declared in the image data URL. Defaults to
            ``"image/png"`` (the value that used to be hard-coded); pass e.g.
            ``"image/jpeg"`` when the encoded image is not a PNG.

    Returns:
        The ``ReceiptInfo`` produced by the output parser.

    Raises:
        Nothing is caught here: exceptions raised by the LLM call or by the
        output parser propagate to the caller.
    """
    print(f"--- [Agent 2] Calling multimodal LLM to extract receipt info (Language: {language})...")

    prompt_text = await receipt_prompt.aformat(language=language)

    msg = HumanMessage(
        content=[
            {"type": "text", "text": prompt_text},
            {
                "type": "image_url",
                # Use the object form ({"url": ...}) rather than a bare string:
                # it is the shape documented by LangChain for image_url blocks
                # and is understood by more chat-model integrations.
                "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
            },
        ]
    )

    # The parser is the last step of the chain, so the awaited result is
    # already a parsed ReceiptInfo rather than a raw model message.
    chain = llm | parser
    return await chain.ainvoke([msg])