AmazingDoc/app/routers/documents.py

# app/routers/documents.py
import uuid
import mimetypes
import base64
from fastapi import APIRouter, UploadFile, File, HTTPException
from typing import Dict, Any, List
from fastapi.concurrency import run_in_threadpool
from PIL import Image
from io import BytesIO

from .. import agents
from ..core.pdf_processor import convert_pdf_to_images, image_to_base64_str

# Create an APIRouter instance
router = APIRouter(
    prefix="/documents",
    tags=["Document Processing"],
)

# Simulate an SQL database to store the final results
db_results: Dict[str, Any] = {}

async def multimodal_process_pipeline(doc_id: str, image: Image.Image, page_num: int):
    image_base64 = await run_in_threadpool(image_to_base64_str, image)
    classification_result = await agents.agent_classify_document_from_image(image_base64)
    category = classification_result.category
    language = classification_result.language
    print(f"Document page {page_num} classified as: {category}, Language: {language}")

    extraction_result = None
    if category in ["RECEIPT", "INVOICE"]:
        if category == "RECEIPT":
            extraction_result = await agents.agent_extract_receipt_info(image_base64, language)
        elif category == "INVOICE":
            extraction_result = await agents.agent_extract_invoice_info(image_base64, language)
    else:
        print(f"Document classified as '{category}', skipping extraction.")

    final_result = {
        "doc_id": f"{doc_id}_page_{page_num}",
        "category": category,
        "language": language,
        "extraction_data": extraction_result.dict() if extraction_result else None,
        "status": "Processed"
    }
    db_results[final_result["doc_id"]] = final_result
    return final_result

@router.post("/process", summary="upload and process a document")
async def upload_and_process_document(file: UploadFile = File(...)):
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided.")

    doc_id = str(uuid.uuid4())
    print(f"\nReceiving document: {file.filename} (allocated ID: {doc_id})")
    contents = await file.read()

    try:
        file_type = mimetypes.guess_type(file.filename)[0]
        print(f"File type: {file_type}")

        images: List[Image.Image] = []
        if file_type == 'application/pdf':
            images = await run_in_threadpool(convert_pdf_to_images, contents)
        elif file_type in ['image/png', 'image/jpeg']:
            images.append(Image.open(BytesIO(contents)))
        else:
            raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_type}.")

        if not images:
            raise HTTPException(status_code=400, detail="Could not extract images from document.")

        images_base64 = [await run_in_threadpool(image_to_base64_str, img) for img in images]

        classification_result = await agents.agent_classify_document_from_image(images_base64)
        category = classification_result.category
        language = classification_result.language
        print(f"The document is classified as: {category}, Language: {language}")

        extraction_result = None
        if category in ["RECEIPT", "INVOICE"]:
            if category == "RECEIPT":
                extraction_result = await agents.agent_extract_receipt_info(images_base64, language)
            elif category == "INVOICE":
                extraction_result = await agents.agent_extract_invoice_info(images_base64, language)
        else:
            print(f"The document is classified as '{category}'，skipping extraction。")

        # 3. Return a unified result
        final_result = {
            "doc_id": doc_id,
            "page_count": len(images),
            "category": category,
            "language": language,
            "extraction_data": extraction_result.dict() if extraction_result else None,
            "status": "Processed"
        }
        db_results[doc_id] = final_result
        return final_result

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")

@router.get("/results/{doc_id}", summary="Get result by doc_id")
async def get_result(doc_id: str):
    if doc_id in db_results:
        return db_results[doc_id]
    raise HTTPException(status_code=404, detail="Document not found.")