This commit is contained in:
2025-08-11 21:38:25 +02:00
parent f87834a1b3
commit 87ba009bd7
4 changed files with 21 additions and 42 deletions

View File

@@ -5,10 +5,10 @@ from typing import Dict, Any, List
from fastapi.concurrency import run_in_threadpool
from PIL import Image
from io import BytesIO
from .. import agents
from ..core.pdf_processor import convert_pdf_to_images, image_to_base64_str
from ..core.ocr import extract_text_from_images
from ..core.vector_store import embedding_model, vector_store
# Create an APIRouter instance
router = APIRouter(
@@ -102,10 +102,12 @@ async def upload_and_process_document(
full_text = await run_in_threadpool(extract_text_from_images, images)
background_tasks.add_task(
agents.agent_vectorize_and_store,
doc_id,
full_text,
category,
language
doc_id=doc_id,
text=full_text,
category=category,
language=language,
embedding_model=embedding_model,
vector_store=vector_store
)
print("--- [Main] Vectorization job added to background tasks.")
@@ -118,4 +120,4 @@ async def upload_and_process_document(
# Look up the entry stored for `doc_id` in `db_results`.
# Returns the stored entry when the id is present; otherwise raises
# HTTPException with status 404 and detail "Document not found.".
# NOTE(review): this view appears to be a rendered diff hunk with indentation
# and +/- markers stripped; the two identical `raise` lines below are
# presumably the removed/added pair of a single line (e.g. an end-of-file
# newline change) -- confirm against the real file, where only one is expected.
async def get_result(doc_id: str):
if doc_id in db_results:
return db_results[doc_id]
raise HTTPException(status_code=404, detail="Document not found.")
raise HTTPException(status_code=404, detail="Document not found.")