Files
AmazingDoc/app/core/vector_store.py
2025-08-11 14:20:56 +02:00

47 lines
1.5 KiB
Python

# app/core/vector_store.py
import os
import chromadb
from dotenv import load_dotenv
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
# Populate the process environment from a .env file before any setting is read.
load_dotenv()

# Selects which embeddings backend is built below: "openai" (default) or "azure".
# The value is lower-cased, so the comparison is case-insensitive.
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai").lower()

# Module-level embeddings client; assigned by exactly one of the branches below.
embedding_model = None

print(f"--- [Core] Initializing Embeddings with provider: {LLM_PROVIDER} ---")

if LLM_PROVIDER == "azure":
    required_vars = [
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_API_KEY",
        "OPENAI_API_VERSION",
        "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME",
    ]
    # Read every setting once so the values that were validated are the very
    # same values handed to the client.
    azure_settings = {var: os.getenv(var) for var in required_vars}
    missing_vars = [var for var, value in azure_settings.items() if not value]
    if missing_vars:
        # Name the offending variables so a misconfiguration can be fixed
        # without guessing which of the four is absent or empty.
        raise ValueError(
            "One or more Azure OpenAI environment variables for embeddings "
            f"are not set: {', '.join(missing_vars)}"
        )
    embedding_model = AzureOpenAIEmbeddings(
        azure_endpoint=azure_settings["AZURE_OPENAI_ENDPOINT"],
        api_key=azure_settings["AZURE_OPENAI_API_KEY"],
        api_version=azure_settings["OPENAI_API_VERSION"],
        azure_deployment=azure_settings["AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"],
    )
elif LLM_PROVIDER == "openai":
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY is not set for the 'openai' provider.")
    embedding_model = OpenAIEmbeddings(
        api_key=openai_api_key,
        # The model name is overridable through the environment.
        model=os.getenv("OPENAI_EMBEDDING_MODEL_NAME", "text-embedding-3-small"),
    )
else:
    # Fail fast at import time rather than leaving embedding_model as None.
    raise ValueError(f"Unsupported LLM_PROVIDER: {LLM_PROVIDER}. Please use 'azure' or 'openai'.")
# Settings for the persistent Chroma store, named here so they are easy to spot.
_CHROMA_DB_PATH = "./chroma_db"  # relative path, resolved against the working directory
_COLLECTION_NAME = "documents"
# "hnsw:space" is Chroma's metadata key for the index distance function
# (see the Chroma collection configuration docs); here it is set to cosine.
_COLLECTION_METADATA = {"hnsw:space": "cosine"}

# Client backed by the on-disk database at _CHROMA_DB_PATH.
client = chromadb.PersistentClient(path=_CHROMA_DB_PATH)

# Fetch the collection if it already exists, otherwise create it.
vector_store = client.get_or_create_collection(
    name=_COLLECTION_NAME,
    metadata=_COLLECTION_METADATA,
)