# app/core/vector_store.py
"""Module-level setup of the embedding model and the Chroma collection.

Importing this module has side effects:

* ``load_dotenv()`` is called so settings can come from a ``.env`` file.
* ``embedding_model`` is built for the provider named by the
  ``LLM_PROVIDER`` environment variable (``"openai"`` when unset; the value
  is lower-cased before comparison).
* A persistent Chroma client is opened at ``./chroma_db`` and the
  ``documents`` collection is fetched or created.

Raises:
    ValueError: if ``LLM_PROVIDER`` is neither ``"azure"`` nor ``"openai"``,
        or if a required environment variable for the selected provider is
        unset or empty.
"""
import os

import chromadb
from dotenv import load_dotenv
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings

load_dotenv()

LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai").lower()

embedding_model = None

print(f"--- [Core] Initializing Embeddings with provider: {LLM_PROVIDER} ---")

if LLM_PROVIDER == "azure":
    required_vars = [
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_API_KEY",
        "OPENAI_API_VERSION",
        "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME",
    ]
    # Name the offending variables so a misconfiguration can be fixed without
    # guessing; empty strings count as missing, same as an unset variable.
    _missing_vars = [var for var in required_vars if not os.getenv(var)]
    if _missing_vars:
        raise ValueError(
            "One or more Azure OpenAI environment variables for embeddings "
            "are not set: " + ", ".join(_missing_vars)
        )
    embedding_model = AzureOpenAIEmbeddings(
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        api_version=os.getenv("OPENAI_API_VERSION"),
        azure_deployment=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"),
    )
elif LLM_PROVIDER == "openai":
    if not os.getenv("OPENAI_API_KEY"):
        raise ValueError("OPENAI_API_KEY is not set for the 'openai' provider.")
    embedding_model = OpenAIEmbeddings(
        api_key=os.getenv("OPENAI_API_KEY"),
        # The model name is optional; fall back to a default when unset.
        model=os.getenv("OPENAI_EMBEDDING_MODEL_NAME", "text-embedding-3-small"),
    )
else:
    raise ValueError(
        f"Unsupported LLM_PROVIDER: {LLM_PROVIDER}. Please use 'azure' or 'openai'."
    )

# NOTE(review): the path is relative, so the database location depends on the
# working directory of the process that imports this module -- confirm that
# is intended before running from a different directory.
client = chromadb.PersistentClient(path="./chroma_db")

# NOTE(review): despite its name, `vector_store` is the object returned by
# `get_or_create_collection`, and `embedding_model` is not passed to it here;
# presumably callers compute embeddings themselves -- confirm against callers.
vector_store = client.get_or_create_collection(
    name="documents",
    metadata={"hnsw:space": "cosine"},
)