# File listing metadata: 47 lines, 1.5 KiB, Python
# app/core/vector_store.py
"""Embedding model and Chroma collection shared by the application.

Importing this module has side effects:

* ``load_dotenv()`` is called to load variables from a ``.env`` file.
* ``embedding_model`` is built for the provider named by the
  ``LLM_PROVIDER`` environment variable (``"openai"`` when unset).
* A persistent Chroma client is opened at ``./chroma_db`` and the
  ``documents`` collection is fetched or created.

Raises:
    ValueError: At import time, when ``LLM_PROVIDER`` is neither ``"azure"``
        nor ``"openai"``, or when an environment variable required by the
        selected provider is missing or empty.
"""
import os

import chromadb
from dotenv import load_dotenv
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings

load_dotenv()

# Normalise case and surrounding whitespace so values such as "Azure" or
# "azure " (easy to produce in shell exports / env files) are still accepted.
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai").strip().lower()

embedding_model = None

print(f"--- [Core] Initializing Embeddings with provider: {LLM_PROVIDER} ---")

if LLM_PROVIDER == "azure":
    required_vars = [
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_API_KEY",
        "OPENAI_API_VERSION",
        "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME",
    ]
    # An empty value counts as missing. Name the offenders so a
    # misconfiguration can be fixed without trial and error.
    missing_vars = [var for var in required_vars if not os.getenv(var)]
    if missing_vars:
        raise ValueError(
            "One or more Azure OpenAI environment variables for embeddings "
            f"are not set: {', '.join(missing_vars)}"
        )

    embedding_model = AzureOpenAIEmbeddings(
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        api_version=os.getenv("OPENAI_API_VERSION"),
        azure_deployment=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"),
    )

elif LLM_PROVIDER == "openai":
    if not os.getenv("OPENAI_API_KEY"):
        raise ValueError("OPENAI_API_KEY is not set for the 'openai' provider.")

    embedding_model = OpenAIEmbeddings(
        api_key=os.getenv("OPENAI_API_KEY"),
        # The model name is optional; fall back to a default when unset.
        model=os.getenv("OPENAI_EMBEDDING_MODEL_NAME", "text-embedding-3-small"),
    )

else:
    raise ValueError(f"Unsupported LLM_PROVIDER: {LLM_PROVIDER}. Please use 'azure' or 'openai'.")


# NOTE: the path is relative, so the on-disk store is resolved against the
# process's current working directory.
client = chromadb.PersistentClient(path="./chroma_db")

# The collection is created without an embedding function here;
# `embedding_model` is exposed separately for callers to use.
# "hnsw:space": "cosine" selects cosine distance for the collection's index.
vector_store = client.get_or_create_collection(
    name="documents",
    metadata={"hnsw:space": "cosine"},
)