# app/core/vector_store.py
"""Embedding model and ChromaDB collection shared by the application.

Importing this module has side effects:

* loads variables from a ``.env`` file via ``load_dotenv()``;
* builds ``embedding_model`` for the provider named by ``LLM_PROVIDER``
  (``"openai"`` by default, or ``"azure"``);
* opens a persistent ChromaDB client at ``./chroma_db`` and gets or creates
  the ``documents`` collection, exposed as ``vector_store``.

Raises:
    ValueError: at import time, if the provider is unsupported or a required
        environment variable for the selected provider is missing or empty.
"""
import os

import chromadb
from dotenv import load_dotenv
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings

# Load environment variables from the .env file.
load_dotenv()

# Configured LLM provider, compared case-insensitively.
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai").lower()

embedding_model = None

print(f"--- [Core] Initializing Embeddings with provider: {LLM_PROVIDER} ---")

if LLM_PROVIDER == "azure":
    # --- Azure OpenAI configuration ---
    required_vars = [
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_API_KEY",
        "OPENAI_API_VERSION",
        "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME",
    ]
    # Read every variable exactly once so the values that are validated are
    # the same values handed to the embeddings client.
    azure_env = {name: os.getenv(name) for name in required_vars}

    # An empty string counts as "not set", matching the truthiness check
    # this validation has always used.
    missing_vars = [name for name, value in azure_env.items() if not value]
    if missing_vars:
        missing_list = ", ".join(missing_vars)
        raise ValueError(
            "One or more Azure OpenAI environment variables for embeddings "
            f"are not set: {missing_list}"
        )

    embedding_model = AzureOpenAIEmbeddings(
        azure_endpoint=azure_env["AZURE_OPENAI_ENDPOINT"],
        api_key=azure_env["AZURE_OPENAI_API_KEY"],
        api_version=azure_env["OPENAI_API_VERSION"],
        azure_deployment=azure_env["AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"],
    )

elif LLM_PROVIDER == "openai":
    # --- Standard OpenAI configuration ---
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY is not set for the 'openai' provider.")

    embedding_model = OpenAIEmbeddings(
        api_key=openai_api_key,
        model=os.getenv("OPENAI_EMBEDDING_MODEL_NAME", "text-embedding-3-small"),
    )

else:
    raise ValueError(
        f"Unsupported LLM_PROVIDER: {LLM_PROVIDER}. Please use 'azure' or 'openai'."
    )

# Initialize the ChromaDB client. The path is relative, so the database
# directory is resolved against the process's current working directory.
client = chromadb.PersistentClient(path="./chroma_db")

# The collection is created on first use and reused afterwards; the metadata
# sets "hnsw:space" to "cosine" for the collection.
vector_store = client.get_or_create_collection(
    name="documents",
    metadata={"hnsw:space": "cosine"},
)