|
from abc import ABC, abstractmethod |
|
import os |
|
from langchain.embeddings import ( |
|
OpenAIEmbeddings, |
|
HuggingFaceEmbeddings, |
|
CohereEmbeddings, |
|
HuggingFaceInstructEmbeddings, |
|
) |
|
from application.core.settings import settings |
|
|
|
class BaseVectorStore(ABC): |
|
def __init__(self): |
|
pass |
|
|
|
@abstractmethod |
|
def search(self, *args, **kwargs): |
|
pass |
|
|
|
def is_azure_configured(self): |
|
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME |
|
|
|
def _get_embeddings(self, embeddings_name, embeddings_key=None): |
|
embeddings_factory = { |
|
"openai_text-embedding-ada-002": OpenAIEmbeddings, |
|
"huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings, |
|
"huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings, |
|
"cohere_medium": CohereEmbeddings |
|
} |
|
|
|
if embeddings_name not in embeddings_factory: |
|
raise ValueError(f"Invalid embeddings_name: {embeddings_name}") |
|
|
|
if embeddings_name == "openai_text-embedding-ada-002": |
|
if self.is_azure_configured(): |
|
os.environ["OPENAI_API_TYPE"] = "azure" |
|
embedding_instance = embeddings_factory[embeddings_name]( |
|
model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME |
|
) |
|
else: |
|
embedding_instance = embeddings_factory[embeddings_name]( |
|
openai_api_key=embeddings_key |
|
) |
|
elif embeddings_name == "cohere_medium": |
|
embedding_instance = embeddings_factory[embeddings_name]( |
|
cohere_api_key=embeddings_key |
|
) |
|
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2": |
|
embedding_instance = embeddings_factory[embeddings_name]( |
|
model_name="./model/all-mpnet-base-v2", |
|
model_kwargs={"device": "cpu"}, |
|
) |
|
else: |
|
embedding_instance = embeddings_factory[embeddings_name]() |
|
|
|
return embedding_instance |
|
|
|
|