Spaces:
Sleeping
Sleeping
import os | |
from langchain_chroma import Chroma | |
# from langchain_ollama import OllamaEmbeddings | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain_core.documents import Document | |
# Set before the embedding model below is constructed.
# NOTE(review): presumably this points the sentence-transformers model cache
# at a project-local directory - confirm against the library documentation.
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "./.cache"

# Module-level embedding function shared by the indexing and query helpers.
embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Alternative embedding backend, currently disabled (its import at the top of
# the file is commented out as well):
# embed = OllamaEmbeddings(model="jina/jina-embeddings-v2-base-es")
def save_handbook_to_chroma(
    handbook_data: list, persist_directory: str = "./chroma_data"
) -> bool:
    """Embed every handbook section and persist the result in a Chroma store.

    Args:
        handbook_data: List of chapters, where each chapter is an iterable of
            section dictionaries. The ``text``, ``title`` and ``url`` keys of
            each section are read; missing or ``None`` values become ``""``.
        persist_directory: Directory in which the Chroma collection is
            persisted. Defaults to ``"./chroma_data"``.

    Returns:
        bool: True if the handbook was saved. False if there was nothing to
        save or if any error occurred; errors are printed, never raised.
    """
    try:
        # Built inside the ``try`` so malformed input (a chapter that is not
        # iterable, a section that is not a dict, ...) yields the documented
        # False result instead of an uncaught exception.
        documents = [
            Document(
                # ``or ""`` also covers keys that are present but hold None.
                page_content=section.get("text") or "",
                metadata={
                    "title": section.get("title") or "",
                    "url": section.get("url") or "",
                },
            )
            for chapter in handbook_data
            for section in chapter
        ]

        if not documents:
            print("No handbook sections to save to Chroma.")
            return False

        print("Saving handbook to Chroma. This process can take a long time.")

        # Ids are the 1-based position of each section in iteration order.
        ids = [str(position) for position in range(1, len(documents) + 1)]
        Chroma.from_documents(
            documents=documents,
            embedding=embed,
            persist_directory=persist_directory,
            ids=ids,
        )
        return True
    except Exception as e:
        print(f"Error saving handbook to Chroma: {e}")
        return False
def ask_chroma(
    question: str, k: int = 3, persist_directory: str = "./chroma_data"
) -> list:
    """Return the stored handbook documents most similar to ``question``.

    Args:
        question: The question to search the Chroma store with.
        k: The number of most similar results to request. Default is 3.
        persist_directory: Directory the Chroma collection is loaded from.
            Defaults to ``"./chroma_data"``.

    Returns:
        list: The result of the vector store's ``similarity_search`` call.
        An empty list is returned if any error occurs; errors are printed,
        never raised.
    """
    try:
        vectorstore = Chroma(
            embedding_function=embed,
            persist_directory=persist_directory,
        )
        return vectorstore.similarity_search(question, k=k)
    except Exception as e:
        print(f"Error asking Chroma: {e}")
        # Same container type as the success path, so callers can always
        # iterate or take len() of the result.
        return []