# FAQ-Chatbot / gradio_embedding.py
# Repository page header captured with the file (not code):
# "Dharma20's picture" / "Update gradio_embedding.py" / "162cd18 verified"
from langchain_community.document_loaders.text import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from setup import *
# Load the FAQ text file. The path is relative, so it is resolved against the
# current working directory of the process that runs this script.
file = "Amazon_sagemaker_Faq.txt"
loader = TextLoader(file_path=file)

# Read the file a single time and reuse the result for both names, instead of
# invoking loader.load() once per name.
docs = loader.load()
# `pages` is a separate list object holding the same loaded documents.
pages = list(docs)
# Split the loaded documents into overlapping chunks of at most ~500
# characters, recording each chunk's start offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    add_start_index=True,
    # Separators are tried in order, so the coarsest one must come first:
    # break on blank lines (paragraphs) and fall back to single newlines only
    # for paragraphs that are still too long. With "\n" listed first, the
    # "\n\n" entry could never take effect.
    separators=["\n\n", "\n"],
)
all_splits = text_splitter.split_documents(docs)
print(f"Split blog post into {len(all_splits)} sub-documents.")
# Persist the index in a folder relative to the working directory (rather than
# a machine-specific absolute path) so the script runs unchanged on any host.
persist_directory = "./chroma_db"

# Position-based ids are identical on every run. Chroma writes documents by id
# (upsert), so re-running this script against the same persist_directory
# overwrites the stored chunks instead of appending a duplicate copy of each
# chunk under a fresh random id.
_chunk_ids = [f"sagemaker-faq-{position}" for position, _ in enumerate(all_splits)]

# Embed every chunk and store it in the persistent "sagemaker-chroma"
# collection.
vector_store = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,  # not defined in this file; presumably exported by `setup`
    ids=_chunk_ids,
    collection_name="sagemaker-chroma",
    persist_directory=persist_directory,
)