"""Index the Amazon SageMaker FAQ text file into a persistent Chroma collection.

The script loads the FAQ file, splits it into overlapping chunks, embeds the
chunks and stores them in the ``sagemaker-chroma`` collection under
``./chroma_db``.
"""
from langchain_community.document_loaders.text import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# `embeddings` (used below) is not defined in this file; presumably it is
# supplied by this star import -- verify against setup.py.
from setup import *  # noqa: F401,F403

# Relative path, so the script is not tied to one machine's directory layout.
# It is resolved against the current working directory.
file = "Amazon_sagemaker_Faq.txt"

loader = TextLoader(file_path=file)

# Load the file once; the previous version called loader.load() twice and
# therefore read the same file two times.
docs = loader.load()

# `pages` is not used by this script. It is kept as a module-level name in case
# other code imports it.
pages = list(docs)

# NOTE(review): the separators are listed as ["\n", "\n\n"]. If the splitter
# tries separators in list order, the single newline always wins and "\n\n"
# is never used; ["\n\n", "\n"] may have been intended. Left unchanged here
# because reordering would change the produced chunks -- confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    add_start_index=True,
    separators=["\n", "\n\n"],
)

all_splits = text_splitter.split_documents(docs)
print(f"Split blog post into {len(all_splits)} sub-documents.")

# Relative persistence directory (instead of a machine-specific absolute
# path); the folder is located relative to the current working directory.
persist_directory = "./chroma_db"

# NOTE(review): running this script repeatedly against the same
# persist_directory may add the same chunks again -- confirm whether the
# collection should be cleared or de-duplicated first.
vector_store = Chroma.from_documents(
    documents=all_splits,
    collection_name='sagemaker-chroma',
    persist_directory=persist_directory,
    embedding=embeddings,
)