Spaces:
Running
Running
# Index the Amazon SageMaker FAQ text file into a persistent Chroma collection.
#
# Steps: load the text file, split it into overlapping chunks, embed the
# chunks, and store them in the on-disk collection "sagemaker-chroma".
from langchain_community.document_loaders.text import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# NOTE(review): `embeddings` (used below) is not defined in this file, so it
# presumably comes from this star import -- confirm against the setup module.
from setup import *

# Relative path: resolved against the working directory of the process.
file = "Amazon_sagemaker_Faq.txt"
loader = TextLoader(file_path=file)

# Read the file once and reuse the result; the previous version called
# loader.load() twice, doing the same disk read two times.
docs = loader.load()
# `pages` is kept as a separate list (as before) in case other code reads it.
pages = list(docs)

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    add_start_index=True,
    # Separators are tried in order, so the coarsest must come first. With
    # "\n" listed first, "\n\n" could never match: every piece had already
    # been cut at single newlines. Paragraph breaks are now preferred, and
    # single newlines are only used for paragraphs that exceed chunk_size.
    separators=["\n\n", "\n"],
)
all_splits = text_splitter.split_documents(docs)
print(f"Split blog post into {len(all_splits)} sub-documents.")

# Relative path: the store is written to a chroma_db folder under the
# working directory of the process.
persist_directory = "./chroma_db"
vector_store = Chroma.from_documents(
    documents=all_splits,
    collection_name='sagemaker-chroma',
    persist_directory=persist_directory,
    embedding=embeddings,
)