Dharma20 committed on
Commit
162cd18
·
verified ·
1 Parent(s): 814f9c6

Update gradio_embedding.py

Browse files
Files changed (1) hide show
  1. gradio_embedding.py +36 -36
gradio_embedding.py CHANGED
@@ -1,37 +1,37 @@
1
- from langchain_community.document_loaders.text import TextLoader
2
- from langchain_community.vectorstores import Chroma
3
- from langchain_text_splitters import RecursiveCharacterTextSplitter
4
- from setup import *
5
-
6
- # Use a relative path:
7
- file = "./data/Amazon_sagemaker_Faq.txt" # Assuming you have a data folder in your project
8
-
9
- loader = TextLoader(file_path=file)
10
- pages = []
11
- for page in loader.load():
12
- pages.append(page)
13
-
14
- docs = loader.load()
15
-
16
- text_splitter = RecursiveCharacterTextSplitter(
17
- chunk_size=500,
18
- chunk_overlap=50,
19
- add_start_index=True,
20
- separators=["\n", "\n\n"]
21
- )
22
-
23
- all_splits = text_splitter.split_documents(docs)
24
- print(f"Split blog post into {len(all_splits)} sub-documents.")
25
-
26
- # Instead of Windows absolute path for persistence:
27
- # persist_directory = "D:\\Education\\AI\\AI-Agents\\Agentic-RAG"
28
-
29
- # Use a relative path:
30
- persist_directory = "./chroma_db" # This will create a chroma_db folder in your app's directory
31
-
32
- vector_store = Chroma.from_documents(
33
- documents=all_splits,
34
- collection_name='sagemaker-chroma',
35
- persist_directory=persist_directory,
36
- embedding=embeddings
37
  )
 
1
+ from langchain_community.document_loaders.text import TextLoader
2
+ from langchain_community.vectorstores import Chroma
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from setup import *
5
+
6
+ # Use a relative path:
7
+ file = "Amazon_sagemaker_Faq.txt" # Resolved relative to the current working directory (the file is no longer under ./data)
8
+
9
+ loader = TextLoader(file_path=file)
10
+ pages = []
11
+ for page in loader.load():
12
+ pages.append(page)
13
+
14
+ docs = loader.load()
15
+
16
+ text_splitter = RecursiveCharacterTextSplitter(
17
+ chunk_size=500,
18
+ chunk_overlap=50,
19
+ add_start_index=True,
20
+ separators=["\n", "\n\n"]
21
+ )
22
+
23
+ all_splits = text_splitter.split_documents(docs)
24
+ print(f"Split blog post into {len(all_splits)} sub-documents.")
25
+
26
+ # Instead of Windows absolute path for persistence:
27
+ # persist_directory = "D:\\Education\\AI\\AI-Agents\\Agentic-RAG"
28
+
29
+ # Use a relative path:
30
+ persist_directory = "./chroma_db" # This will create a chroma_db folder in your app's directory
31
+
32
+ vector_store = Chroma.from_documents(
33
+ documents=all_splits,
34
+ collection_name='sagemaker-chroma',
35
+ persist_directory=persist_directory,
36
+ embedding=embeddings
37
  )