Spaces:

PabloVD
/

CAMELSDocBot

Sleeping

App Files Files Community

PabloVD committed on Nov 2, 2024

Commit

2ccbf76

1 Parent(s): f3576a5

Replace PDF loading with URL loading

Browse files

Files changed (2) hide show

app.py +16 -17
urls.txt +42 -0

app.py CHANGED Viewed

@@ -22,15 +22,15 @@ rate_limiter = InMemoryRateLimiter(
     max_bucket_size=10,  # Controls the maximum burst size.
 )
-# Get data from url
-url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
-r = requests.get(url, stream=True)
-document_path = Path('data.pdf')
-document_path.write_bytes(r.content)
-# document_path = "camels-readthedocs-io-en-latest.pdf"
-loader = PyPDFLoader(document_path)
-docs = loader.load()
 # # Load, chunk and index the contents of the blog.
 # url = "https://lilianweng.github.io/posts/2023-06-23-agent/"
@@ -70,15 +70,14 @@ docs = loader.load()
 # # base_url = "https://carla.readthedocs.io/en/latest/"
 # # urls = get_subpages(base_url)
-# tokenfile = open("urls.txt")
-# urls = tokenfile.readlines()
-# urls = [url.replace("\n","") for url in urls]
-# tokenfile.close()
-# print(urls)
-# # Load, chunk and index the contents of the blog.
-# loader = WebBaseLoader(urls)
-# docs = loader.load()
 def format_docs(docs):
     return "\n\n".join(doc.page_content for doc in docs)

     max_bucket_size=10,  # Controls the maximum burst size.
 )
+# # Get data from url
+# url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
+# r = requests.get(url, stream=True)
+# document_path = Path('data.pdf')
+# document_path.write_bytes(r.content)
+# # document_path = "camels-readthedocs-io-en-latest.pdf"
+# loader = PyPDFLoader(document_path)
+# docs = loader.load()
 # # Load, chunk and index the contents of the blog.
 # url = "https://lilianweng.github.io/posts/2023-06-23-agent/"
 # # base_url = "https://carla.readthedocs.io/en/latest/"
 # # urls = get_subpages(base_url)
+urlsfile = open("urls.txt")
+urls = urlsfile.readlines()
+urls = [url.replace("\n","") for url in urls]
+urlsfile.close()
+# Load, chunk and index the contents of the documentation pages listed in urls.txt.
+loader = WebBaseLoader(urls)
+docs = loader.load()
 def format_docs(docs):
     return "\n\n".join(doc.page_content for doc in docs)

urls.txt ADDED Viewed

	@@ -0,0 +1,42 @@

+https://camels.readthedocs.io/en/latest/
+https://camels.readthedocs.io/en/latest/news.html
+https://camels.readthedocs.io/en/latest/goals.html
+https://camels.readthedocs.io/en/latest/publications.html
+https://camels.readthedocs.io/en/latest/data_access.html
+https://camels.readthedocs.io/en/latest/citation.html
+https://camels.readthedocs.io/en/latest/description.html
+https://camels.readthedocs.io/en/latest/suites_sets.html
+https://camels.readthedocs.io/en/latest/codes.html
+https://camels.readthedocs.io/en/latest/parameters.html
+https://camels.readthedocs.io/en/latest/organization.html
+https://camels.readthedocs.io/en/latest/snapshots.html
+https://camels.readthedocs.io/en/latest/subfind.html
+https://camels.readthedocs.io/en/latest/SubLink.html
+https://camels.readthedocs.io/en/latest/rockstar.html
+https://camels.readthedocs.io/en/latest/ahf.html
+https://camels.readthedocs.io/en/latest/caesar.html
+https://camels.readthedocs.io/en/latest/Pk.html
+https://camels.readthedocs.io/en/latest/Bk.html
+https://camels.readthedocs.io/en/latest/pdf.html
+https://camels.readthedocs.io/en/latest/VIDE.html
+https://camels.readthedocs.io/en/latest/Lya.html
+https://camels.readthedocs.io/en/latest/Xrays.html
+https://camels.readthedocs.io/en/latest/Profiles.html
+https://camels.readthedocs.io/en/latest/CMD.html
+https://camels.readthedocs.io/en/latest/SAM.html
+https://camels.readthedocs.io/en/latest/zoomGZ.html
+https://camels.readthedocs.io/en/latest/tutorials.html
+https://camels.readthedocs.io/en/latest/images.html
+https://camels.readthedocs.io/en/latest/camels_library.html
+https://camels.readthedocs.io/en/latest/pylians3.html
+https://camels.readthedocs.io/en/latest/team.html
+https://camels.readthedocs.io/en/latest/contact.html
+https://camels.readthedocs.io/en/latest/logo.html
+https://camels.readthedocs.io/en/latest/examples/Reading_Manipulating_Snapshots.html
+https://camels.readthedocs.io/en/latest/examples/Pk.html
+https://camels.readthedocs.io/en/latest/examples/Images.html
+https://camels.readthedocs.io/en/latest/examples/particles_subhalos.html
+https://camels.readthedocs.io/en/latest/index.html
+https://camels.readthedocs.io/en/latest/Images.html
+https://camels.readthedocs.io/en/latest/particles_subhalos.html
+https://camels.readthedocs.io/en/latest/Reading_Manipulating_Snapshots.html