File size: 768 Bytes
372531f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from langchain_community.retrievers import ArxivRetriever


class ArxivScraper:
    """Fetch the text of an Arxiv document identified by a link.

    The last path segment of the link is used as the query handed to
    ``ArxivRetriever``; the content of the first returned document is the
    scrape result.
    """

    def __init__(self, link, session=None):
        """Store the link to scrape.

        Args:
            link: String whose last ``/``-separated segment is used as the
                Arxiv query (e.g. ``https://arxiv.org/abs/2301.00001``).
            session: Optional session object. It is stored on the instance
                but is not used by ``scrape``.
        """
        self.link = link
        self.session = session

    @staticmethod
    def _query_from_link(link):
        """Return the last non-empty ``/``-separated segment of *link*."""
        # Strip trailing slashes first so a link such as ".../abs/1234.5678/"
        # does not produce an empty query.
        return link.rstrip("/").split("/")[-1]

    def scrape(self):
        """Retrieve the Arxiv document for ``self.link`` and return its text.

        Returns:
            The ``page_content`` of the first document returned by
            ``ArxivRetriever`` for the query extracted from the link.

        Raises:
            IndexError: If the retriever returns no documents for the query.
        """
        query = self._query_from_link(self.link)
        # NOTE(review): doc_content_chars_max=None presumably disables content
        # truncation in the retriever -- confirm against the LangChain docs.
        retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None)
        # ``invoke`` takes the query as its positional ``input`` argument;
        # passing it only as ``query=...`` leaves ``input`` unfilled.
        docs = retriever.invoke(query)
        if not docs:
            raise IndexError(f"No Arxiv documents found for query {query!r}")
        return docs[0].page_content