Spaces:

DAMO-NLP-SG
/

CoI_Agent

Running

App Files Community

jianghuyihei committed on Oct 12

Commit

a81bf47

•

1 Parent(s): 5b59897

fix

Browse files

Files changed (1) hide show

searcher/sementic_search.py +23 -2

searcher/sementic_search.py CHANGED Viewed

@@ -55,6 +55,23 @@ async def fetch(url):
         print(e)
         return None
 class Result:
     def __init__(self,title="",abstract="",article = "",citations_conut = 0,year = None) -> None:
         self.title = title
@@ -283,7 +300,7 @@ Abstract: {paper['abstract']}
             for result in paper_candidates:
                 pdf_link = result['openAccessPdf']["url"]
                 try:
-                    content = await self.download_pdf_async(pdf_link)
                     if not content:
                         continue
                 except Exception as e:
@@ -359,7 +376,7 @@ Abstract: {paper['abstract']}
         print(f"Found {len(related_papers)} related papers")
         for paper in related_papers:
             url = paper[2]
-            content = await self.download_pdf_async(url)
             if content:
                 article = self.read_arxiv_from_path(content)
                 if not article:
@@ -375,6 +392,10 @@ Abstract: {paper['abstract']}
             return None
         else:
             return content
     def read_paper_title_abstract(self,article):
         title = article["title"]

         print(e)
         return None
+def download(url, timeout=30):
+    """Fetch *url* with a blocking HTTP GET and return the response body.
+
+    Args:
+        url: Address of the file to download.
+        timeout: Seconds to wait for the server before giving up. Passed
+            straight to ``requests.get``; a timeout surfaces as a
+            ``requests.RequestException`` and is reported like any other
+            request failure.
+
+    Returns:
+        The response body (``response.content``) when the status code is
+        200, otherwise ``None``. ``None`` is also returned when the request
+        raises; the error is printed rather than propagated.
+    """
+    try:
+        # Without an explicit timeout, requests waits indefinitely on a
+        # server that accepts the connection but never answers.
+        response = requests.get(url, timeout=timeout)
+        if response.status_code == 200:
+            return response.content
+        else:
+            print(f"Failed to download the file from the URL: {url}")
+            return None
+    except requests.RequestException as e:
+        print(f"An error occurred while downloading the file from the URL: {url}")
+        print(e)
+        return None
+    except Exception as e:
+        # Best-effort boundary: any non-requests failure is also reported
+        # and turned into None so callers only have to check the result.
+        print(f"An unexpected error occurred while downloading the file from the URL: {url}")
+        print(e)
+        return None
 class Result:
     def __init__(self,title="",abstract="",article = "",citations_conut = 0,year = None) -> None:
         self.title = title
             for result in paper_candidates:
                 pdf_link = result['openAccessPdf']["url"]
                 try:
+                    content = self.download_pdf(pdf_link)
                     if not content:
                         continue
                 except Exception as e:
         print(f"Found {len(related_papers)} related papers")
         for paper in related_papers:
             url = paper[2]
+            content = self.download_pdf(url)
             if content:
                 article = self.read_arxiv_from_path(content)
                 if not article:
             return None
         else:
             return content
+    def download_pdf(self, pdf_link):
+        """Return whatever the module-level ``download`` yields for *pdf_link*."""
+        return download(pdf_link)
     def read_paper_title_abstract(self,article):
         title = article["title"]