Spaces:
Running
Running
jianghuyihei
committed on
Commit
•
a81bf47
1
Parent(s):
5b59897
fix
Browse files- searcher/sementic_search.py +23 -2
searcher/sementic_search.py
CHANGED
@@ -55,6 +55,23 @@ async def fetch(url):
|
|
55 |
print(e)
|
56 |
return None
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
class Result:
|
59 |
def __init__(self,title="",abstract="",article = "",citations_conut = 0,year = None) -> None:
|
60 |
self.title = title
|
@@ -283,7 +300,7 @@ Abstract: {paper['abstract']}
|
|
283 |
for result in paper_candidates:
|
284 |
pdf_link = result['openAccessPdf']["url"]
|
285 |
try:
|
286 |
-
content =
|
287 |
if not content:
|
288 |
continue
|
289 |
except Exception as e:
|
@@ -359,7 +376,7 @@ Abstract: {paper['abstract']}
|
|
359 |
print(f"Found {len(related_papers)} related papers")
|
360 |
for paper in related_papers:
|
361 |
url = paper[2]
|
362 |
-
content =
|
363 |
if content:
|
364 |
article = self.read_arxiv_from_path(content)
|
365 |
if not article:
|
@@ -375,6 +392,10 @@ Abstract: {paper['abstract']}
|
|
375 |
return None
|
376 |
else:
|
377 |
return content
|
|
|
|
|
|
|
|
|
378 |
|
379 |
def read_paper_title_abstract(self,article):
|
380 |
title = article["title"]
|
|
|
55 |
print(e)
|
56 |
return None
|
57 |
|
58 |
+
def download(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the raw response body.

    Args:
        url: Address of the file to download.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument. Without one, a server that never answers would keep
            the caller waiting forever.

    Returns:
        The response body (``response.content``) when the status code is
        200, otherwise ``None``. Failures are printed rather than raised.
    """
    try:
        # A timeout surfaces as a requests.RequestException subclass, so it
        # is reported by the first handler below instead of hanging.
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.content
        print(f"Failed to download the file from the URL: {url}")
        return None
    except requests.RequestException as e:
        print(f"An error occurred while downloading the file from the URL: {url}")
        print(e)
        return None
    except Exception as e:
        # Best-effort helper: callers only check for a falsy result, so any
        # other failure is logged and converted to None as well.
        print(f"An unexpected error occurred while downloading the file from the URL: {url}")
        print(e)
        return None
|
74 |
+
|
75 |
class Result:
|
76 |
def __init__(self,title="",abstract="",article = "",citations_conut = 0,year = None) -> None:
|
77 |
self.title = title
|
|
|
300 |
for result in paper_candidates:
|
301 |
pdf_link = result['openAccessPdf']["url"]
|
302 |
try:
|
303 |
+
content = self.download_pdf(pdf_link)
|
304 |
if not content:
|
305 |
continue
|
306 |
except Exception as e:
|
|
|
376 |
print(f"Found {len(related_papers)} related papers")
|
377 |
for paper in related_papers:
|
378 |
url = paper[2]
|
379 |
+
content = self.download_pdf(url)
|
380 |
if content:
|
381 |
article = self.read_arxiv_from_path(content)
|
382 |
if not article:
|
|
|
392 |
return None
|
393 |
else:
|
394 |
return content
|
395 |
+
|
396 |
+
def download_pdf(self, pdf_link):
    """Return whatever the module-level ``download`` yields for ``pdf_link``.

    This method adds no logic of its own; it forwards the link to
    ``download`` and hands the result straight back to the caller.
    """
    return download(pdf_link)
|
399 |
|
400 |
def read_paper_title_abstract(self,article):
|
401 |
title = article["title"]
|