Commit
·
485c7d1
1
Parent(s):
42db160
Update fetch_title to improve regex for title extraction and adjust main execution to fetch paper data by ID
Browse files- fetch_paper_data.py +4 -2
fetch_paper_data.py
CHANGED
@@ -67,7 +67,9 @@ def fetch_citation(doi):
|
|
67 |
def fetch_title(doi):
|
68 |
title_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=bibtex'}).content
|
69 |
bibtex_entry = title_content.decode('utf-8').strip()
|
70 |
-
|
|
|
|
|
71 |
if title:
|
72 |
return title.group(1).strip()
|
73 |
return None
|
@@ -96,5 +98,5 @@ def fetch_paper_data(id):
|
|
96 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
97 |
|
98 |
if __name__ == '__main__':
|
99 |
-
data =
|
100 |
print(data)
|
|
|
67 |
def fetch_title(doi):
    """Fetch the BibTeX record for *doi* and return its ``title`` field.

    Args:
        doi: URL handed unchanged to ``requests.get`` (presumably a DOI
            resolver URL such as ``https://doi.org/...`` -- confirm against
            callers).

    Returns:
        The whitespace-stripped contents of the ``title = {...}`` field, or
        ``None`` when the response contains no such field (or its braces
        are never closed).
    """
    response = requests.get(
        doi,
        headers={
            'User-Agent': HEADERS['User-Agent'],
            # Ask the server for a BibTeX-formatted bibliography entry.
            'Accept': 'text/x-bibliography; style=bibtex',
        },
    )
    bibtex_entry = response.content.decode('utf-8').strip()

    # ``\b`` keeps fields such as ``booktitle`` from being mistaken for
    # ``title``.
    opening = re.search(r'\btitle\s*=\s*\{', bibtex_entry)
    if opening is None:
        return None

    # Walk to the brace that closes the field instead of anchoring on the
    # field that happens to follow it: the previous pattern required
    # "}, url" right after the title and otherwise either missed the title
    # or swallowed the fields in between. Counting depth also keeps nested
    # groups such as "{DNA}" inside the title intact.
    start = opening.end()
    depth = 1
    for position in range(start, len(bibtex_entry)):
        char = bibtex_entry[position]
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return bibtex_entry[start:position].strip()

    # Unbalanced braces: the title field never closes.
    return None
|
|
|
98 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
99 |
|
100 |
if __name__ == '__main__':
    # Script entry point: look up one hard-coded paper ID and print the
    # string returned by fetch_paper_data.
    print(fetch_paper_data('2412.20276'))
|