raannakasturi committed on
Commit
485c7d1
·
1 Parent(s): 42db160

Update fetch_title to improve regex for title extraction and adjust main execution to fetch paper data by ID

Browse files
Files changed (1) hide show
  1. fetch_paper_data.py +4 -2
fetch_paper_data.py CHANGED
@@ -67,7 +67,9 @@ def fetch_citation(doi):
67
  def fetch_title(doi):
68
  title_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=bibtex'}).content
69
  bibtex_entry = title_content.decode('utf-8').strip()
70
- title = re.search(r'title\s*=\s*{(.*?)}', bibtex_entry)
 
 
71
  if title:
72
  return title.group(1).strip()
73
  return None
@@ -96,5 +98,5 @@ def fetch_paper_data(id):
96
  return json.dumps(data, indent=4, ensure_ascii=False)
97
 
98
  if __name__ == '__main__':
99
- data = fetch_pmc_pdf('PMC5334499')
100
  print(data)
 
67
  def fetch_title(doi):
68
  title_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=bibtex'}).content
69
  bibtex_entry = title_content.decode('utf-8').strip()
70
+ print(bibtex_entry)
71
+ title = re.search(r'title\s*=\s*{(.*?)},\s+url', bibtex_entry)
72
+ print(title)
73
  if title:
74
  return title.group(1).strip()
75
  return None
 
98
  return json.dumps(data, indent=4, ensure_ascii=False)
99
 
100
  if __name__ == '__main__':
101
+ data = fetch_paper_data('2412.20276')
102
  print(data)