raannakasturi committed on
Commit
8a237e1
·
1 Parent(s): 485c7d1

Update fetch_title regex for title extraction and modify main execution to fetch paper data by PMC ID

Browse files
Files changed (1) hide show
  1. fetch_paper_data.py +2 -3
fetch_paper_data.py CHANGED
@@ -67,8 +67,7 @@ def fetch_citation(doi):
67
  def fetch_title(doi):
68
  title_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=bibtex'}).content
69
  bibtex_entry = title_content.decode('utf-8').strip()
70
- print(bibtex_entry)
71
- title = re.search(r'title\s*=\s*{(.*?)},\s+url', bibtex_entry)
72
  print(title)
73
  if title:
74
  return title.group(1).strip()
@@ -98,5 +97,5 @@ def fetch_paper_data(id):
98
  return json.dumps(data, indent=4, ensure_ascii=False)
99
 
100
  if __name__ == '__main__':
101
- data = fetch_paper_data('2412.20276')
102
  print(data)
 
67
  def fetch_title(doi):
68
  title_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=bibtex'}).content
69
  bibtex_entry = title_content.decode('utf-8').strip()
70
+ title = re.search(r'title\s*=\s*{(.*?)},\s+', bibtex_entry)
 
71
  print(title)
72
  if title:
73
  return title.group(1).strip()
 
97
  return json.dumps(data, indent=4, ensure_ascii=False)
98
 
99
  if __name__ == '__main__':
100
+ data = fetch_paper_data('PMC11577001')
101
  print(data)