raannakasturi committed on
Commit
11659c8
·
1 Parent(s): 3984911

Implement DOI fetching for PMC IDs and refactor citation generation

Browse files
Files changed (1) hide show
  1. fetch_arxiv_data.py +17 -5
fetch_arxiv_data.py CHANGED
@@ -6,7 +6,11 @@ HEADERS = {
6
  }
7
 
8
  def fetch_pmc_doi(pmc_id):
9
- pass
 
 
 
 
10
 
11
  def fetch_arxiv_doi(arxiv_id):
12
  page_url = f"https://arxiv.org/abs/{arxiv_id}"
@@ -15,10 +19,18 @@ def fetch_arxiv_doi(arxiv_id):
15
  doi = page_data.find('td', {'class': "tablecell arxivdoi"}).find('a', {'id': 'arxiv-doi-link'}).text
16
  return doi
17
 
18
- def generate_citation(doi):
19
  citation_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=apa'}).content
20
  return citation_content.decode('utf-8')
21
 
22
- doi = fetch_arxiv_doi("2412.14338")
23
- citation = generate_citation(doi)
24
- print(citation)
 
 
 
 
 
 
 
 
 
6
  }
7
 
8
  def fetch_pmc_doi(pmc_id):
9
+ url = f"https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/[email protected]&ids={pmc_id}&format=json"
10
+ response = requests.get(url, headers=HEADERS).json()
11
+ if response['status'] == 'ok':
12
+ doi = response['records'][0]['doi']
13
+ return f"https://doi.org/{doi}"
14
 
15
  def fetch_arxiv_doi(arxiv_id):
16
  page_url = f"https://arxiv.org/abs/{arxiv_id}"
 
19
  doi = page_data.find('td', {'class': "tablecell arxivdoi"}).find('a', {'id': 'arxiv-doi-link'}).text
20
  return doi
21
 
22
def fetch_citation(doi):
    """Download the APA-style bibliography entry for a DOI URL.

    Args:
        doi: URL that is requested directly (e.g. a ``https://doi.org/...``
            link).

    Returns:
        The response body decoded as UTF-8 text.
    """
    # The Accept header asks the server for a formatted bibliography entry
    # in APA style instead of the landing page.
    request_headers = {
        'User-Agent': HEADERS['User-Agent'],
        'Accept': 'text/x-bibliography; style=apa',
    }
    reply = requests.get(doi, headers=request_headers)
    return reply.content.decode('utf-8')
25
 
26
def generate_citation(id):
    """Build a single-line citation string for a PMC or arXiv identifier.

    Args:
        id: Identifier string; values starting with ``'PMC'`` are resolved
            with ``fetch_pmc_doi``, everything else with ``fetch_arxiv_doi``.

    Returns:
        The text from ``fetch_citation`` with newlines turned into spaces
        and ``<i>``/``</i>`` tags removed.
    """
    # Choose the resolver from the identifier prefix.
    resolve_doi = fetch_pmc_doi if id.startswith('PMC') else fetch_arxiv_doi
    text = fetch_citation(resolve_doi(id))
    # Flatten the citation onto one line and drop the italic markup.
    for old, new in (('\n', ' '), ('<i>', ''), ('</i>', '')):
        text = text.replace(old, new)
    return text
33
+
34
if __name__ == '__main__':
    # Script entry point: print the citation for a sample arXiv identifier.
    print(generate_citation('2412.14338'))