Commit
·
11659c8
1
Parent(s):
3984911
Implement DOI fetching for PMC IDs and refactor citation generation
Browse files
- fetch_arxiv_data.py +17 -5
fetch_arxiv_data.py
CHANGED
@@ -6,7 +6,11 @@ HEADERS = {
|
|
6 |
}
|
7 |
|
8 |
def fetch_pmc_doi(pmc_id):
|
9 |
-
|
|
|
|
|
|
|
|
|
10 |
|
11 |
def fetch_arxiv_doi(arxiv_id):
|
12 |
page_url = f"https://arxiv.org/abs/{arxiv_id}"
|
@@ -15,10 +19,18 @@ def fetch_arxiv_doi(arxiv_id):
|
|
15 |
doi = page_data.find('td', {'class': "tablecell arxivdoi"}).find('a', {'id': 'arxiv-doi-link'}).text
|
16 |
return doi
|
17 |
|
18 |
-
def
|
19 |
citation_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=apa'}).content
|
20 |
return citation_content.decode('utf-8')
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
}
|
7 |
|
8 |
def fetch_pmc_doi(pmc_id):
    """Resolve a PubMed Central ID to a ``https://doi.org/...`` URL.

    Queries the NCBI ID Converter service and reads the DOI from the first
    record of the JSON response.

    Args:
        pmc_id: PMC identifier; interpolated into the ``ids`` query parameter.

    Returns:
        The DOI as a ``https://doi.org/`` URL, or ``None`` when the service
        does not report ``status == 'ok'`` or the first record carries no
        ``doi`` field.
    """
    # NOTE(review): the "[email protected]" segment looks like an e-mail
    # obfuscation artifact of the page this code was rendered on (the query
    # string presumably began with "?tool=...&email=..."). It is kept
    # verbatim here -- confirm against the real file before relying on it.
    url = f"https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/[email protected]&ids={pmc_id}&format=json"
    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, headers=HEADERS, timeout=30).json()
    if response.get('status') != 'ok':
        return None
    # Guard the lookup: an empty record list or a record without a 'doi'
    # key would otherwise surface as IndexError/KeyError.
    records = response.get('records') or [{}]
    doi = records[0].get('doi')
    if not doi:
        return None
    return f"https://doi.org/{doi}"
|
14 |
|
15 |
def fetch_arxiv_doi(arxiv_id):
|
16 |
page_url = f"https://arxiv.org/abs/{arxiv_id}"
|
|
|
19 |
doi = page_data.find('td', {'class': "tablecell arxivdoi"}).find('a', {'id': 'arxiv-doi-link'}).text
|
20 |
return doi
|
21 |
|
22 |
+
def fetch_citation(doi):
    """Fetch an APA-style citation for a DOI via HTTP content negotiation.

    Args:
        doi: URL to request; it is passed to ``requests.get`` unchanged.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            an error page is never returned as if it were a citation.
    """
    request_headers = {
        'User-Agent': HEADERS['User-Agent'],
        # Ask the resolver for a formatted bibliography entry, not a page.
        'Accept': 'text/x-bibliography; style=apa',
    }
    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(doi, headers=request_headers, timeout=30)
    response.raise_for_status()
    return response.content.decode('utf-8')
|
25 |
|
26 |
+
def generate_citation(id):
    """Build a single-line, tag-free citation string for a paper ID.

    IDs starting with ``'PMC'`` are resolved through ``fetch_pmc_doi``;
    everything else is treated as an arXiv ID and resolved through
    ``fetch_arxiv_doi``. The DOI is then passed to ``fetch_citation``.

    Args:
        id: Paper identifier string. (The name shadows the builtin but is
            kept so existing keyword callers continue to work.)

    Returns:
        The citation text with newlines replaced by spaces and ``<i>`` /
        ``</i>`` tags removed.

    Raises:
        ValueError: if no DOI could be resolved for ``id``.
    """
    if id.startswith('PMC'):
        doi = fetch_pmc_doi(id)
    else:
        doi = fetch_arxiv_doi(id)
    # fetch_pmc_doi can return None; fail with a clear message here instead
    # of letting the HTTP layer choke on a None URL.
    if not doi:
        raise ValueError(f"Could not resolve a DOI for {id!r}")
    citation = fetch_citation(doi).replace('\n', ' ').replace('<i>', '').replace('</i>', '')
    return citation
|
33 |
+
|
34 |
+
if __name__ == '__main__':
    import sys

    # Optional first CLI argument selects the paper; with no argument the
    # script falls back to the original demo arXiv ID.
    paper_id = sys.argv[1] if len(sys.argv) > 1 else '2412.14338'
    citation = generate_citation(paper_id)
    print(citation)
|