Spaces:
Runtime error
Runtime error
Yijun-Yang
committed on
Commit
·
ac3d7d8
1
Parent(s):
b90b0df
updatesucesspara
Browse files
huixiangdou/service/findarticles.py
CHANGED
@@ -60,7 +60,7 @@ class ArticleRetrieval:
|
|
60 |
if not os.path.exists(self.repo_dir):
|
61 |
os.makedirs(self.repo_dir)
|
62 |
print(f"Saving articles to {self.repo_dir}.")
|
63 |
-
success = 0
|
64 |
for id in tqdm(self.pmc_ids, desc="Fetching full texts", unit="article"):
|
65 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
66 |
params = {
|
@@ -71,19 +71,20 @@ class ArticleRetrieval:
|
|
71 |
}
|
72 |
response = requests.get(base_url, params=params)
|
73 |
full_text = self._clean_xml(response.text)
|
74 |
-
if full_text == '':
|
75 |
continue
|
76 |
else:
|
|
|
77 |
with open(os.path.join(self.repo_dir,f'PMC{id}.txt'), 'w') as f:
|
78 |
f.write(full_text)
|
79 |
-
success += 1
|
80 |
|
81 |
def save_config(self):
|
82 |
config = {
|
83 |
'keywords': self.keywords,
|
84 |
'repo_dir': self.repo_dir,
|
85 |
'pmc_ids': self.pmc_ids,
|
86 |
-
'len': success,
|
87 |
'retmax': self.retmax
|
88 |
}
|
89 |
with open(os.path.join(self.repo_dir, 'config.json'), 'w') as f:
|
|
|
60 |
if not os.path.exists(self.repo_dir):
|
61 |
os.makedirs(self.repo_dir)
|
62 |
print(f"Saving articles to {self.repo_dir}.")
|
63 |
+
self.success = 0
|
64 |
for id in tqdm(self.pmc_ids, desc="Fetching full texts", unit="article"):
|
65 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
66 |
params = {
|
|
|
71 |
}
|
72 |
response = requests.get(base_url, params=params)
|
73 |
full_text = self._clean_xml(response.text)
|
74 |
+
if full_text.strip() == '':
|
75 |
continue
|
76 |
else:
|
77 |
+
logger.info(full_text[:1000])
|
78 |
with open(os.path.join(self.repo_dir,f'PMC{id}.txt'), 'w') as f:
|
79 |
f.write(full_text)
|
80 |
+
self.success += 1
|
81 |
|
82 |
def save_config(self):
|
83 |
config = {
|
84 |
'keywords': self.keywords,
|
85 |
'repo_dir': self.repo_dir,
|
86 |
'pmc_ids': self.pmc_ids,
|
87 |
+
'len': self.success,
|
88 |
'retmax': self.retmax
|
89 |
}
|
90 |
with open(os.path.join(self.repo_dir, 'config.json'), 'w') as f:
|