wzkariampuzha committed on
Commit
9d85d67
·
1 Parent(s): e517955

Update classify_abs.py

Browse files
Files changed (1) hide show
  1. classify_abs.py +38 -40
classify_abs.py CHANGED
@@ -288,46 +288,44 @@ def streamlist_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults
288
  searchterm_list = [searchterm_list]
289
  else:
290
  searchterm_list = list(searchterm_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
- percent_by_step = 1/(maxResults*1.25) #maxResults is multiplied by a little bit because sometimes the results returned is more than maxResults
293
-
294
- with PMIDs_bar = st.progress(0):
295
-
296
-
297
- for dz in searchterm_list:
298
- term = ''
299
- dz_words = dz.split()
300
- for word in dz_words:
301
- term += word + '%20'
302
- query = term[:-3]
303
-
304
- url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query
305
- r = requests.get(url)
306
- root = ET.fromstring(r.content)
307
-
308
- for result in root.iter('IdList'):
309
- if len(pmids) >= maxResults:
310
- break
311
- pmidlist = [pmid.text for pmid in result.iter('Id')]
312
- pmids.update(pmidlist)
313
- PMIDs_bar.progress(round(len(pmids)*percent_by_step,1))
314
-
315
- url = 'https://www.ebi.ac.uk/europepmc/webservices/rest/search?query='+query+'&resulttype=core'
316
- r = requests.get(url)
317
- root = ET.fromstring(r.content)
318
-
319
- for result in root.iter('result'):
320
- if len(pmids) >= maxResults:
321
- break
322
- pmidlist = [pmid.text for pmid in result.iter('id')]
323
- if len(pmidlist) > 0:
324
- pmid = pmidlist[0]
325
- if pmid[0].isdigit():
326
- pmids.add(pmid)
327
- PMIDs_bar.progress(round(len(pmids)*percent_by_step,1))
328
- st.success('Found',len(pmids),'PMIDs. Gathering Abstracts and Filtering...')
329
-
330
- with abstracts_bar = st.progress(0):
331
  percent_by_step = 1/(maxResults)
332
  if filtering !='none' or filtering !='strict':
333
  filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
@@ -353,7 +351,7 @@ def streamlist_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults
353
  if set(filter_terms).intersection(set(word_tokenize(abstract))):
354
  pmid_abs[pmid] = abstract
355
  abstracts_bar.progress(round(len(pmid_abs)*percent_by_step,1))
356
-
357
  st.success('Found',len(pmids),'PMIDs. Gathered',len(pmid_abs),'Relevant Abstracts.')
358
 
359
  return pmid_abs
 
288
  searchterm_list = [searchterm_list]
289
  else:
290
  searchterm_list = list(searchterm_list)
291
+ #maxResults is multiplied by 1.25 because the number of results returned is sometimes more than maxResults
292
+ percent_by_step = 1/(maxResults*1.25)
293
+ PMIDs_bar = st.progress(0):
294
+ for dz in searchterm_list:
295
+ term = ''
296
+ dz_words = dz.split()
297
+ for word in dz_words:
298
+ term += word + '%20'
299
+ query = term[:-3]
300
+
301
+ url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query
302
+ r = requests.get(url)
303
+ root = ET.fromstring(r.content)
304
+
305
+ for result in root.iter('IdList'):
306
+ if len(pmids) >= maxResults:
307
+ break
308
+ pmidlist = [pmid.text for pmid in result.iter('Id')]
309
+ pmids.update(pmidlist)
310
+ PMIDs_bar.progress(round(len(pmids)*percent_by_step,1))
311
+
312
+ url = 'https://www.ebi.ac.uk/europepmc/webservices/rest/search?query='+query+'&resulttype=core'
313
+ r = requests.get(url)
314
+ root = ET.fromstring(r.content)
315
+
316
+ for result in root.iter('result'):
317
+ if len(pmids) >= maxResults:
318
+ break
319
+ pmidlist = [pmid.text for pmid in result.iter('id')]
320
+ if len(pmidlist) > 0:
321
+ pmid = pmidlist[0]
322
+ if pmid[0].isdigit():
323
+ pmids.add(pmid)
324
+ PMIDs_bar.progress(round(len(pmids)*percent_by_step,1))
325
+ PMIDs_bar.empty()
326
 
327
+ with st.success('Found',len(pmids),'PMIDs. Gathering Abstracts and Filtering...'):
328
+ abstracts_bar = st.progress(0):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  percent_by_step = 1/(maxResults)
330
  if filtering !='none' or filtering !='strict':
331
  filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
 
351
  if set(filter_terms).intersection(set(word_tokenize(abstract))):
352
  pmid_abs[pmid] = abstract
353
  abstracts_bar.progress(round(len(pmid_abs)*percent_by_step,1))
354
+ abstracts_bar.empty()
355
  st.success('Found',len(pmids),'PMIDs. Gathered',len(pmid_abs),'Relevant Abstracts.')
356
 
357
  return pmid_abs