wzkariampuzha committed on
Commit
77a72db
·
1 Parent(s): 44803cb

Update extract_abs.py

Browse files
Files changed (1) hide show
  1. extract_abs.py +24 -21
extract_abs.py CHANGED
@@ -302,27 +302,30 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
302
 
303
  #Gather title+abstracts into a dictionary {pmid:abstract}
304
  pmid_abs = classify_abs.search_getAbs(search_term_list, maxResults, filtering)
305
- st.write("Gathered " +str(len(pmid_abs))+" PubMed IDs. Classifying and extracting epidemiology information...")
306
-
307
- i = 0
308
- my_bar = st.progress(i)
309
- percent_at_step = 100/len(pmid_abs)
310
- for pmid, abstract in pmid_abs.items():
311
- epi_prob, isEpi = classify_abs.getTextPredictions(abstract, classify_model_vars)
312
- if isEpi:
313
- #Preprocessing Functions for Extraction
314
- sentences = str2sents(abstract)
315
- model_outputs = [NER_pipeline(sent) for sent in sentences]
316
- extraction = parse_info(sentences, model_outputs, entity_classes, extract_diseases, GARD_dict, max_length)
317
- if extraction:
318
- extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'EPI_PROB':epi_prob, 'IsEpi':isEpi})
319
- #Slow dataframe update
320
- results = results.append(extraction, ignore_index=True)
321
- i+=1
322
- my_bar.progress(round(i*percent_at_step/100,1))
323
-
324
- st.write(len(results),'abstracts classified as epidemiological.')
325
- return results.sort_values('EPI_PROB', ascending=False)
 
 
 
326
 
327
  #Identical to search_term_extraction, except it returns a JSON object instead of a df
328
  def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
 
302
 
303
  #Gather title+abstracts into a dictionary {pmid:abstract}
304
  pmid_abs = classify_abs.search_getAbs(search_term_list, maxResults, filtering)
305
+ if len(pmid_abs)==0:
306
+ st.error('No results were gathered. Enter a new search term.')
307
+ else:
308
+ st.write("Gathered " +str(len(pmid_abs))+" PubMed IDs. Classifying and extracting epidemiology information...")
309
+
310
+ i = 0
311
+ my_bar = st.progress(i)
312
+ percent_at_step = 100/len(pmid_abs)
313
+ for pmid, abstract in pmid_abs.items():
314
+ epi_prob, isEpi = classify_abs.getTextPredictions(abstract, classify_model_vars)
315
+ if isEpi:
316
+ #Preprocessing Functions for Extraction
317
+ sentences = str2sents(abstract)
318
+ model_outputs = [NER_pipeline(sent) for sent in sentences]
319
+ extraction = parse_info(sentences, model_outputs, entity_classes, extract_diseases, GARD_dict, max_length)
320
+ if extraction:
321
+ extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'EPI_PROB':epi_prob, 'IsEpi':isEpi})
322
+ #Slow dataframe update
323
+ results = results.append(extraction, ignore_index=True)
324
+ i+=1
325
+ my_bar.progress(round(i*percent_at_step/100,1))
326
+
327
+ st.write(len(results),'abstracts classified as epidemiological.')
328
+ return results.sort_values('EPI_PROB', ascending=False)
329
 
330
  #Identical to search_term_extraction, except it returns a JSON object instead of a df
331
  def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search