Spaces:
Running
Running
wzkariampuzha
committed on
Commit
·
bd8a308
1
Parent(s):
221e51c
Update extract_abs.py
Browse files- extract_abs.py +4 -4
extract_abs.py
CHANGED
@@ -290,9 +290,9 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
|
|
290 |
#Format of Output
|
291 |
ordered_labels = order_labels(entity_classes)
|
292 |
if extract_diseases:
|
293 |
-
columns = ['PMID', 'ABSTRACT','
|
294 |
else:
|
295 |
-
columns = ['PMID', 'ABSTRACT','
|
296 |
|
297 |
results = pd.DataFrame(columns=columns)
|
298 |
|
@@ -321,7 +321,7 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
|
|
321 |
model_outputs = [NER_pipeline(sent) for sent in sentences]
|
322 |
extraction = parse_info(sentences, model_outputs, entity_classes, extract_diseases, GARD_dict, max_length)
|
323 |
if extraction:
|
324 |
-
extraction.update({'PMID':pmid, 'ABSTRACT':abstract, '
|
325 |
#Slow dataframe update
|
326 |
results = results.append(extraction, ignore_index=True)
|
327 |
epidemiologic+=1
|
@@ -330,7 +330,7 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
|
|
330 |
|
331 |
sankey_data = (gathered, relevant,epidemiologic)
|
332 |
st.write(len(results),'abstracts classified as epidemiological.')
|
333 |
-
return results.sort_values('
|
334 |
|
335 |
#Identical to search_term_extraction, except it returns a JSON object instead of a df
|
336 |
def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
|
|
|
290 |
#Format of Output
|
291 |
ordered_labels = order_labels(entity_classes)
|
292 |
if extract_diseases:
|
293 |
+
columns = ['PMID', 'ABSTRACT','PROB_OF_EPI','IsEpi','IDS','DIS']+ordered_labels
|
294 |
else:
|
295 |
+
columns = ['PMID', 'ABSTRACT','PROB_OF_EPI','IsEpi']+ordered_labels
|
296 |
|
297 |
results = pd.DataFrame(columns=columns)
|
298 |
|
|
|
321 |
model_outputs = [NER_pipeline(sent) for sent in sentences]
|
322 |
extraction = parse_info(sentences, model_outputs, entity_classes, extract_diseases, GARD_dict, max_length)
|
323 |
if extraction:
|
324 |
+
extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'PROB_OF_EPI':epi_prob, 'IsEpi':isEpi})
|
325 |
#Slow dataframe update
|
326 |
results = results.append(extraction, ignore_index=True)
|
327 |
epidemiologic+=1
|
|
|
330 |
|
331 |
sankey_data = (gathered, relevant,epidemiologic)
|
332 |
st.write(len(results),'abstracts classified as epidemiological.')
|
333 |
+
return results.sort_values('PROB_OF_EPI', ascending=False), sankey_data
|
334 |
|
335 |
#Identical to search_term_extraction, except it returns a JSON object instead of a df
|
336 |
def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
|