Spaces:
Running
Running
wzkariampuzha
committed on
Commit
·
71c29ef
1
Parent(s):
490c9b8
Update classify_abs.py
Browse files
- classify_abs.py +8 -9
classify_abs.py
CHANGED
@@ -290,7 +290,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
|
|
290 |
else:
|
291 |
searchterm_list = list(searchterm_list)
|
292 |
#maxResults is multiplied by a little bit because sometimes the results returned is more than maxResults
|
293 |
-
percent_by_step = 1/
|
294 |
with st.spinner("Gathering PubMed IDs..."):
|
295 |
PMIDs_bar = st.progress(0)
|
296 |
for dz in searchterm_list:
|
@@ -328,7 +328,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
|
|
328 |
|
329 |
with st.spinner("Found "+str(len(pmids))+" PMIDs. Gathering Abstracts and Filtering..."):
|
330 |
abstracts_bar = st.progress(0)
|
331 |
-
percent_by_step = 1/
|
332 |
if filtering !='none' or filtering !='strict':
|
333 |
filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
|
334 |
|
@@ -340,23 +340,22 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
|
|
340 |
uncased_ab = abstract.lower()
|
341 |
for term in searchterm_list:
|
342 |
if term.lower() in uncased_ab:
|
343 |
-
pmid_abs[pmid] = abstract
|
344 |
-
abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
|
345 |
break
|
346 |
elif filtering =='none':
|
347 |
pmid_abs[pmid] = abstract
|
348 |
-
abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
|
349 |
-
|
350 |
#Default filtering is 'lenient'.
|
351 |
else:
|
352 |
#Else and if are separated for readability and to better understand logical flow.
|
353 |
if set(filter_terms).intersection(set(word_tokenize(abstract))):
|
354 |
pmid_abs[pmid] = abstract
|
355 |
-
|
356 |
abstracts_bar.empty()
|
357 |
-
|
|
|
|
|
358 |
|
359 |
-
return pmid_abs, (
|
360 |
|
361 |
# Generate predictions for a PubMed Id
|
362 |
# nlp: en_core_web_lg
|
|
|
290 |
else:
|
291 |
searchterm_list = list(searchterm_list)
|
292 |
#maxResults is multiplied by a little bit because sometimes the results returned is more than maxResults
|
293 |
+
percent_by_step = 1/maxResults
|
294 |
with st.spinner("Gathering PubMed IDs..."):
|
295 |
PMIDs_bar = st.progress(0)
|
296 |
for dz in searchterm_list:
|
|
|
328 |
|
329 |
with st.spinner("Found "+str(len(pmids))+" PMIDs. Gathering Abstracts and Filtering..."):
|
330 |
abstracts_bar = st.progress(0)
|
331 |
+
percent_by_step = 1/maxResults
|
332 |
if filtering !='none' or filtering !='strict':
|
333 |
filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
|
334 |
|
|
|
340 |
uncased_ab = abstract.lower()
|
341 |
for term in searchterm_list:
|
342 |
if term.lower() in uncased_ab:
|
343 |
+
pmid_abs[pmid] = abstract
|
|
|
344 |
break
|
345 |
elif filtering =='none':
|
346 |
pmid_abs[pmid] = abstract
|
|
|
|
|
347 |
#Default filtering is 'lenient'.
|
348 |
else:
|
349 |
#Else and if are separated for readability and to better understand logical flow.
|
350 |
if set(filter_terms).intersection(set(word_tokenize(abstract))):
|
351 |
pmid_abs[pmid] = abstract
|
352 |
+
abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
|
353 |
abstracts_bar.empty()
|
354 |
+
found = len(pmids)
|
355 |
+
relevant = len(pmid_abs)
|
356 |
+
st.success('Found '+str(found)+' PMIDs. Gathered '+str(relevant)+' Relevant Abstracts. Classifying and extracting epidemiology information...')
|
357 |
|
358 |
+
return pmid_abs, (found, relevant)
|
359 |
|
360 |
# Generate predictions for a PubMed Id
|
361 |
# nlp: en_core_web_lg
|