Spaces:
Running
Running
wzkariampuzha
commited on
Commit
·
a3cf8bc
1
Parent(s):
5454234
Create fast_api_app.py
Browse files- fast_api_app.py +113 -0
fast_api_app.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Union
|
2 |
+
import nltk
|
3 |
+
nltk.data.path.extend(["/home/user/app/nltk_data","./nltk_data"])
|
4 |
+
from epi_pipeline import (
|
5 |
+
search_getAbs,
|
6 |
+
API_search_extraction,
|
7 |
+
NER_Pipeline,
|
8 |
+
GARD_Search,
|
9 |
+
Classify_Pipeline,
|
10 |
+
)
|
11 |
+
from fastapi import FastAPI, Path, Query
|
12 |
+
from enum import Enum
|
13 |
+
import json
|
14 |
+
|
15 |
+
# Instantiate the pipelines once, at import time; the route handlers below
# reference these module-level objects.
rd_identify = GARD_Search()
epi_classify = Classify_Pipeline()
epi_extract = NER_Pipeline()

# Create the FastAPI application that the route decorators attach to.
app = FastAPI()
|
21 |
+
|
22 |
+
#Create Filtering Class
|
23 |
+
## Need to predefine types of filtering that we will accept
|
24 |
+
## See here: https://fastapi.tiangolo.com/tutorial/path-params/#predefined-values
|
25 |
+
class FilteringType(str, Enum):
    """Closed set of abstract-filtering modes accepted by the API.

    Mixing in ``str`` makes every member compare equal to its plain string
    value (e.g. ``FilteringType.strict == "strict"``).
    """

    none = "none"
    lenient = "lenient"
    strict = "strict"
|
29 |
+
|
30 |
+
|
31 |
+
@app.get("/")
async def root():
    """Return a one-key JSON object describing the service."""
    description = "Epidemiology Information Extraction Pipeline for Rare Diseases. Built by the National Center for Advancing Translational Sciences"
    return {"message": description}
|
34 |
+
|
35 |
+
# Uses optional arguments from here: https://fastapi.tiangolo.com/tutorial/query-params/
|
36 |
+
# Example query:
|
37 |
+
## rdip2.ncats.io:8000/get_RD_Abs/term=GARD:0000001?max_results=100&filtering=none
|
38 |
+
## Where '?' separates the required and optional inputs
|
39 |
+
## and '&' separates the optional inputs from each other
|
40 |
+
|
41 |
+
@app.get("/get_RD_Abs/term={search_term}")
async def get_RD_Abs(
    search_term: Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want epidemiology data for."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: FilteringType = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
):
    """Search for abstracts about one rare disease.

    Args:
        search_term: Disease name or GARD ID taken from the URL path.
        max_results: Upper bound on abstracts returned (0 < n < 1000).
        filtering: One of 'strict', 'lenient' or 'none'.

    Returns:
        ``json.dumps`` of whatever ``search_getAbs`` returns for the expanded
        search-term list.

    Raises:
        ValueError: If ``filtering`` is not a recognised filtering mode.
    """
    # Normalise to the plain string value. FilteringType(...) accepts both an
    # enum member and its bare string (the declared default is the string
    # 'strict'). The previous if/elif/if/else chain rebound ``filtering`` to
    # 'none' or 'lenient' and then fell into the ``else`` of the separate
    # ``strict`` test, so those two modes always raised.
    try:
        filtering = FilteringType(filtering).value
    except ValueError:
        raise ValueError("Filtering must be either 'strict','lenient', or 'none'.") from None

    searchterm_list = rd_identify.autosearch(search_term)

    # NOTE(review): get_RD_Abs_batch applies json.loads to search_getAbs's
    # result, which suggests it may already be a JSON string - confirm that
    # json.dumps here is not double-encoding.
    return json.dumps(search_getAbs(searchterm_list, max_results, filtering))
|
60 |
+
|
61 |
+
@app.get("/epi_extract_rare_disease/term={search_term}")
async def epi_extract_rare_disease(
    search_term: Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want to gather abstracts from PubMed."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),  # for abstract search
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),  # for disease extraction
):
    """Run the search-and-extraction pipeline for a single rare disease.

    Forwards the request parameters, together with the module-level
    ``epi_extract``, ``rd_identify`` and ``epi_classify`` objects, to
    ``API_search_extraction`` and returns its result unchanged.
    """
    extraction = API_search_extraction(
        search_term,
        max_results,
        filtering,
        epi_extract,
        rd_identify,
        extract_diseases,
        epi_classify,
    )
    return extraction
|
69 |
+
|
70 |
+
@app.post("/epi_extract_text/text={text}")
async def epi_extract_text(
    text: str = Path(title="Abstract text that you want to extract"),
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),  # for disease extraction
):
    """Run extraction directly on caller-supplied abstract text.

    Args:
        text: The abstract text, taken from the URL path.
        extract_diseases: Forwarded to ``API_text_extraction`` to toggle
            disease extraction.

    Returns:
        Whatever ``API_text_extraction`` returns.
    """
    # Imported here because the module-level ``from epi_pipeline import (...)``
    # list does not include this name, so the original call raised NameError.
    # NOTE(review): assumes epi_pipeline exports API_text_extraction - confirm.
    from epi_pipeline import API_text_extraction

    # The original passed ``epi_ner`` (undefined in this module) and the
    # ``GARD_Search`` class itself; use the instances loaded at import time.
    return API_text_extraction(
        text,  # text to be extracted
        epi_extract,  # NER pipeline instance
        rd_identify,  # GARD search instance
        extract_diseases,
    )
|
77 |
+
|
78 |
+
#Batch Abstracts
|
79 |
+
# Example query:
|
80 |
+
## rdip2.ncats.io:8000/get_RD_Abs_batch/terms=GARD:0000001;Cystic%20Fibrosis;Serpiginous%20choroidopathy?filtering=strict
|
81 |
+
|
82 |
+
@app.get("/get_RD_Abs_batch/terms={rd_list}")
async def get_RD_Abs_batch(
    rd_list: str = Path(title="The names of the rare disease or the GARD ID you want abstracts for, separated by semicolons."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned Per Rare Disease", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
):
    """Search for abstracts about several rare diseases in one request.

    Args:
        rd_list: Semicolon-separated disease names and/or GARD IDs.
        max_results: Upper bound on abstracts returned per disease.
        filtering: Filtering mode forwarded to ``search_getAbs``.

    Returns:
        A list with one ``{"Disease": ..., "Studies": ...}`` dict per
        non-empty term, in the order the terms were given.
    """
    output = []
    for rd in rd_list.split(';'):
        # Ignore blank entries produced by "a;;b" or a trailing ';' so an
        # empty string is never sent to the search.
        rd = rd.strip()
        if not rd:
            continue
        searchterm_list = rd_identify.autosearch(rd)
        # json.loads turns the search result into Python objects so the
        # framework serialises the combined list once.
        studies = json.loads(search_getAbs(searchterm_list, max_results, filtering))
        output.append({"Disease": rd, "Studies": studies})

    return output
|
95 |
+
#return json.dumps(output)
|
96 |
+
|
97 |
+
#Batch Epi Extraction
|
98 |
+
@app.get("/epi_extract_RD_batch/terms={rd_list}")
async def epi_extract_RD_batch(
    rd_list: str = Path(title="The names of the rare disease or the GARD ID you want epidemiology data for, separated by semicolons."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),  # for abstract search
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),  # for disease extraction
):
    """Run the search-and-extraction pipeline for several rare diseases.

    The path parameter is named ``rd_list`` to match the ``{rd_list}``
    placeholder in the route; it was previously declared as ``search_term``,
    which left ``rd_list`` undefined in the body.

    Args:
        rd_list: Semicolon-separated disease names and/or GARD IDs.
        max_results: Upper bound on abstracts per disease.
        filtering: Filtering mode forwarded to ``API_search_extraction``.
        extract_diseases: Forwarded to ``API_search_extraction``.

    Returns:
        A list with one ``{"Disease": ..., "Extraction": ...}`` dict per
        non-empty term, in the order the terms were given.
    """
    output = []
    for rd in rd_list.split(';'):
        # Ignore blank entries produced by "a;;b" or a trailing ';'.
        rd = rd.strip()
        if not rd:
            continue
        extraction = json.loads(API_search_extraction(
            rd, max_results, filtering,
            epi_extract, rd_identify, extract_diseases, epi_classify))
        output.append({"Disease": rd, "Extraction": extraction})

    return output
|
113 |
+
#return json.dumps(output)
|