Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- Symptom2Disease.csv +0 -0
- Symptom2Disease_1.csv +0 -0
- app.py +58 -21
- model.py +1 -1
- preprocess_data.py +17 -0
- pretrained_gru_model.pth +3 -0
- requirements.txt +0 -5
- test_gradio.py +252 -0
- train.py +19 -18
Symptom2Disease.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
Symptom2Disease_1.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
#locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
|
4 |
import accelerate
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
@@ -14,9 +13,11 @@ from sklearn.model_selection import train_test_split
|
|
14 |
import time
|
15 |
from timeit import default_timer as timer
|
16 |
from typing import Tuple, Dict
|
|
|
|
|
|
|
17 |
'''
|
18 |
import nltk
|
19 |
-
import nltk_u
|
20 |
from nltk.corpus import stopwords
|
21 |
from nltk.stem import SnowballStemmer
|
22 |
from nltk.tokenize import word_tokenize
|
@@ -47,7 +48,20 @@ class GRU_model(nn.Module):
|
|
47 |
def __init__(self):
|
48 |
super().__init__()
|
49 |
|
50 |
-
self.rnn= nn.GRU(input_size=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
self.output= nn.Linear(in_features=240, out_features=24).to('cuda')
|
52 |
|
53 |
def forward(self, x):
|
@@ -55,7 +69,33 @@ class GRU_model(nn.Module):
|
|
55 |
y = y.to('cuda')
|
56 |
x= self.output(y).to('cuda')
|
57 |
return(x)
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
#########################################################################################################################
|
60 |
if torch.cuda.is_available():
|
61 |
device = "cuda"
|
@@ -73,7 +113,6 @@ train_data, test_data= train_test_split(df, test_size=0.2, random_state=1)
|
|
73 |
'''
|
74 |
#########################################################################################################################
|
75 |
#vectorizer = TfidfVectorizer(tokenizer=tokenize, stop_words=english_stopwords).fit(train_data.text)
|
76 |
-
#vectorizer= nltk_u.vectorizer()
|
77 |
#vectorizer.fit(train_data.text)
|
78 |
#from sklearn.feature_extraction.text import TfidfVectorizer
|
79 |
#from spacy.lang.de.stop_words import STOP_WORDS
|
@@ -125,13 +164,13 @@ with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;}
|
|
125 |
chatbot = gr.Chatbot()
|
126 |
msg = gr.Textbox()
|
127 |
clear = gr.ClearButton([msg, chatbot])
|
128 |
-
def respond(message, chat_history, base_model = "gru_model", embedder = SentenceTransformer("bge-small-en-v1.5", device="cuda"), device='cuda'): # "meta-llama/Meta-Llama-3-70B"
|
129 |
-
#base_model
|
130 |
if base_model == "gru_model":
|
131 |
# Model and transforms preparation
|
132 |
-
model=
|
133 |
# Load state dict
|
134 |
-
model.load_state_dict(torch.load(f= '
|
135 |
# Random greetings in list format
|
136 |
greetings = ["hello!",'hello', 'hii !', 'hi', "hi there!", "hi there!", "heyy", 'good morning', 'good afternoon', 'good evening', "hey", "how are you", "how are you?", "how is it going", "how is it going?", "what's up?",
|
137 |
"how are you?", "hey, how are you?", "what is popping", "good to see you!", "howdy!", "hi, nice to meet you.", "hiya!", "hi", "hi, what's new?", "hey, how's your day?", "hi, how have you been?", "greetings"]
|
@@ -176,14 +215,16 @@ with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;}
|
|
176 |
elif message.lower() in goodbyes:
|
177 |
bot_message= random.choice(goodbyes_replies)
|
178 |
else:
|
|
|
|
|
179 |
#transform_text= vectorizer.transform([message])
|
180 |
-
sentence_embeddings = embedder.encode(message)
|
181 |
-
sentence_embeddings = torch.from_numpy(sentence_embeddings).float().to(device).unsqueeze(dim=0)
|
182 |
-
sentence_embeddings.shape
|
183 |
#transform_text= torch.tensor(transform_text.toarray()).to(torch.float32)
|
184 |
model.eval()
|
185 |
with torch.inference_mode():
|
186 |
-
y_logits=model(
|
187 |
pred_prob= torch.argmax(torch.softmax(y_logits, dim=1), dim=1)
|
188 |
test_pred= class_names[pred_prob.item()]
|
189 |
bot_message = f' Based on your symptoms, I believe you are having {test_pred} and I would advice you {disease_advice[test_pred]}'
|
@@ -225,16 +266,12 @@ with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;}
|
|
225 |
|
226 |
msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
227 |
# Launch the demo
|
228 |
-
demo.launch(
|
229 |
-
|
230 |
-
#msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
231 |
-
# Launch the demo
|
232 |
-
#demo.launch()
|
233 |
|
234 |
|
235 |
#gr.ChatInterface(respond).launch()
|
236 |
|
237 |
-
|
238 |
#a = respond('hi', list())
|
239 |
#a = respond("Hi, good morning")
|
240 |
#a = respond("My skin has been peeling, especially on my knees, elbows, and scalp. This peeling is often accompanied by a burning or stinging sensation.")
|
|
|
1 |
+
import locale
|
2 |
+
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
|
|
|
3 |
import accelerate
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
13 |
import time
|
14 |
from timeit import default_timer as timer
|
15 |
from typing import Tuple, Dict
|
16 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
17 |
+
from spacy.lang.de.stop_words import STOP_WORDS
|
18 |
+
vectorizer = TfidfVectorizer(stop_words=list(STOP_WORDS))
|
19 |
'''
|
20 |
import nltk
|
|
|
21 |
from nltk.corpus import stopwords
|
22 |
from nltk.stem import SnowballStemmer
|
23 |
from nltk.tokenize import word_tokenize
|
|
|
48 |
def __init__(self):
|
49 |
super().__init__()
|
50 |
|
51 |
+
self.rnn= nn.GRU(input_size=1477, hidden_size=240,num_layers=1, bias= True).to('cuda') ## nonlinearity= 'relu',
|
52 |
+
self.output= nn.Linear(in_features=240, out_features=24).to('cuda')
|
53 |
+
|
54 |
+
def forward(self, x):
|
55 |
+
y, hidden= self.rnn(x)
|
56 |
+
y = y.to('cuda')
|
57 |
+
x= self.output(y).to('cuda')
|
58 |
+
return(x)
|
59 |
+
|
60 |
+
class RNN_model(nn.Module):
|
61 |
+
def __init__(self):
|
62 |
+
super().__init__()
|
63 |
+
|
64 |
+
self.rnn= nn.RNN(input_size=1477, hidden_size=240,num_layers=1, nonlinearity= 'relu', bias= True).to('cuda')
|
65 |
self.output= nn.Linear(in_features=240, out_features=24).to('cuda')
|
66 |
|
67 |
def forward(self, x):
|
|
|
69 |
y = y.to('cuda')
|
70 |
x= self.output(y).to('cuda')
|
71 |
return(x)
|
72 |
+
|
73 |
+
#embedder = SentenceTransformer("bge-small-en-v1.5", device="cuda")
|
74 |
+
embedder = SentenceTransformer("/home/henry/Desktop/ARIN_7102/download/bge-small-en-v1.5", device="cuda")
|
75 |
+
|
76 |
+
df= pd.read_csv('Symptom2Disease_1.csv')
|
77 |
+
|
78 |
+
target=['Psoriasis', 'Varicose Veins', 'Typhoid', 'Chicken pox',
|
79 |
+
'Impetigo', 'Dengue', 'Fungal infection', 'Common Cold',
|
80 |
+
'Pneumonia', 'Dimorphic Hemorrhoids', 'Arthritis', 'Acne',
|
81 |
+
'Bronchial Asthma', 'Hypertension', 'Migraine',
|
82 |
+
'Cervical spondylosis', 'Jaundice', 'Malaria',
|
83 |
+
'urinary tract infection', 'allergy',
|
84 |
+
'gastroesophageal reflux disease', 'drug reaction',
|
85 |
+
'peptic ulcer disease', 'diabetes']
|
86 |
+
target_dict= {i:j for i,j in enumerate(sorted(target))}
|
87 |
+
df['label']= df['label'].replace({j:i for i,j in enumerate(sorted(target))})
|
88 |
+
df.drop('Unnamed: 0', axis= 1, inplace= True)
|
89 |
+
df.duplicated().sum()
|
90 |
+
df[df.duplicated]
|
91 |
+
df.drop_duplicates(inplace= True)
|
92 |
+
df['label'].value_counts()
|
93 |
+
train_data, test_data= train_test_split(df, test_size=0.15, random_state=42 )
|
94 |
+
train_data['label'].value_counts().sort_index()
|
95 |
+
test_data['label'].value_counts().sort_index()
|
96 |
+
vectorizer.fit(train_data.text)
|
97 |
+
vectorizer.get_feature_names_out()[: 100]
|
98 |
+
vectorizer= vectorizer
|
99 |
#########################################################################################################################
|
100 |
if torch.cuda.is_available():
|
101 |
device = "cuda"
|
|
|
113 |
'''
|
114 |
#########################################################################################################################
|
115 |
#vectorizer = TfidfVectorizer(tokenizer=tokenize, stop_words=english_stopwords).fit(train_data.text)
|
|
|
116 |
#vectorizer.fit(train_data.text)
|
117 |
#from sklearn.feature_extraction.text import TfidfVectorizer
|
118 |
#from spacy.lang.de.stop_words import STOP_WORDS
|
|
|
164 |
chatbot = gr.Chatbot()
|
165 |
msg = gr.Textbox()
|
166 |
clear = gr.ClearButton([msg, chatbot])
|
167 |
+
def respond(message, chat_history, base_model = "gru_model", embedder = SentenceTransformer("/home/henry/Desktop/ARIN_7102/download/bge-small-en-v1.5", device="cuda"), device='cuda'): # "meta-llama/Meta-Llama-3-70B"
|
168 |
+
#base_model = /home/henry/Desktop/ARIN_7102/download/phi-2 # gru_model # embedder = SentenceTransformer("/home/henry/Desktop/ARIN_7102/download/bge-small-en-v1.5", device="cuda")
|
169 |
if base_model == "gru_model":
|
170 |
# Model and transforms preparation
|
171 |
+
model= RNN_model().to(device)
|
172 |
# Load state dict
|
173 |
+
model.load_state_dict(torch.load(f= 'pretrained_gru_model.pth', map_location= device))
|
174 |
# Random greetings in list format
|
175 |
greetings = ["hello!",'hello', 'hii !', 'hi', "hi there!", "hi there!", "heyy", 'good morning', 'good afternoon', 'good evening', "hey", "how are you", "how are you?", "how is it going", "how is it going?", "what's up?",
|
176 |
"how are you?", "hey, how are you?", "what is popping", "good to see you!", "howdy!", "hi, nice to meet you.", "hiya!", "hi", "hi, what's new?", "hey, how's your day?", "hi, how have you been?", "greetings"]
|
|
|
215 |
elif message.lower() in goodbyes:
|
216 |
bot_message= random.choice(goodbyes_replies)
|
217 |
else:
|
218 |
+
transformed_new= vectorizer.transform([message])
|
219 |
+
transformed_new= torch.tensor(transformed_new.toarray()).to(torch.float32).to('cuda')
|
220 |
#transform_text= vectorizer.transform([message])
|
221 |
+
#sentence_embeddings = embedder.encode(message)
|
222 |
+
#sentence_embeddings = torch.from_numpy(sentence_embeddings).float().to(device).unsqueeze(dim=0)
|
223 |
+
#sentence_embeddings.shape
|
224 |
#transform_text= torch.tensor(transform_text.toarray()).to(torch.float32)
|
225 |
model.eval()
|
226 |
with torch.inference_mode():
|
227 |
+
y_logits=model(transformed_new.to(device))
|
228 |
pred_prob= torch.argmax(torch.softmax(y_logits, dim=1), dim=1)
|
229 |
test_pred= class_names[pred_prob.item()]
|
230 |
bot_message = f' Based on your symptoms, I believe you are having {test_pred} and I would advice you {disease_advice[test_pred]}'
|
|
|
266 |
|
267 |
msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
268 |
# Launch the demo
|
269 |
+
demo.launch()
|
270 |
+
#demo.launch(share=True)
|
|
|
|
|
|
|
271 |
|
272 |
|
273 |
#gr.ChatInterface(respond).launch()
|
274 |
|
|
|
275 |
#a = respond('hi', list())
|
276 |
#a = respond("Hi, good morning")
|
277 |
#a = respond("My skin has been peeling, especially on my knees, elbows, and scalp. This peeling is often accompanied by a burning or stinging sensation.")
|
model.py
CHANGED
@@ -5,7 +5,7 @@ class RNN_model(nn.Module):
|
|
5 |
def __init__(self):
|
6 |
super().__init__()
|
7 |
|
8 |
-
self.rnn= nn.
|
9 |
self.output= nn.Linear(in_features=240, out_features=24)
|
10 |
|
11 |
def forward(self, x):
|
|
|
5 |
def __init__(self):
|
6 |
super().__init__()
|
7 |
|
8 |
+
self.rnn= nn.RNN(input_size=1080, hidden_size=240,num_layers=1, nonlinearity= 'relu', bias= True)
|
9 |
self.output= nn.Linear(in_features=240, out_features=24)
|
10 |
|
11 |
def forward(self, x):
|
preprocess_data.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import libraries
|
2 |
+
from torch.utils.data import DataLoader, TensorDataset
|
3 |
+
|
4 |
+
# Map target and label data together in a tuple format
|
5 |
+
def preprocess_data(label_X, target_y):
    """Pair feature and target tensors in a single ``TensorDataset``.

    Args:
        label_X: Tensor of input features; its first dimension must have the
            same size as that of ``target_y`` (enforced by ``TensorDataset``).
        target_y: Tensor of targets.

    Returns:
        A ``TensorDataset`` whose ``i``-th item is ``(label_X[i], target_y[i])``.
    """
    return TensorDataset(label_X, target_y)
|
8 |
+
|
9 |
+
# Create data loaders
|
10 |
+
def dataloader(dataset, batch_size, shuffle, num_workers):
    """Build a ``DataLoader`` over ``dataset``.

    Args:
        dataset: Dataset to iterate over.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the data every epoch.
        num_workers: Number of worker subprocesses used for loading
            (``0`` loads in the main process).

    Returns:
        The configured ``torch.utils.data.DataLoader``.
    """
    # Use a distinct local name so the function does not shadow itself.
    loader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
    )
    return loader
|
17 |
+
|
pretrained_gru_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c86e14898765e9ed24b37393ac9ccf874d862a2a31365bc810c4a6d02b19d899
|
3 |
+
size 1675567
|
requirements.txt
CHANGED
@@ -3,8 +3,3 @@ torchvision==0.15.2
|
|
3 |
gradio==3.35.2
|
4 |
nltk == 3.8
|
5 |
scikit-learn == 1.3.0
|
6 |
-
pandas
|
7 |
-
argparse
|
8 |
-
str2bool
|
9 |
-
sentence_transformers
|
10 |
-
transformers
|
|
|
3 |
gradio==3.35.2
|
4 |
nltk == 3.8
|
5 |
scikit-learn == 1.3.0
|
|
|
|
|
|
|
|
|
|
test_gradio.py
ADDED
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
import locale
|
4 |
+
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
|
5 |
+
import accelerate
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
8 |
+
import os
|
9 |
+
import torch
|
10 |
+
from torch import nn
|
11 |
+
import random
|
12 |
+
import gradio as gr
|
13 |
+
import pandas as pd
|
14 |
+
from sklearn.model_selection import train_test_split
|
15 |
+
import time
|
16 |
+
from timeit import default_timer as timer
|
17 |
+
from typing import Tuple, Dict
|
18 |
+
'''
|
19 |
+
import nltk
|
20 |
+
import nltk_u
|
21 |
+
from nltk.corpus import stopwords
|
22 |
+
from nltk.stem import SnowballStemmer
|
23 |
+
from nltk.tokenize import word_tokenize
|
24 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
25 |
+
'''
|
26 |
+
|
27 |
+
#########################################################################################################################
|
28 |
+
'''
|
29 |
+
nltk.download('punkt')
|
30 |
+
nltk.download('stopwords')
|
31 |
+
stemmer= SnowballStemmer(language= 'english')
|
32 |
+
|
33 |
+
# Tokenize text, e.g "I am sick" = ['i', 'am', 'sick']
|
34 |
+
def tokenize(text):
|
35 |
+
return [stemmer.stem(token) for token in word_tokenize(text)]
|
36 |
+
|
37 |
+
# Create stopwords to reduce noise
|
38 |
+
english_stopwords= stopwords.words('english')
|
39 |
+
|
40 |
+
# Create a vectosizer to learn all words in order to convert them into numbers
|
41 |
+
def vectorizer():
|
42 |
+
vectorizer= TfidfVectorizer(tokenizer=tokenize, stop_words=english_stopwords)
|
43 |
+
return vectorizer
|
44 |
+
'''
|
45 |
+
#########################################################################################################################
|
46 |
+
|
47 |
+
class GRU_model(nn.Module):
    """GRU encoder followed by a linear classification head.

    The layer sizes are fixed: a 2-layer GRU taking 384 input features with
    240 hidden units, feeding a 240 -> 24 linear layer (one logit per class).
    Attribute names (``rnn``, ``output``) are kept so existing checkpoints
    still load with ``load_state_dict``.
    """

    def __init__(self, device=None):
        """Create the layers on ``device``.

        Args:
            device: Target device for the parameters. ``None`` (the default)
                selects ``'cuda'`` when a GPU is available and ``'cpu'``
                otherwise, so construction no longer fails on CPU-only hosts.
        """
        super().__init__()

        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.rnn = nn.GRU(input_size=384, hidden_size=240, num_layers=2, bias=True).to(device)
        self.output = nn.Linear(in_features=240, out_features=24).to(device)

    def forward(self, x):
        """Return the class logits for ``x``.

        Args:
            x: Input tensor whose last dimension has 384 features.

        Returns:
            Tensor of logits whose last dimension has size 24, located on the
            same device as the model parameters.
        """
        # Follow the parameters' device (also after a later ``model.to(...)``)
        # instead of forcing every tensor onto CUDA.
        x = x.to(self.output.weight.device)
        y, _hidden = self.rnn(x)
        return self.output(y)
|
59 |
+
#embedder = SentenceTransformer("bge-small-en-v1.5", device="cuda")
|
60 |
+
embedder = SentenceTransformer("C:/OneDrive/git/ARIN_7102/download/bge-small-en-v1.5", device="cuda")
|
61 |
+
#########################################################################################################################
|
62 |
+
if torch.cuda.is_available():
|
63 |
+
device = "cuda"
|
64 |
+
print(f'################################################################# device: {device}#################################################################')
|
65 |
+
else:
|
66 |
+
device = "cpu"
|
67 |
+
'''
|
68 |
+
# Import data
|
69 |
+
df= pd.read_csv('Symptom2Disease_1.csv')
|
70 |
+
|
71 |
+
# Preprocess data
|
72 |
+
df.drop('Unnamed: 0', axis= 1, inplace= True)
|
73 |
+
df.drop_duplicates(inplace= True)
|
74 |
+
train_data, test_data= train_test_split(df, test_size=0.2, random_state=1)
|
75 |
+
'''
|
76 |
+
#########################################################################################################################
|
77 |
+
#vectorizer = TfidfVectorizer(tokenizer=tokenize, stop_words=english_stopwords).fit(train_data.text)
|
78 |
+
#vectorizer= nltk_u.vectorizer()
|
79 |
+
#vectorizer.fit(train_data.text)
|
80 |
+
#from sklearn.feature_extraction.text import TfidfVectorizer
|
81 |
+
#from spacy.lang.de.stop_words import STOP_WORDS
|
82 |
+
#vectorizer = TfidfVectorizer(tokenizer=tokenize, stop_words=list(STOP_WORDS)).fit(train_data.text)
|
83 |
+
#########################################################################################################################
|
84 |
+
# Setup class names
|
85 |
+
class_names= {0: 'Acne', 1: 'Arthritis', 2: 'Bronchial Asthma', 3: 'Cervical spondylosis', 4: 'Chicken pox', 5: 'Common Cold', 6: 'Dengue', 7: 'Dimorphic Hemorrhoids', 8: 'Fungal infection', 9: 'Hypertension',
|
86 |
+
10: 'Impetigo', 11: 'Jaundice', 12: 'Malaria', 13: 'Migraine', 14: 'Pneumonia', 15: 'Psoriasis', 16: 'Typhoid', 17: 'Varicose Veins', 18: 'allergy', 19: 'diabetes', 20: 'drug reaction',
|
87 |
+
21: 'gastroesophageal reflux disease', 22: 'peptic ulcer disease', 23: 'urinary tract infection'}
|
88 |
+
# Disease Advice
|
89 |
+
disease_advice = {
|
90 |
+
'Acne': "Maintain a proper skincare routine, avoid excessive touching of the affected areas, and consider using over-the-counter topical treatments. If severe, consult a dermatologist.",
|
91 |
+
'Arthritis': "Stay active with gentle exercises, manage weight, and consider pain-relief strategies like hot/cold therapy. Consult a rheumatologist for tailored guidance.",
|
92 |
+
'Bronchial Asthma': "Follow prescribed inhaler and medication regimen, avoid triggers like smoke and allergens, and have an asthma action plan. Regular check-ups with a pulmonologist are important.",
|
93 |
+
'Cervical spondylosis': "Maintain good posture, do neck exercises, and use ergonomic support. Physical therapy and pain management techniques might be helpful.",
|
94 |
+
'Chicken pox': "Rest, maintain hygiene, and avoid scratching. Consult a doctor for appropriate antiviral treatment.",
|
95 |
+
'Common Cold': "Get plenty of rest, stay hydrated, and consider over-the-counter remedies for symptom relief. Seek medical attention if symptoms worsen or last long.",
|
96 |
+
'Dengue': "Stay hydrated, rest, and manage fever with acetaminophen. Seek medical care promptly, as dengue can escalate quickly.",
|
97 |
+
'Dimorphic Hemorrhoids': "Follow a high-fiber diet, maintain good hygiene, and consider stool softeners. Consult a doctor if symptoms persist.",
|
98 |
+
'Fungal infection': "Keep the affected area clean and dry, use antifungal creams, and avoid sharing personal items. Consult a dermatologist if it persists.",
|
99 |
+
'Hypertension': "Follow a balanced diet, exercise regularly, reduce salt intake, and take prescribed medications. Regular check-ups with a healthcare provider are important.",
|
100 |
+
'Impetigo': "Keep the affected area clean, use prescribed antibiotics, and avoid close contact. Consult a doctor for proper treatment.",
|
101 |
+
'Jaundice': "Get plenty of rest, maintain hydration, and follow a doctor's advice for diet and medications. Regular monitoring is important.",
|
102 |
+
'Malaria': "Take prescribed antimalarial medications, rest, and manage fever. Seek medical attention for severe cases.",
|
103 |
+
'Migraine': "Identify triggers, manage stress, and consider pain-relief medications. Consult a neurologist for personalized management.",
|
104 |
+
'Pneumonia': "Follow prescribed antibiotics, rest, stay hydrated, and monitor symptoms. Seek immediate medical attention for severe cases.",
|
105 |
+
'Psoriasis': "Moisturize, use prescribed creams, and avoid triggers. Consult a dermatologist for effective management.",
|
106 |
+
'Typhoid': "Take prescribed antibiotics, rest, and stay hydrated. Dietary precautions are important. Consult a doctor for proper treatment.",
|
107 |
+
'Varicose Veins': "Elevate legs, exercise regularly, and wear compression stockings. Consult a vascular specialist for evaluation and treatment options.",
|
108 |
+
'allergy': "Identify triggers, manage exposure, and consider antihistamines. Consult an allergist for comprehensive management.",
|
109 |
+
'diabetes': "Follow a balanced diet, exercise, monitor blood sugar levels, and take prescribed medications. Regular visits to an endocrinologist are essential.",
|
110 |
+
'drug reaction': "Discontinue the suspected medication, seek medical attention if symptoms are severe, and inform healthcare providers about the reaction.",
|
111 |
+
'gastroesophageal reflux disease': "Follow dietary changes, avoid large meals, and consider medications. Consult a doctor for personalized management.",
|
112 |
+
'peptic ulcer disease': "Avoid spicy and acidic foods, take prescribed medications, and manage stress. Consult a gastroenterologist for guidance.",
|
113 |
+
'urinary tract infection': "Stay hydrated, take prescribed antibiotics, and maintain good hygiene. Consult a doctor for appropriate treatment."
|
114 |
+
}
|
115 |
+
|
116 |
+
howto= """Welcome to the <b>Medical Chatbot</b>, powered by Gradio.
|
117 |
+
Currently, the chatbot can WELCOME YOU, PREDICT DISEASE based on your symptoms and SUGGEST POSSIBLE SOLUTIONS AND RECOMENDATIONS, and BID YOU FAREWELL.
|
118 |
+
<b>How to Start:</b> Simply type your messages in the textbox to chat with the Chatbot and press enter!<br><br>
|
119 |
+
The bot will respond based on the best possible answers to your messages."""
|
120 |
+
|
121 |
+
|
122 |
+
# Create the gradio demo
|
123 |
+
with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""") as demo:
|
124 |
+
gr.HTML('<h1 align="center">Medical Chatbot: ARIN 7102 project')
|
125 |
+
with gr.Accordion("Follow these Steps to use the Gradio WebUI", open=True):
|
126 |
+
gr.HTML(howto)
|
127 |
+
chatbot = gr.Chatbot()
|
128 |
+
msg = gr.Textbox()
|
129 |
+
clear = gr.ClearButton([msg, chatbot])
|
130 |
+
def respond(message, chat_history, base_model = "gru_model", embedder = SentenceTransformer("C:/OneDrive/git/ARIN_7102/download/bge-small-en-v1.5", device="cuda"), device='cuda'): # "meta-llama/Meta-Llama-3-70B"
    """Build the bot reply for ``message`` and append the exchange to ``chat_history``.

    With ``base_model == "gru_model"`` the message is first matched against
    fixed greeting and farewell phrases; anything else is embedded with
    ``embedder``, classified by the pretrained GRU checkpoint and answered
    with the predicted disease and its advice text. Any other ``base_model``
    value is passed to ``AutoModelForCausalLM`` / ``AutoTokenizer`` and the
    text generated after "Output:" becomes the reply.

    Args:
        message: Text typed by the user.
        chat_history: List of ``(user_message, bot_message)`` tuples; it is
            mutated in place.
        base_model: ``"gru_model"`` or a model name/path for ``from_pretrained``.
        embedder: Sentence encoder used for the GRU path.
            NOTE: the default is constructed once, when this ``def`` executes.
        device: Device the GRU model and the embedding tensor are placed on.

    Returns:
        ``("", chat_history)``: an empty string to clear the textbox and the
        updated history.
    """
    if base_model == "gru_model":
        # Random greetings in list format
        greetings = ["hello!",'hello', 'hii !', 'hi', "hi there!", "hi there!", "heyy", 'good morning', 'good afternoon', 'good evening', "hey", "how are you", "how are you?", "how is it going", "how is it going?", "what's up?",
                     "how are you?", "hey, how are you?", "what is popping", "good to see you!", "howdy!", "hi, nice to meet you.", "hiya!", "hi", "hi, what's new?", "hey, how's your day?", "hi, how have you been?", "greetings"]
        # Random Greetings responses
        greetings_responses = ["Thank you for using our medical chatbot. Please provide the symptoms you're experiencing, and I'll do my best to predict the possible disease.",
                               "Hello! I'm here to help you with medical predictions based on your symptoms. Please describe your symptoms in as much detail as possible.",
                               "Greetings! I am a specialized medical chatbot trained to predict potential diseases based on the symptoms you provide. Kindly list your symptoms explicitly.",
                               "Welcome to the medical chatbot. To assist you accurately, please share your symptoms in explicit detail.",
                               "Hi there! I'm a medical chatbot specialized in analyzing symptoms to suggest possible diseases. Please provide your symptoms explicitly.",
                               "Hey! I'm your medical chatbot. Describe your symptoms with as much detail as you can, and I'll generate potential disease predictions.",
                               "How can I assist you today? I'm a medical chatbot trained to predict diseases based on symptoms. Please be explicit while describing your symptoms.",
                               "Hello! I'm a medical chatbot capable of predicting diseases based on the symptoms you provide. Your explicit symptom description will help me assist you better.",
                               "Greetings! I'm here to help with medical predictions. Describe your symptoms explicitly, and I'll offer insights into potential diseases.",
                               "Hi, I'm the medical chatbot. I've been trained to predict diseases from symptoms. The more explicit you are about your symptoms, the better I can assist you.",
                               "Hi, I specialize in medical predictions based on symptoms. Kindly provide detailed symptoms for accurate disease predictions.",
                               "Hello! I'm a medical chatbot with expertise in predicting diseases from symptoms. Please describe your symptoms explicitly to receive accurate insights."]
        # Random goodbyes
        # The comma after 'byebye' was missing, which fused it with the next
        # literal into one unusable phrase.
        goodbyes = ["farewell!",'bye', 'goodbye','good-bye', 'good bye', 'bye', 'thank you', 'later', "take care!",
                    "see you later!", 'see you', 'see ya', 'see-you', 'thanks', 'thank', 'bye bye', 'byebye',
                    "catch you on the flip side!", "adios!",
                    "goodbye for now!", "till we meet again!",
                    "so long!", "hasta la vista!",
                    "bye-bye!", "keep in touch!",
                    "toodles!", "ciao!",
                    "later, gator!", "stay safe and goodbye!",
                    "peace out!", "until next time!", "off I go!"]
        # Random Goodbyes responses
        goodbyes_replies = ["Take care of yourself! If you have more questions, don't hesitate to reach out.", "Stay well! Remember, I'm here if you need further medical advice.",
                            "Goodbye for now! Don't hesitate to return if you need more information in the future.", "Wishing you good health ahead! Feel free to come back if you have more concerns.",
                            "Farewell! If you have more symptoms or questions, don't hesitate to consult again.", "Take care and stay informed about your health. Feel free to chat anytime.",
                            "Bye for now! Remember, your well-being is a priority. Don't hesitate to ask if needed.", "Have a great day ahead! If you need medical guidance later on, I'll be here.",
                            "Stay well and take it easy! Reach out if you need more medical insights.", "Until next time! Prioritize your health and reach out if you need assistance.",
                            "Goodbye! Your health matters. Feel free to return if you have more health-related queries.", "Stay healthy and stay curious about your health! If you need more info, just ask.",
                            "Wishing you wellness on your journey! If you have more questions, I'm here to help.", "Stay well and stay proactive about your health! If you have more queries, feel free to ask.",
                            "Take care and remember, your health is important. Don't hesitate to reach out if needed.", "Goodbye for now! Stay informed and feel free to consult if you require medical advice.",
                            "Farewell! Remember, I'm here whenever you need reliable medical information.", "Bye for now! Stay vigilant about your health and don't hesitate to return if necessary.",
                            "Take care and keep your well-being a priority! Reach out if you have more health questions.", "Wishing you good health ahead! Don't hesitate to chat if you need medical insights.",
                            "Goodbye! Stay well and remember, I'm here to assist you with medical queries."]

        # Compare case-insensitively on both sides; entries such as
        # "off I go!" could otherwise never equal a lower-cased message.
        normalized = message.strip().lower()
        known_greetings = {phrase.lower() for phrase in greetings}
        known_goodbyes = {phrase.lower() for phrase in goodbyes}

        if normalized in known_greetings:
            bot_message = random.choice(greetings_responses)
        elif normalized in known_goodbyes:
            bot_message = random.choice(goodbyes_replies)
        else:
            # Load the classifier only when a prediction is actually needed.
            model = GRU_model().to(device)
            model.load_state_dict(torch.load(f='pretrained_gru_model.pth', map_location=device))
            model.eval()

            # Use the embedder that was passed in rather than building a new
            # SentenceTransformer for every message.
            sentence_embeddings = embedder.encode(message)
            sentence_embeddings = torch.from_numpy(sentence_embeddings).float().to(device).unsqueeze(dim=0)

            with torch.inference_mode():
                y_logits = model(sentence_embeddings)
                # Softmax is monotonic, so the arg-max of the logits is the
                # arg-max of the probabilities.
                pred_idx = torch.argmax(y_logits, dim=1)
            test_pred = class_names[pred_idx.item()]
            bot_message = f' Based on your symptoms, I believe you are having {test_pred} and I would advice you {disease_advice[test_pred]}'

    else:
        # define the model and tokenizer.
        model = AutoModelForCausalLM.from_pretrained(base_model)
        tokenizer = AutoTokenizer.from_pretrained(base_model)

        # feel free to change the prompt to your liking.
        output_termination = "\nOutput:"
        prompt = f"Instruct: Hi, i am patient, {message} what is wrong with my body? What drugs should i take, and what is the side-effect of this drug? What should i do?{output_termination}"
        print(prompt)
        # apply the tokenizer.
        tokens = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
        # use the model to generate new tokens.
        # NOTE(review): 50256 is presumably the "<|endoftext|>" token id of the
        # tokenizer in use - confirm against tokenizer.eos_token_id.
        generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=2000, eos_token_id=50256, pad_token_id=50256)

        # Keep only the text after the first "Output:" marker, if present.
        generated_text = tokenizer.batch_decode(generated_output)[0]
        split_text = generated_text.split("Output:", 1)
        bot_message = split_text[1].strip() if len(split_text) > 1 else ""
        bot_message = bot_message.replace("<|endoftext|>", "").strip()

    chat_history.append((message, bot_message))
    time.sleep(2)
    return "", chat_history
|
231 |
+
|
232 |
+
|
233 |
+
msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
234 |
+
# Launch the demo
|
235 |
+
demo.launch(share=True)
|
236 |
+
|
237 |
+
#msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
238 |
+
# Launch the demo
|
239 |
+
#demo.launch()
|
240 |
+
|
241 |
+
|
242 |
+
#gr.ChatInterface(respond).launch()
|
243 |
+
|
244 |
+
|
245 |
+
#a = respond('hi', list())
|
246 |
+
#a = respond("Hi, good morning")
|
247 |
+
#a = respond("My skin has been peeling, especially on my knees, elbows, and scalp. This peeling is often accompanied by a burning or stinging sensation.")
|
248 |
+
#a = respond("I have blurry vision, and it seems to be getting worse. I'm continuously fatigued and worn out. I also occasionally have acute lightheadedness and vertigo, can you give me some advice?")
|
249 |
+
#print(a)
|
250 |
+
#gr.ChatInterface(respond).launch()
|
251 |
+
#demo = gr.ChatInterface(fn=random_response, examples=[{"text": "Hello", "files": []}], title="Echo Bot", multimodal=True)
|
252 |
+
#demo.launch()
|
train.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
|
2 |
import torch
|
3 |
from torch import nn
|
4 |
-
import nltk_u
|
5 |
import pandas as pd
|
6 |
from pathlib import Path
|
7 |
import matplotlib.pyplot as plt
|
@@ -26,12 +25,13 @@ class RNN_model(nn.Module):
|
|
26 |
def __init__(self):
|
27 |
super().__init__()
|
28 |
|
29 |
-
self.rnn= nn.RNN(input_size=1477, hidden_size=240,num_layers=1, nonlinearity= 'relu', bias= True).to(
|
30 |
-
self.output= nn.Linear(in_features=240, out_features=24).to(
|
31 |
|
32 |
def forward(self, x):
|
33 |
y, hidden= self.rnn(x)
|
34 |
-
|
|
|
35 |
return(x)
|
36 |
#####################################################################################################################
|
37 |
# import data
|
@@ -85,12 +85,12 @@ if torch.cuda.is_available():
|
|
85 |
else:
|
86 |
device = "cpu"
|
87 |
#####################################################################################################################
|
88 |
-
model= RNN_model()
|
89 |
loss_fn= CrossEntropyLoss()
|
90 |
optimizer= torch.optim.SGD(model.parameters(), lr= 0.1, weight_decay=0)
|
91 |
#####################################################################################################################
|
92 |
## train model
|
93 |
-
epoch=
|
94 |
|
95 |
results= {
|
96 |
"train_loss": [],
|
@@ -103,13 +103,13 @@ for i in range(epoch):
|
|
103 |
train_loss=0
|
104 |
train_acc=0
|
105 |
for batch, (X, y) in enumerate(train_dataloader):
|
106 |
-
X, y= X.to(
|
107 |
# Train the model
|
108 |
model.train()
|
109 |
optimizer.zero_grad()
|
110 |
-
y_logits= model(X)
|
111 |
# Calculate the loss
|
112 |
-
loss= loss_fn(y_logits, y)
|
113 |
train_loss += loss
|
114 |
# ypreds
|
115 |
y_preds= torch.argmax(torch.softmax(y_logits, dim=1), dim=1)
|
@@ -128,11 +128,11 @@ for i in range(epoch):
|
|
128 |
model.eval()
|
129 |
with torch.inference_mode():
|
130 |
for X, y in test_dataloader:
|
131 |
-
X, y= X.to(
|
132 |
-
y_logits= model(X)
|
133 |
-
loss= loss_fn(y_logits, y)
|
134 |
test_loss += loss
|
135 |
-
test_preds= torch.argmax(torch.softmax(y_logits, dim=1), dim=1)
|
136 |
accuracy = accuracy_score(y.cpu(), test_preds.cpu())
|
137 |
test_acc += accuracy
|
138 |
test_loss /= len(test_dataloader)
|
@@ -162,17 +162,18 @@ plt.legend()
|
|
162 |
#####################################################################################################################
|
163 |
new_data= 'I have been having burning pain anytime i am peeing, what could be the issue?'
|
164 |
transformed_new= vectorizer.transform([new_data])
|
165 |
-
transformed_new= torch.tensor(transformed_new.toarray()).to(torch.float32)
|
166 |
model.eval()
|
167 |
with torch.inference_mode():
|
168 |
-
y_logits=model(transformed_new)
|
169 |
-
test_preds= torch.argmax(torch.softmax(y_logits, dim=1), dim=1)
|
170 |
test_pred= target_dict[test_preds.item()]
|
171 |
print(f'based on your symptoms, I believe you are having {test_pred}')
|
172 |
|
173 |
-
target_dir_path = Path('
|
174 |
target_dir_path.mkdir(parents=True,
|
175 |
exist_ok=True)
|
176 |
-
model_path= target_dir_path / '
|
177 |
torch.save(obj=model.state_dict(),f= model_path)
|
|
|
178 |
#####################################################################################################################
|
|
|
1 |
|
2 |
import torch
|
3 |
from torch import nn
|
|
|
4 |
import pandas as pd
|
5 |
from pathlib import Path
|
6 |
import matplotlib.pyplot as plt
|
|
|
25 |
def __init__(self):
|
26 |
super().__init__()
|
27 |
|
28 |
+
self.rnn= nn.RNN(input_size=1477, hidden_size=240,num_layers=1, nonlinearity= 'relu', bias= True).to('cuda')
|
29 |
+
self.output= nn.Linear(in_features=240, out_features=24).to('cuda')
|
30 |
|
31 |
def forward(self, x):
    """Run ``x`` through the RNN and project its outputs to logits.

    Args:
        x: Input tensor accepted by ``self.rnn``; it must already live on
            the same device as the model's parameters.

    Returns:
        The result of ``self.output`` applied to the RNN's outputs.
    """
    # nn.RNN returns (outputs, final_hidden_state); only the outputs feed the head.
    rnn_out, _ = self.rnn(x)
    # No explicit device transfer is needed: the result already sits on the
    # same device as the module's parameters.
    return self.output(rnn_out)
|
36 |
#####################################################################################################################
|
37 |
# import data
|
|
|
85 |
else:
|
86 |
device = "cpu"
|
87 |
#####################################################################################################################
|
88 |
+
model= RNN_model().to(device)
|
89 |
loss_fn= CrossEntropyLoss()
|
90 |
optimizer= torch.optim.SGD(model.parameters(), lr= 0.1, weight_decay=0)
|
91 |
#####################################################################################################################
|
92 |
## train model
|
93 |
+
epoch= 300
|
94 |
|
95 |
results= {
|
96 |
"train_loss": [],
|
|
|
103 |
train_loss=0
|
104 |
train_acc=0
|
105 |
for batch, (X, y) in enumerate(train_dataloader):
|
106 |
+
X, y= X.to('cuda'), y.to('cuda')
|
107 |
# Train the model
|
108 |
model.train()
|
109 |
optimizer.zero_grad()
|
110 |
+
y_logits= model(X).to('cuda')
|
111 |
# Calculate the loss
|
112 |
+
loss= loss_fn(y_logits, y).to('cuda')
|
113 |
train_loss += loss
|
114 |
# ypreds
|
115 |
y_preds= torch.argmax(torch.softmax(y_logits, dim=1), dim=1)
|
|
|
128 |
model.eval()
|
129 |
with torch.inference_mode():
|
130 |
for X, y in test_dataloader:
|
131 |
+
X, y= X.to('cuda'), y.to('cuda')
|
132 |
+
y_logits= model(X).to('cuda')
|
133 |
+
loss= loss_fn(y_logits, y).to('cuda')
|
134 |
test_loss += loss
|
135 |
+
test_preds= torch.argmax(torch.softmax(y_logits, dim=1), dim=1).to('cuda')
|
136 |
accuracy = accuracy_score(y.cpu(), test_preds.cpu())
|
137 |
test_acc += accuracy
|
138 |
test_loss /= len(test_dataloader)
|
|
|
162 |
#####################################################################################################################
|
163 |
# Smoke-test the trained model on one hand-written complaint, then save its weights.
new_data = 'I have been having burning pain anytime i am peeing, what could be the issue?'

# Vectorize the text with the already-fitted vectorizer and turn the result
# into a dense float32 tensor on the device selected earlier in the script
# (instead of assuming CUDA is present).
transformed_new = vectorizer.transform([new_data])
transformed_new = torch.tensor(transformed_new.toarray()).to(device=device, dtype=torch.float32)

model.eval()
with torch.inference_mode():
    y_logits = model(transformed_new)
    # argmax over the logits picks the same index as argmax over their softmax.
    test_preds = torch.argmax(y_logits, dim=1)

# Map the predicted index back to its label.
test_pred = target_dict[test_preds.item()]
print(f'based on your symptoms, I believe you are having {test_pred}')

# Save only the state dict, next to the script (Path('') is the current directory).
target_dir_path = Path('')
target_dir_path.mkdir(parents=True, exist_ok=True)
model_path = target_dir_path / 'pretrained_gru_model.pth'
torch.save(obj=model.state_dict(), f=model_path)
print('########### model saved ###########')
|
179 |
#####################################################################################################################
|