Upload herbal_expert.py
herbal_expert.py  (+49 / -173)
@@ -22,104 +22,6 @@ Entrez.email = "[email protected]"
 nltk.download('wordnet')


-class CustomLLM(LLM):
-    n: int
-
-    @property
-    def _llm_type(self) -> str:
-        return "custom"
-
-    def _call(
-            self,
-            prompt: str,
-            stop: Optional[List[str]] = None,
-            run_manager: Optional[CallbackManagerForLLMRun] = None,
-            **kwargs: Any,
-    ) -> str:
-        """
-        The _call function is the function that will be called by the user.
-        It should take in a prompt and return a response.
-
-        :param self: Represent the instance of the class
-        :param prompt: str: Pass the prompt to the model
-        :param stop: Optional[List[str]]: Define the stop words
-        :param run_manager: Optional[CallbackManagerForLLMRun]: Pass the run manager to the call function
-        :param **kwargs: Any: Pass in any additional parameters that may be needed for the function
-        :param : Pass the prompt to the model
-        :return: A string that is the response of gpt-3 to the prompt
-        """
-        data = {
-            "messages": [
-                {
-                    "role": "user",
-                    "content": prompt
-                }
-            ],
-            "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 700, "stream": False
-        }
-
-        response = requests.post(ngrok_url + "v1/chat/completions",
-                                 headers={"Content-Type": "application/json"}, json=data)
-        return json.loads(response.text)['choices'][0]['message']['content']
-
-
-class CustomPromptTemplate(StringPromptTemplate):
-    template: str
-
-    def format(self, **kwargs) -> str:
-        return self.template.format(**kwargs)
-
-
-class CustomOutputParser(AgentOutputParser):
-    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
-        return AgentFinish(return_values={"output": llm_output}, log=llm_output)
-
-
-bare_output_parser = CustomOutputParser()
-question_decompose_prompt = """### Instruction: Identify and list the keywords that capture the essence of the
-question. List them as a string separated by commas. Focus on the question. The first word should be the most
-important keyword and the last word should be the least important keyword.
-
-Chat History: {history}
-Question: {input}
-
-YOUR RESPONSE SHOULD BE A STRING OF COMMA SEPARATED KEYWORDS:
-### Response: Keywords: """
-
-prompt_with_history = CustomPromptTemplate(
-    template=question_decompose_prompt,
-    tools=[],
-    input_variables=["input", "history"]
-)
-
-
-def get_agent_executor():
-    """
-    The get_agent_executor function is a factory function that returns an AgentExecutor object.
-    The AgentExecutor object is the main interface for interacting with the agent.
-    agent.run('query') is the main method for interacting with the agent.
-
-    :return: An AgentExecutor object
-    """
-
-    llm = CustomLLM(n=10)
-    question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
-    question_decompose_agent = LLMSingleActionAgent(
-        llm_chain=question_decompose_chain,
-        output_parser=bare_output_parser,
-        stop=["\nObservation:"],
-        allowed_tools=[]
-    )
-    memory = ConversationBufferWindowMemory(k=10)
-
-    return AgentExecutor.from_agent_and_tools(
-        agent=question_decompose_agent,
-        tools=[],
-        verbose=True,
-        memory=memory
-    )
-
-
 def get_num_citations(pmid: str):
     """
     The get_num_citations function takes a PubMed ID (pmid) as input and returns the number of citations for that
@@ -225,64 +127,54 @@ def format_prompt_and_query(prompt, **kwargs):


 class HerbalExpert:
-    def __init__(self, qd_chain):
-        self.qd_chain = qd_chain
+    def __init__(self):
         self.wnl = WordNetLemmatizer()
         self.default_questions = [
             "How is chamomile traditionally used in herbal medicine?",
-            "What are the potential side effects or interactions of consuming echinacea?",
+            "What are the potential side effects or interactions of consuming echinacea alongside finasteride?",
             "Can you explain the different methods of consuming lavender for health benefits?",
             "Which herbs are commonly known for their anti-inflammatory properties?",
             "I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?",
             "Are there any natural herbs that could support better sleep?",
             "What cannabis or hemp products would you recommend for chronic pain relief?",
             "I'm looking to boost my immune system. Are there any specific herbs or supplements that could help?",
-            "Which herbs or supplements are recommended for enhancing cognitive functions and memory?"
+            "Which herbs or supplements are recommended for enhancing cognitive functions and memory?",
+            "What natural (herbal) medicinal molecule is the best alternative for pharmaceutical drugs, e.g., opiates?"
         ]
-        # og = Original, qa = Question Asking, ri = Response Improvement
+        # qd = Question Decompose, og = Original, qa = Question Asking, ri = Response Improvement
         self.prompts = {
-            ...  # previous og/qa/ri prompt strings (not shown)
+            "qd_prompt": """### Instruction: Identify and list the keywords that capture the essence of the question. List them as a string separated by commas. Focus on the question. Order the keywords by importance. The first keyword should be the most important keyword in the question and the last keyword should be the least important keyword.
+Question: {input}
+
+YOUR RESPONSE SHOULD BE A STRING OF COMMA SEPARATED KEYWORDS:
+### Response: Keywords: """,
+
+            "og_answer_prompt": """### Instruction: Answer the following question to the best of your ability. Question: {question}
+### Response: Answer: """,
+
+            "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important keywords that capture the essence of the text. If there's a list of products, choose the top 2 products. Your response should be a list of only 2 keywords separated by commas.
+Text: {original_answer}
+### Response: Keywords: """,
+
+            "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the context is relevant to the question. If the context doesn't help answer the question, ONLY respond with "I don't know".
+
+Question: {question}
+Context: {context}
+### Response: Answer: """,
+
+            "ri_prompt": """### Instruction: You are a caring, intelligent question-answering agent. Craft a response that is safer, more informative and more intelligent than the original answer and imparts knowledge from both the old answer and from the context ONLY if it helps answer the question.
+
+Question: {question}
+Old Answer: {answer}
+Context: {answer2}
+### Response: Improved Answer: """
         }

     def process_query_words(self, question_words: str, answer_words: str):
         # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
-        """
-        The process_query_words function takes in a string of words and returns a list of filtered lemmatized words.
-        The function first splits the input strings into lists, then removes any duplicate entries from the list. It
-        then iterates through each word in the list and strips it of whitespace before passing it to
-        WordNetLemmatizer().lemmatize() to return its lemma (base form). The function also removes any vague words
-        that are not useful for searching PubMed.
-
-        :param self: Represent the instance of the class
-        :param question_words: str: Get the question words from the user
-        :param answer_words: str: Add the answer words to the list of words that will be searched for in pubmed
-        :return: A list of words that are not vague
-        """
-
         vague_words = ['recommendation', 'recommendations', 'products', 'product']
-        words = question_words.lower().split(",") + answer_words.lower().split(",")
+        words = question_words.lower().split(",")[:4] + answer_words.lower().split(
+            ",")  # limit question words to 4 (since the number is unbounded)

         final_list = []
         for word in words:
@@ -293,22 +185,10 @@ class HerbalExpert:
         return list(set(final_list))

     def convert_question_into_words(self, question: str):
-        """
-        The convert_question_into_words function takes a question and returns the words that are in the question.
-        The function first decomposes the original answer into its component parts, then it decomposes
-        each of those components into their own component parts. It then uses these decomposed answers to
-        find all the words that are in both questions and answers.
-
-        :param self: Make the function a method of the class
-        :param question: str: Pass in the question that is being asked
-        :return: A tuple of two elements
-        :doc-author: Trelent
-        """
-
         original_answer = format_prompt_and_query(self.prompts["og_answer_prompt"], question=question)
         print("Original Answer: ", original_answer)

-        question_decompose = self.
+        question_decompose = format_prompt_and_query(self.prompts["qd_prompt"], input=question)
         print("Question Decompose: ", question_decompose)

         original_answer_decompose = format_prompt_and_query(self.prompts["ans_decompose_prompt"],
@@ -319,19 +199,6 @@ class HerbalExpert:
         return words, original_answer

     def query_expert(self, question: str = None):
-        """
-        The query_expert function takes a question as input and returns the expert's response to that question. The
-        function first converts the question into keywords, then uses those keywords to search PubMed for relevant
-        articles. If no articles are found, it returns the original response from the expert (i.e., without context).
-        If at least one article is found, it asks two follow-up questions: 1) "What do you think of this answer
-        in light of these new findings?" and 2) "How would you improve your answer based on these new
-        findings?". It then returns both responses.
-
-        :param self: Represent the instance of the class
-        :param question: str: Pass in the question that is to be asked
-        :return: A dictionary with the question, response and run info
-        """
-
         question = random.choice(self.default_questions) if question is None else question
         print("Question: ", question)

@@ -339,7 +206,7 @@ class HerbalExpert:
         print("Keywords: ", keywords)

         context = fetch_pubmed_articles(" AND ".join(keywords), max_search=5)
-        print(context)
+        print("Context: ", context)

         if len(context) == 0:
             return {
@@ -349,9 +216,19 @@ class HerbalExpert:
             }

         contextual_response = format_prompt_and_query(self.prompts["qa_prompt"], question=question, context=context)
+        # if "I don't know" in contextual_response:
+        #     return {
+        #         "question": question,
+        #         "response": original_response,
+        #         "info": "Irrelevant context found"
+        #     }
+
+        print()
+        print("Contextual Response: ", contextual_response)
         improved_response = format_prompt_and_query(self.prompts["ri_prompt"], question=question,
                                                     answer=original_response, answer2=contextual_response)
-
+        print()
+        print("Improved Response: ", improved_response)
         return {
             "question": question,
             "response": improved_response,
@@ -359,11 +236,10 @@ class HerbalExpert:
         }


-herbal_expert = HerbalExpert(
+herbal_expert = HerbalExpert()

 if __name__ == '__main__':
-    herbal_expert = HerbalExpert(
-    answer = herbal_expert.query_expert(
-        "I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
+    herbal_expert = HerbalExpert()
+    answer = herbal_expert.query_expert()
     print(answer['response'])
-    # return to api? who knows
+    # # return to api? who knows
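
Usage sketch for the new entry point after this change (the module-level herbal_expert instance and the 'question'/'response' keys come from the code above; the example question is one of the listed defaults):

from herbal_expert import herbal_expert

# query_expert() with no argument picks a random default question;
# pass a question string to ask something specific.
answer = herbal_expert.query_expert(
    "Are there any natural herbs that could support better sleep?"
)
print(answer['question'])
print(answer['response'])  # improved answer, grounded in PubMed context when relevant articles are found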