Spaces:
Sleeping
Sleeping
Upload herbal_expert.py
Browse files- herbal_expert.py +80 -45
herbal_expert.py
CHANGED
@@ -17,6 +17,7 @@ from langchain.callbacks.manager import CallbackManagerForLLMRun
|
|
17 |
from typing import List, Union, Optional, Any
|
18 |
|
19 |
ngrok_url = 'https://2590-2605-7b80-3d-320-a515-4f0d-f60e-71e5.ngrok-free.app/'
|
|
|
20 |
Entrez.email = "[email protected]"
|
21 |
nltk.download('wordnet')
|
22 |
|
@@ -41,14 +42,13 @@ class CustomLLM(LLM):
|
|
41 |
"content": prompt
|
42 |
}
|
43 |
],
|
44 |
-
"stop": ["### Instruction:"], "temperature": 0, "max_tokens":
|
45 |
}
|
46 |
|
47 |
response = requests.post(ngrok_url + "v1/chat/completions",
|
48 |
headers={"Content-Type": "application/json"}, json=data)
|
49 |
return json.loads(response.text)['choices'][0]['message']['content']
|
50 |
|
51 |
-
# return make_inference_call(prompt)
|
52 |
|
53 |
|
54 |
class CustomPromptTemplate(StringPromptTemplate):
|
@@ -64,41 +64,52 @@ class CustomOutputParser(AgentOutputParser):
|
|
64 |
|
65 |
|
66 |
bare_output_parser = CustomOutputParser()
|
67 |
-
question_decompose_prompt = """
|
68 |
-
|
69 |
-
|
|
|
|
|
70 |
Question: {input}
|
71 |
|
72 |
-
|
73 |
-
### Response:
|
74 |
-
"""
|
75 |
|
76 |
prompt_with_history = CustomPromptTemplate(
|
77 |
template=question_decompose_prompt,
|
78 |
tools=[],
|
79 |
input_variables=["input", "history"]
|
80 |
)
|
81 |
-
# %%
|
82 |
-
llm = CustomLLM(n=10)
|
83 |
-
question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
|
84 |
-
|
85 |
-
question_decompose_agent = LLMSingleActionAgent(
|
86 |
-
llm_chain=question_decompose_chain,
|
87 |
-
output_parser=bare_output_parser,
|
88 |
-
stop=["\nObservation:"],
|
89 |
-
allowed_tools=[]
|
90 |
-
)
|
91 |
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
|
101 |
def get_num_citations(pmid: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
citations_xml = Entrez.read(
|
103 |
Entrez.elink(dbfrom="pubmed", db="pmc", LinkName="pubmed_pubmed_citedin", from_uid=pmid))
|
104 |
|
@@ -210,32 +221,44 @@ class HerbalExpert:
|
|
210 |
]
|
211 |
# og = Original, qa = Question Asking, ri = Response Improvement
|
212 |
self.prompts = {
|
213 |
-
"og_answer_prompt": """### Instruction: Answer the following question
|
214 |
-
|
215 |
|
216 |
"ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
|
222 |
"qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
|
228 |
"ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
}
|
236 |
|
237 |
def process_query_words(self, question_words: str, answer_words: str):
|
238 |
# don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
vague_words = ['recommendation', 'recommendations', 'products', 'product']
|
240 |
words = question_words.lower().split(",") + answer_words.lower().split(",")
|
241 |
|
@@ -248,6 +271,18 @@ class HerbalExpert:
|
|
248 |
return list(set(final_list))
|
249 |
|
250 |
def convert_question_into_words(self, question: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
original_answer = format_prompt_and_query(self.prompts["og_answer_prompt"], question=question)
|
252 |
print("Original Answer: ", original_answer)
|
253 |
|
@@ -262,14 +297,14 @@ class HerbalExpert:
|
|
262 |
return words, original_answer
|
263 |
|
264 |
def query_expert(self, question: str = None):
|
265 |
-
question = self.default_questions
|
266 |
-
random.randint(0, len(self.default_questions) - 1)] if question is None else question
|
267 |
print("Question: ", question)
|
268 |
|
269 |
keywords, original_response = self.convert_question_into_words(question)
|
270 |
print("Keywords: ", keywords)
|
271 |
|
272 |
context = fetch_pubmed_articles(" AND ".join(keywords), max_search=5)
|
|
|
273 |
|
274 |
if len(context) == 0:
|
275 |
return {
|
@@ -289,11 +324,11 @@ class HerbalExpert:
|
|
289 |
}
|
290 |
|
291 |
|
292 |
-
herbal_expert = HerbalExpert(
|
293 |
|
294 |
|
295 |
if __name__ == '__main__':
|
296 |
-
herbal_expert = HerbalExpert(
|
297 |
answer = herbal_expert.query_expert("I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
|
298 |
print(answer['response'])
|
299 |
# return to api? who knows
|
|
|
from typing import List, Union, Optional, Any

# NOTE(review): this remote ngrok tunnel URL is immediately overridden by the
# localhost assignment below — it is dead code kept only as a reference.
ngrok_url = 'https://2590-2605-7b80-3d-320-a515-4f0d-f60e-71e5.ngrok-free.app/'
# Active inference endpoint: an OpenAI-compatible chat-completions server on localhost.
ngrok_url = 'http://localhost:1234/'
# NCBI requires a contact e-mail for Entrez API usage.
# NOTE(review): the address shows as "[email protected]" in this view — presumably
# redacted by the page scraper; confirm the real value in the repository.
Entrez.email = "[email protected]"
# Fetch the WordNet corpus at import time; it is used later for lemmatization.
nltk.download('wordnet')
|
|
42 |
"content": prompt
|
43 |
}
|
44 |
],
|
45 |
+
"stop": ["### Instruction:"], "temperature": 0, "max_tokens": 700, "stream": False
|
46 |
}
|
47 |
|
48 |
response = requests.post(ngrok_url + "v1/chat/completions",
|
49 |
headers={"Content-Type": "application/json"}, json=data)
|
50 |
return json.loads(response.text)['choices'][0]['message']['content']
|
51 |
|
|
|
52 |
|
53 |
|
54 |
class CustomPromptTemplate(StringPromptTemplate):
|
|
|
bare_output_parser = CustomOutputParser()
# Prompt that asks the LLM to decompose the user's question into a
# comma-separated keyword list, ordered most- to least-important.
# {history} and {input} are filled in by CustomPromptTemplate below.
question_decompose_prompt = """### Instruction: Identify and list the keywords that capture the essence of the
question. List them as a string separated by commas. Focus on the question. The first word should be the most
important keyword and the last word should be the least important keyword.

Chat History: {history}
Question: {input}

YOUR RESPONSE SHOULD BE A STRING OF COMMA SEPARATED KEYWORDS:
### Response: Keywords: """

# Template wired for a tool-less agent: only the raw question and the chat
# history are injected into the prompt.
prompt_with_history = CustomPromptTemplate(
    template=question_decompose_prompt,
    tools=[],
    input_variables=["input", "history"]
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_agent_executor():
    """
    Build and return a LangChain AgentExecutor for keyword decomposition.

    The executor wraps a single-action agent backed by the custom LLM
    (``CustomLLM``) and the module-level keyword-decomposition prompt
    (``prompt_with_history``). The agent has no tools, and conversation
    context is kept in a window memory of the last 10 turns.

    :return: an AgentExecutor ready to run question-decomposition queries
    """
    llm = CustomLLM(n=10)
    question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
    question_decompose_agent = LLMSingleActionAgent(
        llm_chain=question_decompose_chain,
        output_parser=bare_output_parser,
        # Stop generation before the model invents an "Observation:" step.
        stop=["\nObservation:"],
        allowed_tools=[]
    )
    memory = ConversationBufferWindowMemory(k=10)

    return AgentExecutor.from_agent_and_tools(
        agent=question_decompose_agent,
        tools=[],
        verbose=True,
        memory=memory
    )
101 |
|
102 |
|
103 |
def get_num_citations(pmid: str):
|
104 |
+
"""
|
105 |
+
The get_num_citations function takes a PubMed ID (pmid) as input and returns the number of citations for that
|
106 |
+
pmid. The function uses the Entrez module to query PubMed Central's API, which is part of NCBI's Entrez
|
107 |
+
Programming Utilities. The function first queries PMC using elink to get all articles citing the given pmid,
|
108 |
+
then it counts how many articles are in that list.
|
109 |
+
|
110 |
+
:param pmid: str: Specify the pmid of the article you want to get citations for
|
111 |
+
:return: The number of citations for a given pmid
|
112 |
+
"""
|
113 |
citations_xml = Entrez.read(
|
114 |
Entrez.elink(dbfrom="pubmed", db="pmc", LinkName="pubmed_pubmed_citedin", from_uid=pmid))
|
115 |
|
|
|
221 |
]
|
222 |
# og = Original, qa = Question Asking, ri = Response Improvement
|
223 |
self.prompts = {
|
224 |
+
"og_answer_prompt": """### Instruction: Answer the following question to the best of your ability. Question: {question}
|
225 |
+
### Response: Answer: """,
|
226 |
|
227 |
"ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
|
228 |
+
keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
|
229 |
+
Your response should be a list of only 2 keywords separated by commas.
|
230 |
+
Text: {original_answer}
|
231 |
+
### Response: Keywords: """,
|
232 |
|
233 |
"qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
|
234 |
+
context is relevant to the question. If the context doesn't help answer the question respond with "I don't know".
|
235 |
+
Question: {question}
|
236 |
+
Context: {context}
|
237 |
+
### Response: Answer: """,
|
238 |
|
239 |
"ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
|
240 |
+
response that is more informative and intelligent than the original answer and imparts knowledge from
|
241 |
+
both the old answer and from the context ONLY if it helps answer the question.
|
242 |
+
Question: {question}
|
243 |
+
Old Answer: {answer}
|
244 |
+
Context: {answer2}
|
245 |
+
### Response: Improved Answer: """
|
246 |
}
|
247 |
|
248 |
def process_query_words(self, question_words: str, answer_words: str):
|
249 |
# don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
|
250 |
+
"""
|
251 |
+
The process_query_words function takes in a string of words and returns a list of filtered lemmatized words.
|
252 |
+
The function first splits the input strings into lists, then removes any duplicate entries from the list.
|
253 |
+
It then iterates through each word in the list and strips it of whitespace before passing it to WordNetLemmatizer().lemmatize()
|
254 |
+
to return its lemma (base form). The function also removes any vague words that are not useful for searching PubMed.
|
255 |
+
|
256 |
+
:param self: Represent the instance of the class
|
257 |
+
:param question_words: str: Get the question words from the user
|
258 |
+
:param answer_words: str: Add the answer words to the list of words that will be searched for in pubmed
|
259 |
+
:return: A list of words that are not vague
|
260 |
+
"""
|
261 |
+
|
262 |
vague_words = ['recommendation', 'recommendations', 'products', 'product']
|
263 |
words = question_words.lower().split(",") + answer_words.lower().split(",")
|
264 |
|
|
|
271 |
return list(set(final_list))
|
272 |
|
273 |
def convert_question_into_words(self, question: str):
|
274 |
+
"""
|
275 |
+
The convert_question_into_words function takes a question and returns the words that are in the question.
|
276 |
+
The function first decomposes the original answer into its component parts, then it decomposes
|
277 |
+
each of those components into their own component parts. It then uses these decomposed answers to
|
278 |
+
find all the words that are in both questions and answers.
|
279 |
+
|
280 |
+
:param self: Make the function a method of the class
|
281 |
+
:param question: str: Pass in the question that is being asked
|
282 |
+
:return: A tuple of two elements
|
283 |
+
:doc-author: Trelent
|
284 |
+
"""
|
285 |
+
|
286 |
original_answer = format_prompt_and_query(self.prompts["og_answer_prompt"], question=question)
|
287 |
print("Original Answer: ", original_answer)
|
288 |
|
|
|
297 |
return words, original_answer
|
298 |
|
299 |
def query_expert(self, question: str = None):
|
300 |
+
question = random.choice(self.default_questions) if question is None else question
|
|
|
301 |
print("Question: ", question)
|
302 |
|
303 |
keywords, original_response = self.convert_question_into_words(question)
|
304 |
print("Keywords: ", keywords)
|
305 |
|
306 |
context = fetch_pubmed_articles(" AND ".join(keywords), max_search=5)
|
307 |
+
print(context)
|
308 |
|
309 |
if len(context) == 0:
|
310 |
return {
|
|
|
324 |
}
|
325 |
|
326 |
|
327 |
# Module-level instance so importers (e.g. an API layer) can share one expert.
herbal_expert = HerbalExpert(get_agent_executor())


if __name__ == '__main__':
    # Reuse the module-level instance instead of constructing a second
    # HerbalExpert: the original re-ran HerbalExpert(get_agent_executor())
    # here, rebuilding the whole LLM/agent stack a second time with no
    # behavioral difference.
    answer = herbal_expert.query_expert("I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
    print(answer['response'])
    # return to api? who knows