Upload herbal_expert.py
herbal_expert.py  +74 -39
CHANGED
@@ -16,11 +16,12 @@ import nltk
 from langchain.callbacks.manager import CallbackManagerForLLMRun
 from typing import List, Union, Optional, Any
 
-ngrok_url = 'https://
-ngrok_url = 'http://localhost:1234/'
+ngrok_url = 'https://9c1a-2605-7b80-3d-320-fc74-5877-9733-e99b.ngrok-free.app/'
+#ngrok_url = 'http://localhost:1234/'
 Entrez.email = "[email protected]"
 nltk.download('wordnet')
 
+
 class CustomLLM(LLM):
     n: int
 
@@ -35,6 +36,17 @@ class CustomLLM(LLM):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> str:
+        """
+        The _call function is the function that will be called by the user.
+        It should take in a prompt and return a response.
+
+        :param self: Represent the instance of the class
+        :param prompt: str: Pass the prompt to the model
+        :param stop: Optional[List[str]]: Define the stop words
+        :param run_manager: Optional[CallbackManagerForLLMRun]: Pass the run manager to the call function
+        :param **kwargs: Any: Pass in any additional parameters that may be needed for the function
+        :return: A string containing the model's response to the prompt
+        """
         data = {
             "messages": [
                 {
@@ -50,7 +62,6 @@ class CustomLLM(LLM):
         return json.loads(response.text)['choices'][0]['message']['content']
 
 
-
 class CustomPromptTemplate(StringPromptTemplate):
     template: str
 
@@ -82,6 +93,14 @@ prompt_with_history = CustomPromptTemplate(
 
 
 def get_agent_executor():
+    """
+    The get_agent_executor function is a factory function that returns an AgentExecutor object.
+    The AgentExecutor object is the main interface for interacting with the agent;
+    agent.run('query') is its main method.
+
+    :return: An AgentExecutor object
+    """
+
     llm = CustomLLM(n=10)
     question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
     question_decompose_agent = LLMSingleActionAgent(
@@ -97,15 +116,14 @@ def get_agent_executor():
         tools=[],
         verbose=True,
         memory=memory
-
+    )
 
 
 def get_num_citations(pmid: str):
     """
     The get_num_citations function takes a PubMed ID (pmid) as input and returns the number of citations for that
-    pmid. The function uses the Entrez module to query PubMed Central's API
-
-    then it counts how many articles are in that list.
+    pmid. The function uses the Entrez module to query PubMed Central's API. The function first queries PMC using
+    elink to get all articles citing the given pmid, then it counts how many articles are in that list.
 
     :param pmid: str: Specify the pmid of the article you want to get citations for
     :return: The number of citations for a given pmid
@@ -120,16 +138,18 @@ def get_num_citations(pmid: str):
     else:
         return 0
 
+
 def fetch_pubmed_articles(keywords, max_search=10, max_context=3):
     """
-    The fetch_pubmed_articles function takes in a list of keywords and returns
-
-
-
-
-    :param
-    :param
-    :
+    The fetch_pubmed_articles function takes in a list of keywords and returns the top 3 articles from PubMed that
+    are most relevant to those keywords. First the search is done on max_search articles, the list is then sorted by
+    number of citations, then the top max_context articles are chosen from that list. If no articles are found with
+    the initial list of keywords, the search is rerun with the top 4 keywords of the list.
+
+    :param keywords: Search for articles in pubmed
+    :param max_search: Limit the number of initial search results
+    :param max_context: Specify the number of articles to return
+    :return: A list of articles
     """
 
     try:
@@ -178,11 +198,10 @@ def call_model_with_history(messages: list):
         "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 512, "stream": False
     }
 
-    response = requests.post(ngrok_url+ "v1/chat/completions", headers={"Content-Type": "application/json"}, json=data)
+    response = requests.post(ngrok_url + "v1/chat/completions", headers={"Content-Type": "application/json"}, json=data)
     return json.loads(response.text)['choices'][0]['message']['content']
 
 
-
 # TODO: add ability to pass message history to model
 def format_prompt_and_query(prompt, **kwargs):
     """
@@ -190,7 +209,7 @@ def format_prompt_and_query(prompt, **kwargs):
     arguments, and then calls call_model_with_history with a list of messages containing the formatted prompt.
 
     :param prompt: Format the prompt with the values in kwargs
-    :param **kwargs: Pass a dictionary of key-value pairs to the formatting function
+    :param **kwargs: Pass a dictionary of key-value pairs to the prompt formatting function
     :return: A list of dictionaries
     """
 
@@ -221,37 +240,39 @@ class HerbalExpert:
         ]
         # og = Original, qa = Question Asking, ri = Response Improvement
         self.prompts = {
-            "og_answer_prompt": """### Instruction: Answer the following question to the best of your ability.
-
+            "og_answer_prompt": """### Instruction: Answer the following question to the best of your ability.
+            Question: {question}
+            ### Response: Answer: """,
 
             "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
-
-
-
-
+            keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
+            Your response should be a list of only 2 keywords separated by commas.
+            Text: {original_answer}
+            ### Response: Keywords: """,
 
             "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
-
-
-
-
+            context is relevant to the question. If the context doesn't help answer the question respond with "I don't know".
+            Question: {question}
+            Context: {context}
+            ### Response: Answer: """,
 
             "ri_prompt": """### Instruction: You are a caring, intelligent question answering agent. Craft a
-
-
-
-
-
-
+            response that is more informative and intelligent than the original answer and imparts knowledge from
+            both the old answer and from the context ONLY if it helps answer the question.
+            Question: {question}
+            Old Answer: {answer}
+            Context: {answer2}
+            ### Response: Improved Answer: """
         }
 
     def process_query_words(self, question_words: str, answer_words: str):
         # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
         """
         The process_query_words function takes in a string of words and returns a list of filtered lemmatized words.
-        The function first splits the input strings into lists, then removes any duplicate entries from the list.
-
-        to return its lemma (base form). The function also removes any vague words
+        The function first splits the input strings into lists, then removes any duplicate entries from the list. It
+        then iterates through each word in the list and strips it of whitespace before passing it to
+        WordNetLemmatizer().lemmatize() to return its lemma (base form). The function also removes any vague words
+        that are not useful for searching PubMed.
 
         :param self: Represent the instance of the class
         :param question_words: str: Get the question words from the user
@@ -297,6 +318,19 @@ class HerbalExpert:
         return words, original_answer
 
     def query_expert(self, question: str = None):
+        """
+        The query_expert function takes a question as input and returns the expert's response to that question. The
+        function first converts the question into keywords, then uses those keywords to search PubMed for relevant
+        articles. If no articles are found, it returns the original response from the expert (i.e., without context).
+        If at least one article is found, it asks two follow-up questions: 1) "What do you think of this answer
+        in light of these new findings?" and 2) "How would you improve your answer based on these new
+        findings?". It then returns both responses.
+
+        :param self: Represent the instance of the class
+        :param question: str: Pass in the question that is to be asked
+        :return: A dictionary with the question, response and run info
+        """
+
         question = random.choice(self.default_questions) if question is None else question
         print("Question: ", question)
 
@@ -326,9 +360,9 @@ class HerbalExpert:
 
 herbal_expert = HerbalExpert(get_agent_executor())
 
-
 if __name__ == '__main__':
     herbal_expert = HerbalExpert(get_agent_executor())
-    answer = herbal_expert.query_expert(
+    answer = herbal_expert.query_expert(
+        "I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
     print(answer['response'])
     # return to api? who knows
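
A few notes on the patterns this commit relies on, with minimal sketches. All helper names below (MODEL_URL, ask, num_citations, top_articles, clean_query_words) are illustrative stand-ins, not identifiers from the file.

The model endpoint is an OpenAI-compatible chat-completions server reached through an ngrok tunnel (or localhost when the commented-out URL is active). A sketch of the request shape the diff sends in _call and call_model_with_history, assuming any OpenAI-compatible server such as a local LM Studio instance:

    import requests

    # Illustrative stand-in for the module-level ngrok_url in the diff.
    MODEL_URL = "http://localhost:1234/"

    def ask(prompt: str) -> str:
        # Same payload shape the diff builds: messages plus sampling options.
        data = {
            "messages": [{"role": "user", "content": prompt}],
            "stop": ["### Instruction:"],
            "temperature": 0,
            "max_tokens": 512,
            "stream": False,
        }
        response = requests.post(MODEL_URL + "v1/chat/completions",
                                 headers={"Content-Type": "application/json"},
                                 json=data)
        # The server answers in the OpenAI schema: choices -> message -> content.
        return response.json()["choices"][0]["message"]["content"]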
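
The CustomLLM class plugs that HTTP call into LangChain. A minimal sketch of the wrapper shape the 0.0.x-era imports imply, reusing the hypothetical ask helper above; the real class does more in _call:

    from typing import Any, List, Optional

    from langchain.callbacks.manager import CallbackManagerForLLMRun
    from langchain.llms.base import LLM

    class CustomLLM(LLM):
        n: int

        @property
        def _llm_type(self) -> str:
            # LangChain requires subclasses to name their type.
            return "custom"

        def _call(
            self,
            prompt: str,
            stop: Optional[List[str]] = None,
            run_manager: Optional[CallbackManagerForLLMRun] = None,
            **kwargs: Any,
        ) -> str:
            # Delegate to the HTTP helper sketched above.
            return ask(prompt)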
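
get_num_citations counts citing articles through NCBI E-utilities. A sketch of the elink call the docstring describes; the pubmed_pmc_refs link name is an assumption, since the diff only says PMC is queried via elink:

    from Bio import Entrez

    Entrez.email = "you@example.com"  # placeholder; NCBI requires a contact address

    def num_citations(pmid: str) -> int:
        # Ask PMC for the set of articles that cite this PubMed ID.
        handle = Entrez.elink(dbfrom="pubmed", db="pmc",
                              LinkName="pubmed_pmc_refs", id=pmid)
        record = Entrez.read(handle)
        handle.close()
        linksets = record[0]["LinkSetDb"]
        # An empty linkset means nothing cites the article yet.
        return len(linksets[0]["Link"]) if linksets else 0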
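
fetch_pubmed_articles searches first, then re-ranks by citation count. A sketch of that search-then-sort flow, reusing num_citations from above; how the keywords are combined into a query term is an assumption:

    from Bio import Entrez

    def top_articles(keywords, max_search=10, max_context=3):
        # Pull up to max_search PubMed IDs for the keyword query.
        handle = Entrez.esearch(db="pubmed", term=" AND ".join(keywords),
                                retmax=max_search)
        ids = list(Entrez.read(handle)["IdList"])
        handle.close()
        # Keep the max_context most-cited hits, as the docstring describes.
        ids.sort(key=num_citations, reverse=True)
        return ids[:max_context]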
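
process_query_words normalizes the comma-separated keyword strings before they reach PubMed. A sketch of the dedupe-strip-lemmatize step the docstring describes; the vague-word filter list is illustrative, since the inline comment only floats 'supplements'/'supplement' as candidates:

    import nltk
    from nltk.stem import WordNetLemmatizer

    nltk.download("wordnet", quiet=True)

    VAGUE_WORDS = {"supplement", "supplements"}  # illustrative filter list

    def clean_query_words(question_words: str, answer_words: str) -> list:
        lemmatizer = WordNetLemmatizer()
        # Split the comma-separated strings and drop duplicates.
        raw = set(question_words.split(",")) | set(answer_words.split(","))
        # Strip whitespace and reduce each word to its WordNet lemma.
        lemmas = {lemmatizer.lemmatize(word.strip().lower()) for word in raw}
        return [w for w in lemmas if w and w not in VAGUE_WORDS]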
|