anushm committed on
Commit
1f43c99
1 Parent(s): bc47ca4

Upload herbal_expert.py

Browse files
Files changed (1) hide show
  1. herbal_expert.py +74 -39
herbal_expert.py CHANGED
@@ -16,11 +16,12 @@ import nltk
16
  from langchain.callbacks.manager import CallbackManagerForLLMRun
17
  from typing import List, Union, Optional, Any
18
 
19
- ngrok_url = 'https://2590-2605-7b80-3d-320-a515-4f0d-f60e-71e5.ngrok-free.app/'
20
- ngrok_url = 'http://localhost:1234/'
21
  Entrez.email = "[email protected]"
22
  nltk.download('wordnet')
23
 
 
24
  class CustomLLM(LLM):
25
  n: int
26
 
@@ -35,6 +36,18 @@ class CustomLLM(LLM):
35
  run_manager: Optional[CallbackManagerForLLMRun] = None,
36
  **kwargs: Any,
37
  ) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
38
  data = {
39
  "messages": [
40
  {
@@ -50,7 +63,6 @@ class CustomLLM(LLM):
50
  return json.loads(response.text)['choices'][0]['message']['content']
51
 
52
 
53
-
54
  class CustomPromptTemplate(StringPromptTemplate):
55
  template: str
56
 
@@ -82,6 +94,14 @@ prompt_with_history = CustomPromptTemplate(
82
 
83
 
84
  def get_agent_executor():
 
 
 
 
 
 
 
 
85
  llm = CustomLLM(n=10)
86
  question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
87
  question_decompose_agent = LLMSingleActionAgent(
@@ -97,15 +117,14 @@ def get_agent_executor():
97
  tools=[],
98
  verbose=True,
99
  memory=memory
100
- )
101
 
102
 
103
  def get_num_citations(pmid: str):
104
  """
105
  The get_num_citations function takes a PubMed ID (pmid) as input and returns the number of citations for that
106
- pmid. The function uses the Entrez module to query PubMed Central's API, which is part of NCBI's Entrez
107
- Programming Utilities. The function first queries PMC using elink to get all articles citing the given pmid,
108
- then it counts how many articles are in that list.
109
 
110
  :param pmid: str: Specify the pmid of the article you want to get citations for
111
  :return: The number of citations for a given pmid
@@ -120,16 +139,18 @@ def get_num_citations(pmid: str):
120
  else:
121
  return 0
122
 
 
123
  def fetch_pubmed_articles(keywords, max_search=10, max_context=3):
124
  """
125
- The fetch_pubmed_articles function takes in a list of keywords and returns a list of articles.
126
- The function uses the Entrez API to search for articles with the given keywords, then fetches
127
- those articles from PubMed. The function returns a list of strings, where each string is an article.
128
-
129
- :param keywords: Search for articles in the pubmed database
130
- :param max_results: Specify the number of articles to be returned default is 1
131
- :param email: Identify the user to ncbi
132
- :return: A list of strings
 
133
  """
134
 
135
  try:
@@ -178,11 +199,10 @@ def call_model_with_history(messages: list):
178
  "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 512, "stream": False
179
  }
180
 
181
- response = requests.post(ngrok_url+ "v1/chat/completions", headers={"Content-Type": "application/json"}, json=data)
182
  return json.loads(response.text)['choices'][0]['message']['content']
183
 
184
 
185
-
186
  # TODO: add ability to pass message history to model
187
  def format_prompt_and_query(prompt, **kwargs):
188
  """
@@ -190,7 +210,7 @@ def format_prompt_and_query(prompt, **kwargs):
190
  arguments, and then calls call_model_with_history with a list of messages containing the formatted prompt.
191
 
192
  :param prompt: Format the prompt with the values in kwargs
193
- :param **kwargs: Pass a dictionary of key-value pairs to the formatting function
194
  :return: A list of dictionaries
195
  """
196
 
@@ -221,37 +241,39 @@ class HerbalExpert:
221
  ]
222
  # og = Original, qa = Question Asking, ri = Response Improvement
223
  self.prompts = {
224
- "og_answer_prompt": """### Instruction: Answer the following question to the best of your ability. Question: {question}
225
- ### Response: Answer: """,
 
226
 
227
  "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
228
- keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
229
- Your response should be a list of only 2 keywords separated by commas.
230
- Text: {original_answer}
231
- ### Response: Keywords: """,
232
 
233
  "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
234
- context is relevant to the question. If the context doesn't help answer the question respond with "I don't know".
235
- Question: {question}
236
- Context: {context}
237
- ### Response: Answer: """,
238
 
239
  "ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
240
- response that is more informative and intelligent than the original answer and imparts knowledge from
241
- both the old answer and from the context ONLY if it helps answer the question.
242
- Question: {question}
243
- Old Answer: {answer}
244
- Context: {answer2}
245
- ### Response: Improved Answer: """
246
  }
247
 
248
  def process_query_words(self, question_words: str, answer_words: str):
249
  # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
250
  """
251
  The process_query_words function takes in a string of words and returns a list of filtered lemmatized words.
252
- The function first splits the input strings into lists, then removes any duplicate entries from the list.
253
- It then iterates through each word in the list and strips it of whitespace before passing it to WordNetLemmatizer().lemmatize()
254
- to return its lemma (base form). The function also removes any vague words that are not useful for searching PubMed.
 
255
 
256
  :param self: Represent the instance of the class
257
  :param question_words: str: Get the question words from the user
@@ -297,6 +319,19 @@ class HerbalExpert:
297
  return words, original_answer
298
 
299
  def query_expert(self, question: str = None):
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  question = random.choice(self.default_questions) if question is None else question
301
  print("Question: ", question)
302
 
@@ -326,9 +361,9 @@ class HerbalExpert:
326
 
327
  herbal_expert = HerbalExpert(get_agent_executor())
328
 
329
-
330
  if __name__ == '__main__':
331
  herbal_expert = HerbalExpert(get_agent_executor())
332
- answer = herbal_expert.query_expert("I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
 
333
  print(answer['response'])
334
  # return to api? who knows
 
16
  from langchain.callbacks.manager import CallbackManagerForLLMRun
17
  from typing import List, Union, Optional, Any
18
 
19
+ ngrok_url = 'https://9c1a-2605-7b80-3d-320-fc74-5877-9733-e99b.ngrok-free.app/'
20
+ #ngrok_url = 'http://localhost:1234/'
21
  Entrez.email = "[email protected]"
22
  nltk.download('wordnet')
23
 
24
+
25
  class CustomLLM(LLM):
26
  n: int
27
 
 
36
  run_manager: Optional[CallbackManagerForLLMRun] = None,
37
  **kwargs: Any,
38
  ) -> str:
39
+ """
40
+ The _call function is the function that will be called by the user.
41
+ It should take in a prompt and return a response.
42
+
43
+ :param self: Represent the instance of the class
44
+ :param prompt: str: Pass the prompt to the model
45
+ :param stop: Optional[List[str]]: Define the stop words
46
+ :param run_manager: Optional[CallbackManagerForLLMRun]: Pass the run manager to the call function
47
+ :param **kwargs: Any: Pass in any additional parameters that may be needed for the function
48
+ :param : Pass the prompt to the model
49
+ :return: A string that is the model's response to the prompt
50
+ """
51
  data = {
52
  "messages": [
53
  {
 
63
  return json.loads(response.text)['choices'][0]['message']['content']
64
 
65
 
 
66
  class CustomPromptTemplate(StringPromptTemplate):
67
  template: str
68
 
 
94
 
95
 
96
  def get_agent_executor():
97
+ """
98
+ The get_agent_executor function is a factory function that returns an AgentExecutor object.
99
+ The AgentExecutor object is the main interface for interacting with the agent.
100
+ agent.run('query') is the main method for interacting with the agent.
101
+
102
+ :return: An AgentExecutor object
103
+ """
104
+
105
  llm = CustomLLM(n=10)
106
  question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
107
  question_decompose_agent = LLMSingleActionAgent(
 
117
  tools=[],
118
  verbose=True,
119
  memory=memory
120
+ )
121
 
122
 
123
  def get_num_citations(pmid: str):
124
  """
125
  The get_num_citations function takes a PubMed ID (pmid) as input and returns the number of citations for that
126
+ pmid. The function uses the Entrez module to query PubMed Central's API. The function first queries PMC using
127
+ elink to get all articles citing the given pmid, then it counts how many articles are in that list.
 
128
 
129
  :param pmid: str: Specify the pmid of the article you want to get citations for
130
  :return: The number of citations for a given pmid
 
139
  else:
140
  return 0
141
 
142
+
143
  def fetch_pubmed_articles(keywords, max_search=10, max_context=3):
144
  """
145
+ The fetch_pubmed_articles function takes in a list of keywords and returns the top 3 articles from PubMed that
146
+ are most relevant to those keywords. First the search is done on max_search articles, the list is then sorted by
147
+ number of citations, then the top max_context articles are chosen from that list. If no articles are found with
148
+ the initial list of keywords, the search is rerun with the top 4 keywords of the list
149
+
150
+ :param keywords: Search for articles in pubmed
151
+ :param max_search: Limit the number of initial search results
152
+ :param max_context: Specify the number of articles to return
153
+ :return: A list of articles
154
  """
155
 
156
  try:
 
199
  "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 512, "stream": False
200
  }
201
 
202
+ response = requests.post(ngrok_url + "v1/chat/completions", headers={"Content-Type": "application/json"}, json=data)
203
  return json.loads(response.text)['choices'][0]['message']['content']
204
 
205
 
 
206
  # TODO: add ability to pass message history to model
207
  def format_prompt_and_query(prompt, **kwargs):
208
  """
 
210
  arguments, and then calls call_model_with_history with a list of messages containing the formatted prompt.
211
 
212
  :param prompt: Format the prompt with the values in kwargs
213
+ :param **kwargs: Pass a dictionary of key-value pairs to the prompt formatting function
214
  :return: A list of dictionaries
215
  """
216
 
 
241
  ]
242
  # og = Original, qa = Question Asking, ri = Response Improvement
243
  self.prompts = {
244
+ "og_answer_prompt": """### Instruction: Answer the following question to the best of your ability.
245
+ Question: {question}
246
+ ### Response: Answer: """,
247
 
248
  "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
249
+ keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
250
+ Your response should be a list of only 2 keywords separated by commas.
251
+ Text: {original_answer}
252
+ ### Response: Keywords: """,
253
 
254
  "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
255
+ context is relevant to the question. If the context doesn't help answer the question respond with "I don't know".
256
+ Question: {question}
257
+ Context: {context}
258
+ ### Response: Answer: """,
259
 
260
  "ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
261
+ response that is more informative and intelligent than the original answer and imparts knowledge from
262
+ both the old answer and from the context ONLY if it helps answer the question.
263
+ Question: {question}
264
+ Old Answer: {answer}
265
+ Context: {answer2}
266
+ ### Response: Improved Answer: """
267
  }
268
 
269
  def process_query_words(self, question_words: str, answer_words: str):
270
  # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
271
  """
272
  The process_query_words function takes in a string of words and returns a list of filtered lemmatized words.
273
+ The function first splits the input strings into lists, then removes any duplicate entries from the list. It
274
+ then iterates through each word in the list and strips it of whitespace before passing it to
275
+ WordNetLemmatizer().lemmatize() to return its lemma (base form). The function also removes any vague words
276
+ that are not useful for searching PubMed.
277
 
278
  :param self: Represent the instance of the class
279
  :param question_words: str: Get the question words from the user
 
319
  return words, original_answer
320
 
321
  def query_expert(self, question: str = None):
322
+ """
323
+ The query_expert function takes a question as input and returns the expert's response to that question. The
324
+ function first converts the question into keywords, then uses those keywords to search PubMed for relevant
325
+ articles. If no articles are found, it returns the original response from the expert (i.e., without context).
326
+ If at least one article is found, it asks two follow-up questions: 1) "What do you think of this answer
327
+ in light of these new findings?" and 2) "How would you improve your answer based on these new
328
+ findings?". It then returns both responses.
329
+
330
+ :param self: Represent the instance of the class
331
+ :param question: str: Pass in the question that is to be asked
332
+ :return: A dictionary with the question, response and run info
333
+ """
334
+
335
  question = random.choice(self.default_questions) if question is None else question
336
  print("Question: ", question)
337
 
 
361
 
362
  herbal_expert = HerbalExpert(get_agent_executor())
363
 
 
364
  if __name__ == '__main__':
365
  herbal_expert = HerbalExpert(get_agent_executor())
366
+ answer = herbal_expert.query_expert(
367
+ "I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
368
  print(answer['response'])
369
  # return to api? who knows