anushm committed on
Commit
bc47ca4
1 Parent(s): e88f7e1

Upload herbal_expert.py

Browse files
Files changed (1) hide show
  1. herbal_expert.py +80 -45
herbal_expert.py CHANGED
@@ -17,6 +17,7 @@ from langchain.callbacks.manager import CallbackManagerForLLMRun
17
  from typing import List, Union, Optional, Any
18
 
19
  ngrok_url = 'https://2590-2605-7b80-3d-320-a515-4f0d-f60e-71e5.ngrok-free.app/'
 
20
  Entrez.email = "[email protected]"
21
  nltk.download('wordnet')
22
 
@@ -41,14 +42,13 @@ class CustomLLM(LLM):
41
  "content": prompt
42
  }
43
  ],
44
- "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 512, "stream": False
45
  }
46
 
47
  response = requests.post(ngrok_url + "v1/chat/completions",
48
  headers={"Content-Type": "application/json"}, json=data)
49
  return json.loads(response.text)['choices'][0]['message']['content']
50
 
51
- # return make_inference_call(prompt)
52
 
53
 
54
  class CustomPromptTemplate(StringPromptTemplate):
@@ -64,41 +64,52 @@ class CustomOutputParser(AgentOutputParser):
64
 
65
 
66
  bare_output_parser = CustomOutputParser()
67
- question_decompose_prompt = """
68
- ### Instruction: Given the previous conversation history and the current question, pick out the relevant keywords from the question that would be used to search a medical article database.
69
- Chat History: {history}
 
 
70
  Question: {input}
71
 
72
- Your response should be a list of keywords separated by commas:
73
- ### Response:
74
- """
75
 
76
  prompt_with_history = CustomPromptTemplate(
77
  template=question_decompose_prompt,
78
  tools=[],
79
  input_variables=["input", "history"]
80
  )
81
- # %%
82
- llm = CustomLLM(n=10)
83
- question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
84
-
85
- question_decompose_agent = LLMSingleActionAgent(
86
- llm_chain=question_decompose_chain,
87
- output_parser=bare_output_parser,
88
- stop=["\nObservation:"],
89
- allowed_tools=[]
90
- )
91
 
92
- memory = ConversationBufferWindowMemory(k=10)
93
- ax_1 = AgentExecutor.from_agent_and_tools(
94
- agent=question_decompose_agent,
95
- tools=[],
96
- verbose=True,
97
- memory=memory
98
- )
 
 
 
 
 
 
 
 
 
 
 
99
 
100
 
101
  def get_num_citations(pmid: str):
 
 
 
 
 
 
 
 
 
102
  citations_xml = Entrez.read(
103
  Entrez.elink(dbfrom="pubmed", db="pmc", LinkName="pubmed_pubmed_citedin", from_uid=pmid))
104
 
@@ -210,32 +221,44 @@ class HerbalExpert:
210
  ]
211
  # og = Original, qa = Question Asking, ri = Response Improvement
212
  self.prompts = {
213
- "og_answer_prompt": """### Instruction: Answer the following question using the given context. Question: {question}
214
- Answer: ### Response: """,
215
 
216
  "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
217
- keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
218
- Your response should be a list of only 2 keywords separated by commas.
219
- Text: {original_answer}
220
- Keywords: ### Response: """,
221
 
222
  "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
223
- context is relevant to the question. If the context doesn't help answer the question, proceed to answer
224
- question without using the context.
225
- Question: {question}
226
- Context: {context} ### Response: """,
227
 
228
  "ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
229
- response that is more informative and intelligent than the original answer and imparts knowledge from
230
- both the old answer and from the context ONLY if it helps answer the question.
231
- Question: {question}
232
- Old Answer: {answer}
233
- Context: {answer2}
234
- Improved answer: ### Response: """
235
  }
236
 
237
  def process_query_words(self, question_words: str, answer_words: str):
238
  # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
 
 
 
 
 
 
 
 
 
 
 
 
239
  vague_words = ['recommendation', 'recommendations', 'products', 'product']
240
  words = question_words.lower().split(",") + answer_words.lower().split(",")
241
 
@@ -248,6 +271,18 @@ class HerbalExpert:
248
  return list(set(final_list))
249
 
250
  def convert_question_into_words(self, question: str):
 
 
 
 
 
 
 
 
 
 
 
 
251
  original_answer = format_prompt_and_query(self.prompts["og_answer_prompt"], question=question)
252
  print("Original Answer: ", original_answer)
253
 
@@ -262,14 +297,14 @@ class HerbalExpert:
262
  return words, original_answer
263
 
264
  def query_expert(self, question: str = None):
265
- question = self.default_questions[
266
- random.randint(0, len(self.default_questions) - 1)] if question is None else question
267
  print("Question: ", question)
268
 
269
  keywords, original_response = self.convert_question_into_words(question)
270
  print("Keywords: ", keywords)
271
 
272
  context = fetch_pubmed_articles(" AND ".join(keywords), max_search=5)
 
273
 
274
  if len(context) == 0:
275
  return {
@@ -289,11 +324,11 @@ class HerbalExpert:
289
  }
290
 
291
 
292
- herbal_expert = HerbalExpert(ax_1)
293
 
294
 
295
  if __name__ == '__main__':
296
- herbal_expert = HerbalExpert(ax_1)
297
  answer = herbal_expert.query_expert("I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
298
  print(answer['response'])
299
  # return to api? who knows
 
17
  from typing import List, Union, Optional, Any
18
 
19
  ngrok_url = 'https://2590-2605-7b80-3d-320-a515-4f0d-f60e-71e5.ngrok-free.app/'
20
+ ngrok_url = 'http://localhost:1234/'
21
  Entrez.email = "[email protected]"
22
  nltk.download('wordnet')
23
 
 
42
  "content": prompt
43
  }
44
  ],
45
+ "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 700, "stream": False
46
  }
47
 
48
  response = requests.post(ngrok_url + "v1/chat/completions",
49
  headers={"Content-Type": "application/json"}, json=data)
50
  return json.loads(response.text)['choices'][0]['message']['content']
51
 
 
52
 
53
 
54
  class CustomPromptTemplate(StringPromptTemplate):
 
64
 
65
 
66
  bare_output_parser = CustomOutputParser()
67
+ question_decompose_prompt = """### Instruction: Identify and list the keywords that capture the essence of the
68
+ question. List them as a string separated by commas. Focus on the question. The first word should be the most
69
+ important keyword and the last word should be the least important keyword.
70
+
71
+ Chat History: {history}
72
  Question: {input}
73
 
74
+ YOUR RESPONSE SHOULD BE A STRING OF COMMA SEPARATED KEYWORDS:
75
+ ### Response: Keywords: """
 
76
 
77
  prompt_with_history = CustomPromptTemplate(
78
  template=question_decompose_prompt,
79
  tools=[],
80
  input_variables=["input", "history"]
81
  )
 
 
 
 
 
 
 
 
 
 
82
 
83
+
84
+ def get_agent_executor():
85
+ llm = CustomLLM(n=10)
86
+ question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
87
+ question_decompose_agent = LLMSingleActionAgent(
88
+ llm_chain=question_decompose_chain,
89
+ output_parser=bare_output_parser,
90
+ stop=["\nObservation:"],
91
+ allowed_tools=[]
92
+ )
93
+ memory = ConversationBufferWindowMemory(k=10)
94
+
95
+ return AgentExecutor.from_agent_and_tools(
96
+ agent=question_decompose_agent,
97
+ tools=[],
98
+ verbose=True,
99
+ memory=memory
100
+ )
101
 
102
 
103
  def get_num_citations(pmid: str):
104
+ """
105
+ The get_num_citations function takes a PubMed ID (pmid) as input and returns the number of citations for that
106
+ pmid. The function uses the Entrez module to query PubMed Central's API, which is part of NCBI's Entrez
107
+ Programming Utilities. The function first queries PMC using elink to get all articles citing the given pmid,
108
+ then it counts how many articles are in that list.
109
+
110
+ :param pmid: str: Specify the pmid of the article you want to get citations for
111
+ :return: The number of citations for a given pmid
112
+ """
113
  citations_xml = Entrez.read(
114
  Entrez.elink(dbfrom="pubmed", db="pmc", LinkName="pubmed_pubmed_citedin", from_uid=pmid))
115
 
 
221
  ]
222
  # og = Original, qa = Question Asking, ri = Response Improvement
223
  self.prompts = {
224
+ "og_answer_prompt": """### Instruction: Answer the following question to the best of your ability. Question: {question}
225
+ ### Response: Answer: """,
226
 
227
  "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
228
+ keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
229
+ Your response should be a list of only 2 keywords separated by commas.
230
+ Text: {original_answer}
231
+ ### Response: Keywords: """,
232
 
233
  "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
234
+ context is relevant to the question. If the context doesn't help answer the question respond with "I don't know".
235
+ Question: {question}
236
+ Context: {context}
237
+ ### Response: Answer: """,
238
 
239
  "ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
240
+ response that is more informative and intelligent than the original answer and imparts knowledge from
241
+ both the old answer and from the context ONLY if it helps answer the question.
242
+ Question: {question}
243
+ Old Answer: {answer}
244
+ Context: {answer2}
245
+ ### Response: Improved Answer: """
246
  }
247
 
248
  def process_query_words(self, question_words: str, answer_words: str):
249
  # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
250
+ """
251
+ The process_query_words function takes in a string of words and returns a list of filtered lemmatized words.
252
+ The function first splits the input strings into lists, then removes any duplicate entries from the list.
253
+ It then iterates through each word in the list and strips it of whitespace before passing it to WordNetLemmatizer().lemmatize()
254
+ to return its lemma (base form). The function also removes any vague words that are not useful for searching PubMed.
255
+
256
+ :param self: Represent the instance of the class
257
+ :param question_words: str: Get the question words from the user
258
+ :param answer_words: str: Add the answer words to the list of words that will be searched for in pubmed
259
+ :return: A list of words that are not vague
260
+ """
261
+
262
  vague_words = ['recommendation', 'recommendations', 'products', 'product']
263
  words = question_words.lower().split(",") + answer_words.lower().split(",")
264
 
 
271
  return list(set(final_list))
272
 
273
  def convert_question_into_words(self, question: str):
274
+ """
275
+ The convert_question_into_words function takes a question and returns the words that are in the question.
276
+ The function first decomposes the original answer into its component parts, then it decomposes
277
+ each of those components into their own component parts. It then uses these decomposed answers to
278
+ find all the words that are in both questions and answers.
279
+
280
+ :param self: Make the function a method of the class
281
+ :param question: str: Pass in the question that is being asked
282
+ :return: A tuple of two elements
283
+ :doc-author: Trelent
284
+ """
285
+
286
  original_answer = format_prompt_and_query(self.prompts["og_answer_prompt"], question=question)
287
  print("Original Answer: ", original_answer)
288
 
 
297
  return words, original_answer
298
 
299
  def query_expert(self, question: str = None):
300
+ question = random.choice(self.default_questions) if question is None else question
 
301
  print("Question: ", question)
302
 
303
  keywords, original_response = self.convert_question_into_words(question)
304
  print("Keywords: ", keywords)
305
 
306
  context = fetch_pubmed_articles(" AND ".join(keywords), max_search=5)
307
+ print(context)
308
 
309
  if len(context) == 0:
310
  return {
 
324
  }
325
 
326
 
327
+ herbal_expert = HerbalExpert(get_agent_executor())
328
 
329
 
330
  if __name__ == '__main__':
331
+ herbal_expert = HerbalExpert(get_agent_executor())
332
  answer = herbal_expert.query_expert("I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
333
  print(answer['response'])
334
  # return to api? who knows