Spaces:
Sleeping
Sleeping
File size: 16,222 Bytes
2658964 8895bbd 2658964 1f43c99 8895bbd 2658964 1f43c99 2658964 1f43c99 2658964 bc47ca4 2658964 aefa8cd 2658964 bc47ca4 2658964 bc47ca4 2658964 bc47ca4 1f43c99 bc47ca4 1f43c99 2658964 bc47ca4 1f43c99 bc47ca4 2658964 1f43c99 2658964 1f43c99 2658964 1f43c99 2658964 1f43c99 2658964 1f43c99 2658964 1f43c99 2658964 e88f7e1 1f43c99 2658964 1f43c99 2658964 bc47ca4 1f43c99 bc47ca4 2658964 bc47ca4 2658964 1f43c99 bc47ca4 2658964 bc47ca4 2658964 bc47ca4 2658964 bc47ca4 1f43c99 2658964 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 |
import requests
import json
import random
from langchain.agents import AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain.schema import AgentAction, AgentFinish
from langchain.memory import ConversationBufferWindowMemory
from langchain import LLMChain
from langchain.llms.base import LLM
from Bio import Entrez
from requests import HTTPError
from nltk.stem import WordNetLemmatizer
import nltk
from langchain.callbacks.manager import CallbackManagerForLLMRun
from typing import List, Union, Optional, Any
# Base URL of the model server; "v1/chat/completions" is appended to it for every request
# made by CustomLLM._call and call_model_with_history. Must end with a trailing slash.
ngrok_url = 'https://9c1a-2605-7b80-3d-320-fc74-5877-9733-e99b.ngrok-free.app/'
# Alternative base URL for a locally running server.
#ngrok_url = 'http://localhost:1234/'
# Contact address attached to Entrez requests.
# NOTE(review): this value looks like a redacted placeholder - confirm a real address is configured.
Entrez.email = "[email protected]"
# Runs at import time; fetches the WordNet corpus needed by the WordNetLemmatizer used in HerbalExpert.
nltk.download('wordnet')
class CustomLLM(LLM):
    """LangChain LLM wrapper that posts prompts to the chat-completions endpoint under ``ngrok_url``."""

    # Required field when constructing the LLM; it is not read anywhere in this class.
    n: int

    @property
    def _llm_type(self) -> str:
        """Return the type tag of this LLM implementation."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """
        Send ``prompt`` as a single user message and return the text of the first completion choice.

        :param prompt: str: Text sent as the content of the user message
        :param stop: Optional[List[str]]: Extra stop sequences requested by the caller; they are added to
            the always-present "### Instruction:" stop sequence
        :param run_manager: Optional[CallbackManagerForLLMRun]: Accepted for interface compatibility, unused
        :param **kwargs: Any: Accepted for interface compatibility, unused
        :return: The content of the first choice's message in the server response
        :raises requests.HTTPError: If the server answers with a 4xx/5xx status
        """
        # Always stop at the next instruction header, and additionally honour whatever
        # stop sequences the caller asked for instead of silently dropping them.
        stop_sequences = ["### Instruction:"]
        if stop:
            stop_sequences.extend(seq for seq in stop if seq not in stop_sequences)

        data = {
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stop": stop_sequences, "temperature": 0, "max_tokens": 700, "stream": False
        }
        response = requests.post(ngrok_url + "v1/chat/completions",
                                 headers={"Content-Type": "application/json"}, json=data)
        # Surface HTTP failures directly rather than as a confusing KeyError/JSON error below.
        response.raise_for_status()
        return json.loads(response.text)['choices'][0]['message']['content']
class CustomPromptTemplate(StringPromptTemplate):
    """Prompt template that renders its ``template`` text with ``str.format``."""

    # Raw prompt text containing ``{name}`` placeholders.
    template: str

    def format(self, **kwargs) -> str:
        """
        Fill the template's placeholders with the given keyword arguments.
        :param **kwargs: Values for the placeholders in ``template``
        :return: The rendered prompt string
        """
        rendered = self.template.format(**kwargs)
        return rendered
class CustomOutputParser(AgentOutputParser):
    """Output parser that treats every LLM reply as the agent's final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """
        Wrap the raw LLM text in an AgentFinish; no AgentAction is ever produced.
        :param llm_output: str: Raw text returned by the LLM
        :return: An AgentFinish whose "output" value and log are both the raw text
        """
        final_values = {"output": llm_output}
        return AgentFinish(return_values=final_values, log=llm_output)
# Single shared parser instance; get_agent_executor() passes it to the agent as output_parser.
bare_output_parser = CustomOutputParser()
# Keyword-extraction prompt. The {history} and {input} placeholders are filled in by
# CustomPromptTemplate.format(), which calls str.format on this text.
question_decompose_prompt = """### Instruction: Identify and list the keywords that capture the essence of the
question. List them as a string separated by commas. Focus on the question. The first word should be the most
important keyword and the last word should be the least important keyword.
Chat History: {history}
Question: {input}
YOUR RESPONSE SHOULD BE A STRING OF COMMA SEPARATED KEYWORDS:
### Response: Keywords: """
# Template object used by the question-decomposition chain; input_variables names the two
# placeholders that appear in the prompt text above.
prompt_with_history = CustomPromptTemplate(
template=question_decompose_prompt,
tools=[],  # NOTE(review): CustomPromptTemplate only declares `template`; confirm this argument is accepted/needed
input_variables=["input", "history"]
)
def get_agent_executor():
    """
    Build the question-decomposition agent and wrap it in an AgentExecutor.
    The agent runs the keyword-extraction prompt through CustomLLM, has no tools, and keeps a
    windowed conversation memory (k=10). Call ``.run('query')`` on the returned executor to use it.
    :return: An AgentExecutor object
    """
    decompose_chain = LLMChain(llm=CustomLLM(n=10), prompt=prompt_with_history)
    keyword_agent = LLMSingleActionAgent(
        llm_chain=decompose_chain,
        output_parser=bare_output_parser,
        stop=["\nObservation:"],
        allowed_tools=[],
    )
    window_memory = ConversationBufferWindowMemory(k=10)
    executor = AgentExecutor.from_agent_and_tools(
        agent=keyword_agent,
        tools=[],
        verbose=True,
        memory=window_memory,
    )
    return executor
def get_num_citations(pmid: str) -> int:
    """
    Return the number of "cited in" links Entrez reports for a PubMed ID.
    The function queries Entrez elink with LinkName "pubmed_pubmed_citedin" and counts the links in the
    first link set that contains any. It always returns an int: 0 when Entrez returns no link sets or
    none of them contains links (previously the function could fall through and return None, which broke
    sorting in the caller).
    :param pmid: str: Specify the pmid of the article you want to get citations for
    :return: The number of citations for a given pmid
    """
    handle = Entrez.elink(dbfrom="pubmed", db="pmc", LinkName="pubmed_pubmed_citedin", from_uid=pmid)
    try:
        link_sets = Entrez.read(handle)
    finally:
        # Release the underlying HTTP connection even if parsing fails.
        handle.close()

    for link_set in link_sets:
        link_dbs = link_set["LinkSetDb"]
        if link_dbs:
            return len(link_dbs[0]["Link"])
    return 0
def _search_pubmed_ids(term, retmax):
    """Run a PubMed esearch for ``term`` and return the list of matching PMIDs."""
    handle = Entrez.esearch(db="pubmed", term=term, retmax=retmax)
    try:
        return list(Entrez.read(handle)["IdList"])
    finally:
        handle.close()


def fetch_pubmed_articles(keywords, max_search=10, max_context=3):
    """
    Search PubMed for the given keywords and return the most-cited matching articles as text.
    First the search is done on max_search articles, the list is then sorted by number of citations, and
    the top max_context articles are fetched. If no articles are found with the full query and it has
    more than 4 keywords, the search is rerun with only the first 4 keywords.
    :param keywords: Either a query string whose keywords are joined with " AND ", or an iterable of keywords
    :param max_search: Limit the number of initial search results
    :param max_context: Specify the number of articles to return
    :return: A list of strings, each "<title>\\n<abstract>"; empty if nothing was found or a request failed
    """
    # Bio.Entrez performs its requests through urllib, so urllib's HTTPError has to be
    # handled in addition to the requests HTTPError imported at module level.
    from urllib.error import HTTPError as UrllibHTTPError

    if isinstance(keywords, str):
        query = keywords
        terms = [term.strip() for term in keywords.split(" AND ") if term.strip()]
    else:
        terms = [str(term).strip() for term in keywords if str(term).strip()]
        query = " AND ".join(terms)
    if not terms:
        return []

    try:
        id_list = _search_pubmed_ids(query, max_search)
        if not id_list and len(terms) > 4:
            # Retry with the first four keywords (not the first four characters of the query).
            id_list = _search_pubmed_ids(" AND ".join(terms[:4]), max_search)
        if not id_list:
            # Nothing to rank or fetch; efetch must not be called with an empty id list.
            return []

        # `or 0` keeps the sort safe even if the citation lookup yields None.
        num_citations = [(pmid, get_num_citations(pmid) or 0) for pmid in id_list]
        top_n_papers = sorted(num_citations, key=lambda pair: pair[1], reverse=True)[:max_context]
        print(f"top_{max_context}_papers: ", top_n_papers)
        top_ids = [paper[0] for paper in top_n_papers]

        fetch_handle = Entrez.efetch(db="pubmed", id=top_ids, rettype="medline", retmode="xml")
        try:
            fetched_articles = Entrez.read(fetch_handle)
        finally:
            fetch_handle.close()

        articles = []
        # somehow only pull natural therapeutic articles
        for fetched in fetched_articles['PubmedArticle']:
            article = fetched['MedlineCitation']['Article']
            title = article['ArticleTitle']
            # Only the first abstract section is used; fall back when the record has none.
            abstract_parts = article['Abstract']['AbstractText'] if 'Abstract' in article else []
            abstract = abstract_parts[0] if abstract_parts else "No Abstract"
            articles.append(title + "\n" + abstract)
        return articles
    except (HTTPError, UrllibHTTPError) as e:
        print("HTTPError: ", e)
        return []
    except RuntimeError as e:
        print("RuntimeError: ", e)
        return []
def call_model_with_history(messages: list) -> str:
    """
    The call_model_with_history function takes a list of messages and returns the next message in the conversation.
    The messages are posted to the chat-completions endpoint under ``ngrok_url`` with temperature 0,
    a 512-token limit and "### Instruction:" as stop sequence.
    :param messages: list: Chat messages (dicts with "role" and "content") to send to the model
    :return: the text of the model's reply (content of the first choice)
    :raises requests.HTTPError: If the server answers with a 4xx/5xx status
    """
    data = {
        "messages": messages,
        "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 512, "stream": False
    }
    response = requests.post(ngrok_url + "v1/chat/completions", headers={"Content-Type": "application/json"}, json=data)
    # Report HTTP failures as such instead of as a KeyError/JSON error while parsing the body.
    response.raise_for_status()
    return json.loads(response.text)['choices'][0]['message']['content']
# TODO: add ability to pass message history to model
def format_prompt_and_query(prompt, **kwargs):
    """
    Render ``prompt`` with ``str.format(**kwargs)`` and send it to the model as a user message,
    preceded by a fixed system message.
    :param prompt: Prompt text containing ``{name}`` placeholders
    :param **kwargs: Values for the placeholders in the prompt
    :return: Whatever call_model_with_history returns for the two-message conversation
    """
    user_message = {"role": "user", "content": prompt.format(**kwargs)}
    system_message = {"role": "system", "content": "Perform the instructions to the best of your ability."}
    return call_model_with_history([system_message, user_message])
class HerbalExpert:
    """
    Question-answering pipeline: answer a question with the model, extract keywords from the question
    and the answer, look up PubMed articles for those keywords, and use them to improve the answer.
    """

    def __init__(self, qd_chain):
        """
        :param qd_chain: Object whose ``run(question)`` returns a comma separated keyword string
        """
        self.qd_chain = qd_chain
        self.wnl = WordNetLemmatizer()
        # Questions picked at random when query_expert is called without a question.
        self.default_questions = [
            "How is chamomile traditionally used in herbal medicine?",
            "What are the potential side effects or interactions of consuming echinacea?",
            "Can you explain the different methods of consuming lavender for health benefits?",
            "Which herbs are commonly known for their anti-inflammatory properties?",
            "I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?",
            "Are there any natural herbs that could support better sleep?",
            "What cannabis or hemp products would you recommend for chronic pain relief?",
            "I'm looking to boost my immune system. Are there any specific herbs or supplements that could help?",
            "Which herbs or supplements are recommended for enhancing cognitive functions and memory?"
        ]
        # og = Original, qa = Question Asking, ri = Response Improvement
        self.prompts = {
            "og_answer_prompt": """### Instruction: Answer the following question to the best of your ability.
Question: {question}
### Response: Answer: """,
            "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
Your response should be a list of only 2 keywords separated by commas.
Text: {original_answer}
### Response: Keywords: """,
            "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
context is relevant to the question. If the context doesn't help answer the question respond with "I don't know".
Question: {question}
Context: {context}
### Response: Answer: """,
            "ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
response that is more informative and intelligent than the original answer and imparts knowledge from
both the old answer and from the context ONLY if it helps answer the question.
Question: {question}
Old Answer: {answer}
Context: {answer2}
### Response: Improved Answer: """
        }

    def process_query_words(self, question_words: str, answer_words: str):
        """
        Turn two comma separated keyword strings into one de-duplicated list of lemmatized keywords.
        Each keyword is lower-cased and stripped of surrounding whitespace and double quotes. Empty
        entries and vague words that are not useful for searching PubMed are dropped, the rest is
        lemmatized. Duplicates are removed while keeping first-seen order, so question keywords
        (most important first) stay ahead of answer keywords.
        :param self: Represent the instance of the class
        :param question_words: str: Comma separated keywords extracted from the question
        :param answer_words: str: Comma separated keywords extracted from the original answer
        :return: A list of unique lemmatized keywords in order of first appearance
        """
        # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
        vague_words = {'recommendation', 'recommendations', 'products', 'product'}
        words = question_words.lower().split(",") + answer_words.lower().split(",")
        lemmas = []
        for word in words:
            cleaned = word.strip().strip('"')
            # Empty tokens (e.g. from "a,,b" or an empty reply) would yield a broken " AND  AND " query.
            if cleaned and cleaned not in vague_words:
                lemmas.append(self.wnl.lemmatize(cleaned))
        # dict.fromkeys removes duplicates deterministically and preserves order, unlike set().
        return list(dict.fromkeys(lemmas))

    def convert_question_into_words(self, question: str):
        """
        Produce search keywords and a first-pass answer for a question.
        The model first answers the question directly. Keywords are then extracted from the question
        (via ``qd_chain``) and from that answer (via the answer-decomposition prompt), and both keyword
        strings are merged by process_query_words.
        :param self: Make the function a method of the class
        :param question: str: Pass in the question that is being asked
        :return: A tuple ``(keywords, original_answer)``
        """
        original_answer = format_prompt_and_query(self.prompts["og_answer_prompt"], question=question)
        print("Original Answer: ", original_answer)
        question_decompose = self.qd_chain.run(question)
        print("Question Decompose: ", question_decompose)
        original_answer_decompose = format_prompt_and_query(self.prompts["ans_decompose_prompt"],
                                                            original_answer=original_answer)
        print("Original Answer Decomposed: ", original_answer_decompose)
        words = self.process_query_words(question_decompose, original_answer_decompose)
        return words, original_answer

    def query_expert(self, question: Optional[str] = None):
        """
        Answer a question, improving the answer with PubMed context when any is found.
        The question is converted into keywords, which are used to search PubMed. If no keywords or no
        articles are found, the original model answer is returned. Otherwise the model answers the
        question from the article text, and is then asked to craft an improved answer from the original
        answer and that contextual answer.
        :param self: Represent the instance of the class
        :param question: str: The question to ask; a random default question is used when None
        :return: A dictionary with the keys "question", "response" and "info"
        """
        question = random.choice(self.default_questions) if question is None else question
        print("Question: ", question)
        keywords, original_response = self.convert_question_into_words(question)
        print("Keywords: ", keywords)
        # Without keywords there is nothing to search for, so skip the PubMed round trip.
        context = fetch_pubmed_articles(" AND ".join(keywords), max_search=5) if keywords else []
        print(context)
        if not context:
            return {
                "question": question,
                "response": original_response,
                "info": "No context found"
            }
        # Pass the article text itself; formatting the list directly would embed its Python repr
        # (brackets, quotes, escaped newlines) in the prompt.
        context_text = "\n\n".join(context)
        contextual_response = format_prompt_and_query(self.prompts["qa_prompt"], question=question,
                                                      context=context_text)
        improved_response = format_prompt_and_query(self.prompts["ri_prompt"], question=question,
                                                    answer=original_response, answer2=contextual_response)
        return {
            "question": question,
            "response": improved_response,
            "info": "Success"
        }
# Module-level instance, created at import time.
herbal_expert = HerbalExpert(get_agent_executor())

if __name__ == '__main__':
    # Manual smoke run: build a fresh expert and ask one sample question.
    herbal_expert = HerbalExpert(get_agent_executor())
    answer = herbal_expert.query_expert(
        "I'm experiencing consistent stress and anxiety. "
        "What herbs or supplements could help alleviate these symptoms?"
    )
    print(answer['response'])
    # return to api? who knows
|