Spaces:

AnunaAI
/

herbal-expert

Sleeping

App Files Files Community

anushm committed on Oct 23, 2023

Commit

bc47ca4

•

1 Parent(s): e88f7e1

Upload herbal_expert.py

Browse files

Files changed (1) hide show

herbal_expert.py +80 -45

herbal_expert.py CHANGED Viewed

@@ -17,6 +17,7 @@ from langchain.callbacks.manager import CallbackManagerForLLMRun
 from typing import List, Union, Optional, Any
 ngrok_url = 'https://2590-2605-7b80-3d-320-a515-4f0d-f60e-71e5.ngrok-free.app/'
 Entrez.email = "[email protected]"
 nltk.download('wordnet')
@@ -41,14 +42,13 @@ class CustomLLM(LLM):
                     "content": prompt
                 }
             ],
-            "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 512, "stream": False
         }
         response = requests.post(ngrok_url + "v1/chat/completions",
                                  headers={"Content-Type": "application/json"}, json=data)
         return json.loads(response.text)['choices'][0]['message']['content']
-        # return make_inference_call(prompt)
 class CustomPromptTemplate(StringPromptTemplate):
@@ -64,41 +64,52 @@ class CustomOutputParser(AgentOutputParser):
 bare_output_parser = CustomOutputParser()
-question_decompose_prompt = """
-### Instruction: Given the previous conversation history and the current question, pick out the relevant keywords from the question that would be used to search a medical article database.
-Chat History: {history}
 Question: {input}
-Your response should be a list of keywords separated by commas:
-### Response:
-"""
 prompt_with_history = CustomPromptTemplate(
     template=question_decompose_prompt,
     tools=[],
     input_variables=["input", "history"]
 )
-# %%
-llm = CustomLLM(n=10)
-question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
-question_decompose_agent = LLMSingleActionAgent(
-    llm_chain=question_decompose_chain,
-    output_parser=bare_output_parser,
-    stop=["\nObservation:"],
-    allowed_tools=[]
-)
-memory = ConversationBufferWindowMemory(k=10)
-ax_1 = AgentExecutor.from_agent_and_tools(
-    agent=question_decompose_agent,
-    tools=[],
-    verbose=True,
-    memory=memory
-)
 def get_num_citations(pmid: str):
     citations_xml = Entrez.read(
         Entrez.elink(dbfrom="pubmed", db="pmc", LinkName="pubmed_pubmed_citedin", from_uid=pmid))
@@ -210,32 +221,44 @@ class HerbalExpert:
         ]
         # og = Original, qa = Question Asking, ri = Response Improvement
         self.prompts = {
-            "og_answer_prompt": """### Instruction: Answer the following question using the given context. Question: {question}
-            Answer: ### Response: """,
             "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
-            keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
-            Your response should be a list of only 2 keywords separated by commas.
-            Text: {original_answer}
-            Keywords: ### Response: """,
             "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
-            context is relevant to the question. If the context doesn't help answer the question, proceed to answer
-            question without using the context.
-            Question: {question}
-            Context: {context} ### Response: """,
             "ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
-            response that is more informative and intelligent than the original answer and imparts knowledge from
-            both the old answer and from the context ONLY if it helps answer the question.
-            Question: {question}
-            Old Answer: {answer}
-            Context: {answer2}
-            Improved answer: ### Response: """
         }
     def process_query_words(self, question_words: str, answer_words: str):
         # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
         vague_words = ['recommendation', 'recommendations', 'products', 'product']
         words = question_words.lower().split(",") + answer_words.lower().split(",")
@@ -248,6 +271,18 @@ class HerbalExpert:
         return list(set(final_list))
     def convert_question_into_words(self, question: str):
         original_answer = format_prompt_and_query(self.prompts["og_answer_prompt"], question=question)
         print("Original Answer: ", original_answer)
@@ -262,14 +297,14 @@ class HerbalExpert:
         return words, original_answer
     def query_expert(self, question: str = None):
-        question = self.default_questions[
-            random.randint(0, len(self.default_questions) - 1)] if question is None else question
         print("Question: ", question)
         keywords, original_response = self.convert_question_into_words(question)
         print("Keywords: ", keywords)
         context = fetch_pubmed_articles(" AND ".join(keywords), max_search=5)
         if len(context) == 0:
             return {
@@ -289,11 +324,11 @@ class HerbalExpert:
         }
-herbal_expert = HerbalExpert(ax_1)
 if __name__ == '__main__':
-    herbal_expert = HerbalExpert(ax_1)
     answer = herbal_expert.query_expert("I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
     print(answer['response'])
     # return to api? who knows

 from typing import List, Union, Optional, Any
 ngrok_url = 'https://2590-2605-7b80-3d-320-a515-4f0d-f60e-71e5.ngrok-free.app/'
+ngrok_url = 'http://localhost:1234/'
 Entrez.email = "[email protected]"
 nltk.download('wordnet')
                     "content": prompt
                 }
             ],
+            "stop": ["### Instruction:"], "temperature": 0, "max_tokens": 700, "stream": False
         }
         response = requests.post(ngrok_url + "v1/chat/completions",
                                  headers={"Content-Type": "application/json"}, json=data)
         return json.loads(response.text)['choices'][0]['message']['content']
 class CustomPromptTemplate(StringPromptTemplate):
 bare_output_parser = CustomOutputParser()
+question_decompose_prompt = """### Instruction: Identify and list the keywords that capture the essence of the
+question. List them as a string separated by commas. Focus on the question. The first word should be the most
+important keyword and the last word should be the least important keyword.
+Chat History: {history}
 Question: {input}
+YOUR RESPONSE SHOULD BE A STRING OF COMMA SEPARATED KEYWORDS:
+### Response: Keywords: """
 prompt_with_history = CustomPromptTemplate(
     template=question_decompose_prompt,
     tools=[],
     input_variables=["input", "history"]
 )
+def get_agent_executor():
+    llm = CustomLLM(n=10)
+    question_decompose_chain = LLMChain(llm=llm, prompt=prompt_with_history)
+    question_decompose_agent = LLMSingleActionAgent(
+        llm_chain=question_decompose_chain,
+        output_parser=bare_output_parser,
+        stop=["\nObservation:"],
+        allowed_tools=[]
+    )
+    memory = ConversationBufferWindowMemory(k=10)
+    return AgentExecutor.from_agent_and_tools(
+        agent=question_decompose_agent,
+        tools=[],
+        verbose=True,
+        memory=memory
+        )
 def get_num_citations(pmid: str):
+    """
+    The get_num_citations function takes a PubMed ID (pmid) as input and returns the number of citations for that
+    pmid. The function uses the Entrez module to query PubMed Central's API, which is part of NCBI's Entrez
+    Programming Utilities. The function first queries PMC using elink to get all articles citing the given pmid,
+    then it counts how many articles are in that list.
+    :param pmid: str: Specify the pmid of the article you want to get citations for
+    :return: The number of citations for a given pmid
+    """
     citations_xml = Entrez.read(
         Entrez.elink(dbfrom="pubmed", db="pmc", LinkName="pubmed_pubmed_citedin", from_uid=pmid))
         ]
         # og = Original, qa = Question Asking, ri = Response Improvement
         self.prompts = {
+            "og_answer_prompt": """### Instruction: Answer the following question to the best of your ability. Question: {question}
+                    ### Response: Answer: """,
             "ans_decompose_prompt": """### Instruction: Given the following text, identify the 2 most important
+                    keywords that capture the essence of the text. If there's a list of products, choose the top 2 products.
+                    Your response should be a list of only 2 keywords separated by commas.
+                    Text: {original_answer}
+                    ### Response: Keywords: """,
             "qa_prompt": """### Instruction: Answer the following question using the given context ONLY if the
+                    context is relevant to the question. If the context doesn't help answer the question respond with "I don't know".
+                    Question: {question}
+                    Context: {context}
+                    ### Response: Answer: """,
             "ri_prompt": """### Instruction: You are an caring, intelligent question answering agent. Craft a
+                    response that is more informative and intelligent than the original answer and imparts knowledge from
+                    both the old answer and from the context ONLY if it helps answer the question.
+                    Question: {question}
+                    Old Answer: {answer}
+                    Context: {answer2}
+                    ### Response: Improved Answer: """
         }
     def process_query_words(self, question_words: str, answer_words: str):
         # don't need to be searching for these in pubmed. Should we include: 'supplements', 'supplement'
+        """
+        The process_query_words function takes in a string of words and returns a list of filtered lemmatized words.
+        The function first splits the input strings into lists, then removes any duplicate entries from the list.
+        It then iterates through each word in the list and strips it of whitespace before passing it to WordNetLemmatizer().lemmatize()
+        to return its lemma (base form). The function also removes any vague words that are not useful for searching PubMed.
+        :param self: Represent the instance of the class
+        :param question_words: str: Get the question words from the user
+        :param answer_words: str: Add the answer words to the list of words that will be searched for in pubmed
+        :return: A list of words that are not vague
+        """
         vague_words = ['recommendation', 'recommendations', 'products', 'product']
         words = question_words.lower().split(",") + answer_words.lower().split(",")
         return list(set(final_list))
     def convert_question_into_words(self, question: str):
+        """
+        The convert_question_into_words function takes a question and returns the words that are in the question.
+        The function first decomposes the original answer into its component parts, then it decomposes
+        each of those components into their own component parts. It then uses these decomposed answers to
+        find all the words that are in both questions and answers.
+        :param self: Make the function a method of the class
+        :param question: str: Pass in the question that is being asked
+        :return: A tuple of two elements
+        :doc-author: Trelent
+        """
         original_answer = format_prompt_and_query(self.prompts["og_answer_prompt"], question=question)
         print("Original Answer: ", original_answer)
         return words, original_answer
     def query_expert(self, question: str = None):
+        question = random.choice(self.default_questions) if question is None else question
         print("Question: ", question)
         keywords, original_response = self.convert_question_into_words(question)
         print("Keywords: ", keywords)
         context = fetch_pubmed_articles(" AND ".join(keywords), max_search=5)
+        print(context)
         if len(context) == 0:
             return {
         }
+herbal_expert = HerbalExpert(get_agent_executor())
 if __name__ == '__main__':
+    herbal_expert = HerbalExpert(get_agent_executor())
     answer = herbal_expert.query_expert("I'm experiencing consistent stress and anxiety. What herbs or supplements could help alleviate these symptoms?")
     print(answer['response'])
     # return to api? who knows