Asaad Almutareb committed
Commit · d713a77
Parent(s): 57e87b0
reordered the tools and adjusted their descriptions
added the new tools to the active toolkit
added fastapi wrapper for gradio
- app.py +6 -4
- hf_mixtral_agent.py +3 -1
- innovation_pathfinder_ai/structured_tools/structured_tools.py +73 -74
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,3 +1,4 @@
+from fastapi import FastAPI
 import gradio as gr
 from hf_mixtral_agent import agent_executor
 from innovation_pathfinder_ai.source_container.container import (
@@ -19,6 +20,8 @@ dotenv.load_dotenv()
 
 logger = logger.get_console_logger("app")
 
+app = FastAPI()
+
 def initialize_chroma_db() -> Chroma:
     collection_name=os.getenv("CONVERSATION_COLLECTION_NAME")
 
@@ -102,8 +105,7 @@ if __name__ == "__main__":
 )
 clear.click(lambda: None, None, chatbot, queue=False)
 
-demo.queue()
-demo.launch(debug=True, share=True)
-
+demo.queue().launch(debug=True, share=True)
 
-x = 0 # for debugging purposes
+x = 0 # for debugging purposes
+app = gr.mount_gradio_app(app, demo, path="/")
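The net effect in app.py: the Gradio Blocks UI is now attached to a FastAPI instance via gr.mount_gradio_app(). If the FastAPI object and the mount call sit at module import level (the rendered diff does not preserve indentation), the Space could also be served by a plain ASGI server instead of relying on demo.launch(). A minimal sketch, assuming the module is importable as app and that uvicorn is available (it is typically installed as a Gradio dependency but is not pinned in requirements.txt); run_server.py is a hypothetical helper, not part of this commit:

    # run_server.py - hypothetical helper, not part of this commit
    import uvicorn

    if __name__ == "__main__":
        # "app:app" points at the FastAPI instance created in app.py;
        # gr.mount_gradio_app(app, demo, path="/") exposes the Gradio UI at the root path.
        uvicorn.run("app:app", host="0.0.0.0", port=7860)

When app.py is imported this way, the code under its own if __name__ == "__main__": guard (including demo.queue().launch(...)) does not run, so the mounted route would be the only entry point under that assumption.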
hf_mixtral_agent.py
CHANGED
@@ -8,7 +8,7 @@ from langchain.tools.render import render_text_description
 import os
 from dotenv import load_dotenv
 from innovation_pathfinder_ai.structured_tools.structured_tools import (
-    arxiv_search, get_arxiv_paper, google_search, wikipedia_search
+    arxiv_search, get_arxiv_paper, google_search, wikipedia_search, knowledgeBase_search, memory_search
 )
 
 from langchain.prompts import PromptTemplate
@@ -36,6 +36,8 @@ llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
 
 
 tools = [
+    memory_search,
+    knowledgeBase_search,
     arxiv_search,
     wikipedia_search,
     google_search,
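Reordering the list here matters beyond readability: hf_mixtral_agent.py imports render_text_description (see the hunk header above), which emits one "name: description" line per tool in list order, and that text is what the Mixtral prompt ultimately sees. With this change the local stores (memory_search, knowledgeBase_search) are presented to the agent before the online tools. A minimal sketch of that rendering, assuming only the imports shown in this file; the actual prompt assembly in the agent may differ:

    # Sketch: how list order reaches the prompt text the agent reads.
    from langchain.tools.render import render_text_description
    from innovation_pathfinder_ai.structured_tools.structured_tools import (
        arxiv_search, get_arxiv_paper, google_search, wikipedia_search,
        knowledgeBase_search, memory_search,
    )

    tools = [
        memory_search,        # local stores first ...
        knowledgeBase_search,
        arxiv_search,         # ... online sources afterwards
        wikipedia_search,
        google_search,
    ]

    # Produces one "tool_name: tool description" line per tool, in list order.
    print(render_text_description(tools))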
innovation_pathfinder_ai/structured_tools/structured_tools.py
CHANGED
@@ -35,65 +35,35 @@ import os
 # from innovation_pathfinder_ai.utils import create_wikipedia_urls_from_text
 
 @tool
-def …
-    """Search …
-    …
-    formatted_sources = format_arxiv_documents(data)
-    all_sources += formatted_sources
-    parsed_sources = parse_list_to_dicts(formatted_sources)
-    add_many(parsed_sources)
-
-    return data.__str__()
-
-@tool
-def get_arxiv_paper(paper_id:str) -> None:
-    """Download a paper from axriv to download a paper please input
-    the axriv id such as "1605.08386v1" This tool is named get_arxiv_paper
-    If you input "http://arxiv.org/abs/2312.02813", This will break the code. Also only do
-    "2312.02813". In addition please download one paper at a time. Pleaase keep the inputs/output
-    free of additional information only have the id.
-    """
-    # code from https://lukasschwab.me/arxiv.py/arxiv.html
-    paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
-
-    number_without_period = paper_id.replace('.', '')
+def memory_search(query:str) -> str:
+    """Search the memory vector store for existing knowledge and relevent pervious researches. \
+    This is your primary source to start your search with checking what you already have learned from the past, before going online."""
+    # Since we have more than one collections we should change the name of this tool
+    client = chromadb.PersistentClient(
+    # path=persist_directory,
+    )
 
-    …
+    collection_name=os.getenv("CONVERSATION_COLLECTION_NAME")
+    #store using envar
 
+    embedding_function = SentenceTransformerEmbeddings(
+        model_name="all-MiniLM-L6-v2",
+    )
 
-    …
+    vector_db = Chroma(
+        client=client, # client for Chroma
+        collection_name=collection_name,
+        embedding_function=embedding_function,
+    )
 
-    …
-    cleaner_sources =format_search_results(search_results)
-    parsed_csources = parse_list_to_dicts(cleaner_sources)
-    add_many(parsed_csources)
-    all_sources += cleaner_sources
+    retriever = vector_db.as_retriever()
+    docs = retriever.get_relevant_documents(query)
 
-    return …
-
-@tool
-def wikipedia_search(query: str) -> str:
-    """Search Wikipedia for additional information to expand on research papers or when no papers can be found."""
-    global all_sources
-
-    api_wrapper = WikipediaAPIWrapper()
-    wikipedia_search = WikipediaQueryRun(api_wrapper=api_wrapper)
-    wikipedia_results = wikipedia_search.run(query)
-    all_sources += create_wikipedia_urls_from_text(wikipedia_results)
-    return wikipedia_results
+    return docs.__str__()
 
 @tool
-def …
-    """Search the …
+def knowledgeBase_search(query:str) -> str:
+    """Search the internal knowledge base for research papers and relevent chunks"""
     # Since we have more than one collections we should change the name of this tool
     client = chromadb.PersistentClient(
     # path=persist_directory,
@@ -117,6 +87,36 @@ def chroma_search(query:str) -> str:
 
     return docs.__str__()
 
+@tool
+def arxiv_search(query: str) -> str:
+    """Search arxiv database for scientific research papers and studies. This is your primary online information source.
+    always check it first when you search for additional information, before using any other online tool."""
+    global all_sources
+    arxiv_retriever = ArxivRetriever(load_max_docs=3)
+    data = arxiv_retriever.invoke(query)
+    meta_data = [i.metadata for i in data]
+    formatted_sources = format_arxiv_documents(data)
+    all_sources += formatted_sources
+    parsed_sources = parse_list_to_dicts(formatted_sources)
+    add_many(parsed_sources)
+
+    return data.__str__()
+
+@tool
+def get_arxiv_paper(paper_id:str) -> None:
+    """Download a paper from axriv to download a paper please input
+    the axriv id such as "1605.08386v1" This tool is named get_arxiv_paper
+    If you input "http://arxiv.org/abs/2312.02813", This will break the code. Also only do
+    "2312.02813". In addition please download one paper at a time. Pleaase keep the inputs/output
+    free of additional information only have the id.
+    """
+    # code from https://lukasschwab.me/arxiv.py/arxiv.html
+    paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
+
+    number_without_period = paper_id.replace('.', '')
+
+    # Download the PDF to a specified directory with a custom filename.
+    paper.download_pdf(dirpath="./downloaded_papers", filename=f"{number_without_period}.pdf")
 
 @tool
 def embed_arvix_paper(paper_id:str) -> None:
@@ -158,27 +158,26 @@ def embed_arvix_paper(paper_id:str) -> None:
     )
 
 @tool
-def …
-    """Search …
-    …
-    )
-    …
-    vector_db = Chroma(
-        client=client, # client for Chroma
-        collection_name=collection_name,
-        embedding_function=embedding_function,
-    )
+def wikipedia_search(query: str) -> str:
+    """Search Wikipedia for additional information to expand on research papers or when no papers can be found."""
+    global all_sources
+
+    api_wrapper = WikipediaAPIWrapper()
+    wikipedia_search = WikipediaQueryRun(api_wrapper=api_wrapper)
+    wikipedia_results = wikipedia_search.run(query)
+    all_sources += create_wikipedia_urls_from_text(wikipedia_results)
+    return wikipedia_results
+
+@tool
+def google_search(query: str) -> str:
+    """Search Google for additional results when you can't answer questions using arxiv search or wikipedia search."""
+    global all_sources
 
-    …
+    websearch = GoogleSearchAPIWrapper()
+    search_results:dict = websearch.results(query, 3)
+    cleaner_sources =format_search_results(search_results)
+    parsed_csources = parse_list_to_dicts(cleaner_sources)
+    add_many(parsed_csources)
+    all_sources += cleaner_sources
 
-    return …
+    return cleaner_sources.__str__()
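Because memory_search and knowledgeBase_search are ordinary @tool-decorated functions, they can be smoke-tested outside the agent loop. A minimal sketch, assuming the persistent Chroma store already exists on disk and that CONVERSATION_COLLECTION_NAME is set; the collection name used below is only a placeholder, not the project's real value:

    # Manual check of the two new retrieval tools, bypassing the Mixtral agent.
    import os
    os.environ.setdefault("CONVERSATION_COLLECTION_NAME", "conversation-memory")  # placeholder value

    from innovation_pathfinder_ai.structured_tools.structured_tools import (
        memory_search, knowledgeBase_search,
    )

    query = "retrieval augmented generation"
    # @tool wraps each function in a LangChain tool object; .run() accepts the raw query string.
    print(memory_search.run(query))
    print(knowledgeBase_search.run(query))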
requirements.txt
CHANGED
@@ -10,4 +10,5 @@ chromadb
 google_api_python_client
 pypdf2
 sqlmodel
-rich
+rich
+fastapi