Spaces:

sudhanshu7
/

Multi-Model-Agent

Sleeping

App Files Files Community

ANeuronI commited on Aug 3, 2024

Commit

1842d77

1 Parent(s): 38dde5a

commit

Browse files

Files changed (6) hide show

.streamlit/secrets.toml +2 -0
FINALAPP.py +206 -0
README.md +33 -0
_init_.py +1 -0
requirements.txt +12 -0
tools.py +44 -0

.streamlit/secrets.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ GOOGLE_API_KEY = "AIzaSyDhICh8yNoZ2dgtmuC-bw8byX_7ELvaHIc"
2	+ GOOGLE_CSE_ID="05aad8a0821c14286"

FINALAPP.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import os
+import tempfile
+import streamlit as st
+from dotenv import load_dotenv
+from pdfminer.high_level import extract_text
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain.memory import ConversationBufferMemory
+from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
+from langchain.retrievers.multi_query import MultiQueryRetriever
+from langchain.chains.llm import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain_groq import ChatGroq
+from langchain.agents import initialize_agent, load_tools
+# Check if the secrets file exists and load it
+secrets_exists = os.path.exists(os.path.join(os.getcwd(), ".streamlit", "secrets.toml")) or \
+                 os.path.exists(os.path.join(os.path.expanduser("~"), ".streamlit", "secrets.toml"))
+if secrets_exists:
+    load_dotenv(os.path.join(os.getcwd(), ".streamlit", "secrets.toml"))
+# Function to extract text from PDFs
+def extract_text_from_pdfs(docs):
+    text = ""
+    for doc in docs:
+        try:
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+                tmp_file.write(doc.getbuffer())
+                tmp_file_path = tmp_file.name
+            extracted_text = extract_text(tmp_file_path)
+            text += extracted_text
+        except Exception as e:
+            st.error(f"Error processing {doc.name}: {e}")
+        finally:
+            os.remove(tmp_file_path)
+    return text
+# Function to split text into chunks
+def get_text_chunks(raw_text):
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
+    chunks = text_splitter.split_text(raw_text)
+    return chunks
+# Function to create FAISS index
+def create_faiss_index(text_chunks):
+    model_name = "BAAI/bge-small-en"
+    model_kwargs = {"device": "cpu"}
+    encode_kwargs = {"normalize_embeddings": True}
+    embeddings = HuggingFaceBgeEmbeddings(model_name=model_name,
+                                          model_kwargs=model_kwargs,
+                                          encode_kwargs=encode_kwargs)
+    vector_store = FAISS.from_texts(text_chunks, embeddings)
+    return vector_store
+# Function to get the conversation chain
+def get_conversation_chain(vector_store, groq_api_key):
+    llm = ChatGroq(
+        temperature=0.7,
+        model="llama3-70b-8192",
+        api_key=groq_api_key,
+        streaming=True,
+        verbose=True
+    )
+    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+    prompt_template = PromptTemplate(
+        input_variables=["question"],
+        template="""You are an AI language model assistant. Your task is to generate 3
+        different versions of the given user question to retrieve relevant documents from
+        a vector database. By generating multiple perspectives on the user question, your
+        goal is to help the user overcome some of the limitations of the distance-based
+        similarity search. Provide these alternative questions separated by newlines.
+        Original question: {question}""",
+    )
+    llm_chain = LLMChain(llm=llm, prompt=prompt_template)
+    retriever = MultiQueryRetriever(retriever=vector_store.as_retriever(), llm_chain=llm_chain, num_queries=3)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=retriever,
+        memory=memory
+    )
+    return conversation_chain, llm
+# Function to get the web agent
+def get_web_agent(groq_api_key):
+    llm = ChatGroq(
+        temperature=0.7,
+        model="llama3-70b-8192",
+        api_key=groq_api_key,
+        streaming=True,
+        verbose=True
+    )
+    # can create custom tools
+    tools = load_tools([], llm=llm)
+    from tools import summarizer_tool
+    tools.append(summarizer_tool)
+    additional_tools = load_tools(["llm-math", "google-search"], llm=llm)
+    tools.extend(additional_tools)
+    memory = ConversationBufferMemory(memory_key="chat_history")
+    ZERO_SHOT_REACT_DESCRIPTION = initialize_agent(
+        agent='zero-shot-react-description',
+        tools=tools,
+        llm=llm,
+        verbose=True,
+        max_iterations=10,
+        memory=memory,
+        handle_parsing_errors=True
+    )
+    return ZERO_SHOT_REACT_DESCRIPTION
+# Main function
+def main():
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = None
+        st.session_state.chat_history = []
+        st.session_state.vector_store = None
+    st.set_page_config(page_title="Multi Model Agent", page_icon=":books:")
+    st.markdown("<h2 style='text-align: center;'>AI Agent 🤖</h2>", unsafe_allow_html=True)
+    with st.sidebar:
+        st.markdown('📖 API_KEYS  [REPO](https://github.com/ANeuronI/RAG-AGENT)')
+        st.title("📤 Upload Pdf ")
+        docs = st.file_uploader(" ", type=["pdf"], accept_multiple_files=True)
+        file_details = []
+        if docs is not None:
+            for doc in docs:
+                file_details.append({"FileName": doc.name})
+        with st.expander("Uploaded Files"):
+            if file_details:
+                for details in file_details:
+                    st.write(f"File Name: {details['FileName']}")
+        st.subheader("Start Model🧠")
+        groq_api_key = os.getenv("GROQ_API_KEY")
+        if groq_api_key:
+            st.success('Groq API key already provided!', icon='✅')
+        else:
+            groq_api_key = st.text_input('Enter Groq API key:', type='password', key='groq_api_key')
+            if groq_api_key and (groq_api_key.startswith('gsk_') and len(groq_api_key) == 56):
+                os.environ['GROQ_API_KEY'] = groq_api_key
+                st.success('Groq API key provided!', icon='✅')
+            else:
+                st.warning('Please enter a valid Groq API key!', icon='⚠️')
+        if st.button("Start Inference", key="start_inference") and docs:
+            with st.spinner("Processing..."):
+                raw_text = extract_text_from_pdfs(docs)
+                if raw_text:
+                    text_chunks = get_text_chunks(raw_text)
+                    vector_store = create_faiss_index(text_chunks)
+                    st.session_state.vector_store = vector_store
+                    st.write("FAISS Vector Store created successfully.")
+                    st.session_state.conversation, llm = get_conversation_chain(vector_store, groq_api_key)
+                    st.session_state.llm = llm
+                    st.session_state.web_agent = get_web_agent(groq_api_key)
+                else:
+                    st.error("No text extracted from the documents.")
+    if st.session_state.conversation:
+        for message in st.session_state.chat_history:
+            if message['role'] == 'user':
+                with st.chat_message("user"):
+                    st.write(message["content"])
+            else:
+                with st.chat_message("assistant"):
+                    st.write(message["content"])
+        input_disabled = groq_api_key is None
+        if prompt := st.chat_input("Ask your question here..." , disabled=input_disabled):
+            st.session_state.chat_history.append({"role": "user", "content": prompt})
+            with st.chat_message("user"):
+                st.write(prompt)
+            with st.chat_message("assistant"):
+                with st.spinner("Thinking..."):
+                    response = st.session_state.conversation({"question": prompt})
+                    if "answer" in response and "I don't know" not in response["answer"]:
+                        st.session_state.chat_history.append({"role": "assistant", "content": response['answer']})
+                        st.write(response['answer'])
+                    else:
+                        with st.spinner("Searching the web..."):
+                            response = st.session_state.web_agent.run(prompt)
+                            st.session_state.chat_history.append({"role": "assistant", "content": response})
+                            st.write(response)
+if __name__ == '__main__':
+    main()

README.md CHANGED Viewed

@@ -11,3 +11,36 @@ license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Multi-Model LLM Agent
+Welcome to the Multi-Model LLM Agent repository! This repository hosts two versions of our language model agent, each offering unique capabilities tailored to different needs.
+## Version 1: RAG Agent
+- Basic web search functionality
+- Implementation of RAG (Retrieval-Augmented Generation)
+[Explore Version 1 (RAG Agent)](https://multi-model-rag-agent.streamlit.app/)
+## Version 2: Multi-Model Agent (Final Version)
+- Advanced web search capabilities, including website scraping
+- Enhanced RAG model with memory for retaining context across conversations
+- Multi-query retrieval for handling complex information needs
+[Explore Version 2 (Multi-Model Agent)](https://)
+## How to Use This Model
+### Obtain API Keys
+Before using the Multi-Model Agent, you need to obtain API keys from the following providers:
+- **GROQ API Keys:** Obtain from [GROQ](https://console.groq.com/keys)
+- **Replicate API Keys:** Obtain from [Replicate](https://replicate.com/meta/meta-llama-3-70b-instruct)
+### Integration Instructions
+1. **API Key Setup:** Insert your obtained API keys into the designated configuration.
+2. **Usage Guide:** Refer to our detailed documentation for integrating the API keys and utilizing the Multi-Model Agent effectively.
+## Notes
+- **Security:** Keep your API keys secure and adhere to the terms of service of each provider.

_init_.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # _init_.py

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+streamlit==1.36.0
+pdfminer.six==20221105
+langchain
+langchain-community
+faiss-cpu==1.7.3
+langchain-groq
+python-dotenv==1.0.0
+langchain-huggingface
+wikipedia
+replicate
+numexpr
+google-api-python-client>=2.100.0

tools.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from langchain_groq import ChatGroq
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain.tools import Tool
+import os
+# summeriser
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+if not GROQ_API_KEY:
+    raise ValueError("GROQ_API_KEY environment variable is not set.")
+# Initialize ChatGroq for summarization
+summarizer_llm = ChatGroq(
+    temperature=0.7,
+    model="llama3-8b-8192",
+    api_key=GROQ_API_KEY,
+    streaming=True,
+    verbose=True
+)
+# Define a prompt template for summarization
+summarization_prompt = PromptTemplate(
+    input_variables=["text"],
+    template="Summarize the following content: {text}"
+)
+# Create the summarization chain
+summarization_chain = LLMChain(
+    llm=summarizer_llm,
+    prompt=summarization_prompt
+)
+# Define the summarizer tool
+def summarize_content_tool(text: str) -> str:
+    return summarization_chain.run(text=text)
+summarizer_tool = Tool(
+    name="summarizer",
+    description="Summarizes content using a language model.",
+    func=summarize_content_tool
+)