Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

.env +2 -0
.gitattributes +1 -0
.gitignore +6 -0
README.md +2 -8
app.py +33 -0
docs_processor.py +48 -0
faiss_index_OpenAIEmbeddings/index.faiss +3 -0
faiss_index_OpenAIEmbeddings/index.pkl +3 -0
mvd_chatbot.py +58 -0
requirements.txt +15 -0

.env ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ OPENAI_API_KEY=<REDACTED: revoke the key that was committed here and provide it through the environment / Space secrets instead of a tracked .env>
2	+

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+faiss_index_OpenAIEmbeddings/index.faiss filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+notifications_dir/
+.venv/
+docs/
+faiss_index/
+__pycache__

README.md CHANGED Viewed

@@ -1,12 +1,6 @@
 ---
-title: RAG Motor
-emoji: 📉
-colorFrom: yellow
-colorTo: green
 sdk: gradio
 sdk_version: 4.13.0
-app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: RAG-Motor
+app_file: app.py
 sdk: gradio
 sdk_version: 4.13.0
 ---

app.py ADDED Viewed

	@@ -0,0 +1,33 @@

+# app.py
+import gradio as gr
+import os
+# Fall back to a local .env file (python-dotenv) only when the key is not
+# already provided by the environment. This has to happen before the
+# mvd_chatbot import below so the key is present in os.environ by the time
+# the assistant and its OpenAI-backed objects are created.
+if "OPENAI_API_KEY" not in os.environ:
+    from dotenv import load_dotenv
+    load_dotenv()
+from mvd_chatbot import MVDAssistant
+# Single module-level assistant; chat_with_bot sends every message to it.
+chatbot = MVDAssistant()
+def chat_with_bot(message, history):
+    """
+    Function to get chatbot response for the user input.
+    """
+    try:
+        # Assuming the last message in history is the user's message
+        response = chatbot.run_query(message)
+        return response
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Create a Gradio ChatInterface
+iface = gr.ChatInterface(
+    fn=chat_with_bot,
+    title="RAG Chatbot",
+    description="Interact with the RAG Chatbot. Type your questions or statements below."
+)
+if __name__ == "__main__":
+    iface.launch()

docs_processor.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# docs_processor.py
+from langchain.document_loaders import DirectoryLoader
+from transformers import GPT2TokenizerFast
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings  # Import other embeddings as needed
+import os
+def index_docs(model_name, embedding_model):
+    INDEX_DIR = f"faiss_index_{model_name}"
+    if os.path.exists(INDEX_DIR):
+        db = FAISS.load_local(INDEX_DIR, embedding_model)
+    else:
+        documents = prepare_docs()
+        db = FAISS.from_documents(documents, embedding_model)
+        db.save_local(INDEX_DIR)
+    return db
+def prepare_docs():
+    # Loading
+    loader = DirectoryLoader('./docs/bare/')
+    docs = loader.load()
+    # Chunking
+    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+    text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
+        tokenizer, chunk_size=100, chunk_overlap=10
+    )
+    chunks = text_splitter.split_documents(docs)
+    return chunks
+def main():
+    db = index_docs()
+    q = ""
+    while q!="q":
+        q = input("Query:")
+        documents = db.similarity_search(q)
+        for doc in documents:
+            print(doc.page_content)
+            print(doc.metadata)
+            print("="*30)
+if __name__ == "__main__":
+    main()

faiss_index_OpenAIEmbeddings/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a4189363d1cff7f486b89de4af5661b216c7fc80d8acf24477e46b36d690940
+size 1394733

faiss_index_OpenAIEmbeddings/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87899ad31c84e9c38116e7f4d91434636bf7b0c28ca0df4ac27f74def55b1afe
+size 456303

mvd_chatbot.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from docs_processor import index_docs
+from langchain.chat_models import ChatOpenAI
+from langchain.agents import initialize_agent, Tool, AgentType
+from langchain.memory import ConversationBufferMemory
+from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
+class MVDAssistant:
+    def __init__(self, embedding_model=("OpenAIEmbeddings",OpenAIEmbeddings()), chat_model="gpt-4-1106-preview"):
+        self.llm = self.initialize_language_model(chat_model)
+        self.db = self.process_documents(*embedding_model)
+        self.memory = self.initialize_memory("chat_history", True)
+        self.tools = self.setup_tools(self.db)
+        self.agent = self.setup_agent(self.tools, self.llm, self.memory, False)
+    def initialize_language_model(self, model_name):
+        return ChatOpenAI(model_name=model_name)
+    def process_documents(self, model_name, embedding_model):
+        return index_docs(model_name, embedding_model)
+    def initialize_memory(self, memory_key, return_messages):
+        return ConversationBufferMemory(memory_key=memory_key, return_messages=return_messages)
+    def setup_tools(self, db):
+        return [
+            Tool(
+                name="Retrieve Info",
+                description="Tool to retrieve information from the indexed documents.",
+                func=lambda q: db.similarity_search(q)
+            )
+        ]
+    def setup_agent(self, tools, llm, memory, verbose):
+        return initialize_agent(tools, llm, agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, memory=memory, verbose=verbose)
+    def run_query(self, query):
+        for x in range(10): # retry n times
+            try:
+                res = self.agent.run(query)
+                break;
+            except Exception as e:
+                print("Error:", e)
+        return res
+def main():
+    agent = MVDAssistant()
+    q = input("Query: ")
+    while q:
+        answer = agent.run_query(q)
+        print("Answer".center(30, "="))
+        print(answer)
+        print("="*30)
+        q = input("Query: ")
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+Requests
+scipy
+transformers
+openai
+langchain
+huggingface_hub==0.17
+tiktoken
+unstructured
+unstructured[pdf]
+unstructured[docx]
+openpyxl
+pandas
+nltk
+unstructured[md]
+faiss-gpu