algowizmomo committed on
Commit
2dcdca9
·
1 Parent(s): f691c7f

Upload folder using huggingface_hub

Browse files
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
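+ OPENAI_API_KEY=<redacted: this key was published in a commit and must be revoked; supply the replacement through a Space secret or environment variable, not a committed .env file>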
2
+
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ faiss_index_OpenAIEmbeddings/index.faiss filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ notifications_dir/
2
+ .venv/
3
+ docs/
4
+ faiss_index/
5
+ __pycache__
6
+
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: RAG Motor
3
- emoji: 📉
4
- colorFrom: yellow
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.13.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: RAG-Motor
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 4.13.0
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import gradio as gr
4
+ import os
5
+ # Import necessary components from your chatbot implementation
6
# Fall back to a local .env file only when the key is not already exported
# in the process environment.
if os.environ.get("OPENAI_API_KEY") is None:
    from dotenv import load_dotenv

    load_dotenv()

from mvd_chatbot import MVDAssistant

# Single assistant instance, created at import time and used by the chat
# handler below.
chatbot = MVDAssistant()
13
+
14
def chat_with_bot(message, history):
    """Return the assistant's reply to a single user message.

    Args:
        message: The text the user just submitted.
        history: Prior conversation supplied by the chat UI. It is accepted
            to satisfy the callback signature but is not read here.

    Returns:
        The value produced by ``chatbot.run_query(message)``, or a string of
        the form ``"Error: <details>"`` if that call raises.
    """
    try:
        return chatbot.run_query(message)
    except Exception as err:
        # Surface the failure in the chat window instead of crashing the UI.
        return f"Error: {err!s}"
24
+
25
+ # Create a Gradio ChatInterface
26
# Chat front end: every submitted message is routed to chat_with_bot.
iface = gr.ChatInterface(
    chat_with_bot,
    description="Interact with the RAG Chatbot. Type your questions or statements below.",
    title="RAG Chatbot",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
docs_processor.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # docs_processor.py
2
+ from langchain.document_loaders import DirectoryLoader
3
+ from transformers import GPT2TokenizerFast
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings # Import other embeddings as needed
7
+
8
+ import os
9
+
10
def index_docs(model_name, embedding_model):
    """Return the FAISS store for ``model_name``, building it if necessary.

    The index lives in the directory ``faiss_index_<model_name>``. When that
    directory exists it is loaded with ``embedding_model``; otherwise the
    documents from ``prepare_docs()`` are embedded, the resulting store is
    saved to that directory, and the freshly built store is returned.

    Args:
        model_name: Suffix used to name the on-disk index directory.
        embedding_model: Embeddings object handed to FAISS for both building
            and loading the index.

    Returns:
        The loaded or newly created FAISS vector store.
    """
    index_path = f"faiss_index_{model_name}"

    if not os.path.exists(index_path):
        # First run for this embedding model: build the index and persist it.
        chunks = prepare_docs()
        store = FAISS.from_documents(chunks, embedding_model)
        store.save_local(index_path)
        return store

    return FAISS.load_local(index_path, embedding_model)
20
+
21
def prepare_docs(docs_dir='./docs/bare/', chunk_size=100, chunk_overlap=10):
    """Load the document corpus and split it into chunks for indexing.

    Args:
        docs_dir: Directory handed to ``DirectoryLoader``. Defaults to the
            previously hard-coded ``'./docs/bare/'``.
        chunk_size: Chunk size passed to the text splitter, which is built
            from the GPT-2 tokenizer. Defaults to 100.
        chunk_overlap: Overlap between consecutive chunks, passed to the
            same splitter. Defaults to 10.

    Returns:
        The list of chunked documents produced by
        ``CharacterTextSplitter.split_documents``.
    """
    # Loading
    docs = DirectoryLoader(docs_dir).load()

    # Chunking: the splitter is constructed from the GPT-2 tokenizer.
    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
    text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
        tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(docs)
33
+
34
+
35
def main():
    """Run an interactive similarity-search prompt against the FAISS index.

    Reads queries from standard input until the user enters ``q``. For each
    query, prints the content and metadata of every matching chunk followed
    by a separator line.
    """
    # index_docs needs both the index-name suffix and the embeddings object;
    # calling it with no arguments raises TypeError.
    db = index_docs("OpenAIEmbeddings", OpenAIEmbeddings())

    while True:
        q = input("Query:")
        if q == "q":
            # Quit sentinel: stop before issuing a search for the literal "q".
            break
        for doc in db.similarity_search(q):
            print(doc.page_content)
            print(doc.metadata)
            print("=" * 30)


if __name__ == "__main__":
    main()
faiss_index_OpenAIEmbeddings/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a4189363d1cff7f486b89de4af5661b216c7fc80d8acf24477e46b36d690940
3
+ size 1394733
faiss_index_OpenAIEmbeddings/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87899ad31c84e9c38116e7f4d91434636bf7b0c28ca0df4ac27f74def55b1afe
3
+ size 456303
mvd_chatbot.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from docs_processor import index_docs
2
+ from langchain.chat_models import ChatOpenAI
3
+ from langchain.agents import initialize_agent, Tool, AgentType
4
+ from langchain.memory import ConversationBufferMemory
5
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
6
+
7
class MVDAssistant:
    """Retrieval-augmented chat assistant built on a LangChain agent.

    Construction wires together a chat model, a FAISS document index, a
    conversation buffer memory and a single retrieval tool, then builds the
    conversational agent that ``run_query`` drives.
    """

    def __init__(self, embedding_model=None, chat_model="gpt-4-1106-preview"):
        """Build the assistant and all of its components.

        Args:
            embedding_model: ``(name, embeddings)`` pair forwarded to
                ``index_docs``. When omitted,
                ``("OpenAIEmbeddings", OpenAIEmbeddings())`` is used.
            chat_model: Model name passed to ``ChatOpenAI``.
        """
        if embedding_model is None:
            # Created per instance instead of as a default-argument value, so
            # that merely importing this module does not construct an
            # embeddings client (default arguments are evaluated once, when
            # the class body is executed).
            embedding_model = ("OpenAIEmbeddings", OpenAIEmbeddings())

        self.llm = self.initialize_language_model(chat_model)
        self.db = self.process_documents(*embedding_model)
        self.memory = self.initialize_memory("chat_history", True)
        self.tools = self.setup_tools(self.db)
        self.agent = self.setup_agent(self.tools, self.llm, self.memory, False)

    def initialize_language_model(self, model_name):
        """Return a ``ChatOpenAI`` instance for ``model_name``."""
        return ChatOpenAI(model_name=model_name)

    def process_documents(self, model_name, embedding_model):
        """Return the vector store produced by ``index_docs`` for this model."""
        return index_docs(model_name, embedding_model)

    def initialize_memory(self, memory_key, return_messages):
        """Return a ``ConversationBufferMemory`` configured with the given options."""
        return ConversationBufferMemory(memory_key=memory_key, return_messages=return_messages)

    def setup_tools(self, db):
        """Return the agent's tool list: one tool that runs a similarity search on ``db``."""
        return [
            Tool(
                name="Retrieve Info",
                description="Tool to retrieve information from the indexed documents.",
                func=lambda q: db.similarity_search(q),
            )
        ]

    def setup_agent(self, tools, llm, memory, verbose):
        """Return a chat-conversational ReAct agent over ``tools``, ``llm`` and ``memory``."""
        return initialize_agent(
            tools,
            llm,
            agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
            memory=memory,
            verbose=verbose,
        )

    def run_query(self, query, max_retries=10):
        """Run ``query`` through the agent, retrying when the call raises.

        Args:
            query: The user's message.
            max_retries: Maximum number of attempts before giving up.

        Returns:
            Whatever ``self.agent.run(query)`` returns on the first
            successful attempt.

        Raises:
            RuntimeError: If every attempt fails. The last underlying
                exception is attached as ``__cause__``.
        """
        last_error = None
        for _ in range(max_retries):
            try:
                return self.agent.run(query)
            except Exception as e:
                # Best-effort retry: report the failure and try again.
                print("Error:", e)
                last_error = e

        # Without this, exhausting the retries would fall through to an
        # unbound result variable and raise a confusing UnboundLocalError.
        raise RuntimeError(
            f"Query failed after {max_retries} attempts"
        ) from last_error
44
+
45
+
46
def main():
    """Console chat loop for the assistant.

    Prompts for queries until an empty line is entered. Each answer is
    printed between a centred ``Answer`` banner and a closing rule, both 30
    characters wide.
    """
    assistant = MVDAssistant()
    banner = "Answer".center(30, "=")
    rule = "=" * 30

    while True:
        q = input("Query: ")
        if not q:
            break
        # Resolve the answer before printing anything, so any output emitted
        # while the query runs appears ahead of the banner.
        reply = assistant.run_query(q)
        print(banner, reply, rule, sep="\n")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Requests
2
+ scipy
3
+ transformers
4
+ openai
5
+ langchain
6
+ huggingface_hub==0.17
7
+ tiktoken
8
+ unstructured
9
+ unstructured[pdf]
10
+ unstructured[docx]
11
+ openpyxl
12
+ pandas
13
+ nltk
14
+ unstructured[md]
15
+ faiss-gpu