Spaces:

LouminAI-Labs
/

ILYA_docs_RAG

Runtime error

App Files Files Community

dryouviavant committed on May 29, 2024

Commit

410bc42

verified ·

1 Parent(s): a3de672

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -32

app.py CHANGED Viewed

@@ -1,25 +1,24 @@
 import gradio as gr
 import os
-from langchain_community.vectorstores import FAISS
-from langchain_community.document_loaders import PyPDFLoader
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.llms import HuggingFaceEndpoint
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from dotenv import load_dotenv
 import torch
-# Load environment variables
 load_dotenv()
 api_token = os.getenv("HF_TOKEN")
-# List of available LLMs
 list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 # Load and split PDF document
-def load_doc(list_file_path, chunk_size=1024, chunk_overlap=64):
     loaders = [PyPDFLoader(x) for x in list_file_path]
     pages = []
     for loader in loaders:
@@ -126,7 +125,7 @@ def conversation(qa_chain, message, history, persona_text):
 def upload_file(file_obj):
     list_file_path = []
     for idx, file in enumerate(file_obj):
-        file_path = file.name
         list_file_path.append(file_path)
     return list_file_path
@@ -136,52 +135,54 @@ def demo():
     with gr.Blocks(theme=gr.themes.Default(primary_hue="sky")) as demo:
         vector_db = gr.State()
         qa_chain = gr.State()
-        gr.HTML("<center><h1>RAG PDF Chatbot</h1><center>")
-        gr.Markdown("""<b>Interact with Your PDF Documents!</b> This AI agent performs retrieval-augmented generation (RAG) on PDF documents. Hosted on Hugging Face Hub for demonstration purposes. \
-        <b>Do not upload confidential documents.</b>""")
         # Interface for static pre-selected documents
         gr.Markdown("<b>Pre-Selected Documents</b>")
-        gr.Textbox(value="Document 1: Introduction to AI.pdf", show_label=False, interactive=False)
-        gr.Textbox(value="Document 2: Advanced Machine Learning.pdf", show_label=False, interactive=False)
-        gr.Markdown("<b>Upload Your PDF Documents</b>")
-        document = gr.Files(height=300, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload PDF documents")
-        db_btn = gr.Button("Create vector database")
-        db_progress = gr.Textbox(value="Not initialized", show_label=False)
-        gr.Markdown("<b>Select Large Language Model (LLM) and Configure Parameters</b>")
         llm_btn = gr.Radio(list_llm_simple, label="Available LLMs", value=list_llm_simple[0], type="index")
         slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.5, step=0.1, label="Temperature", info="Controls randomness in token generation", interactive=True)
         slider_maxtokens = gr.Slider(minimum=128, maximum=9192, value=4096, step=128, label="Max New Tokens", info="Maximum number of tokens to be generated", interactive=True)
-        slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Top-K", info="Number of tokens to select the next token from", interactive=True)
         qachain_btn = gr.Button("Initialize Question Answering Chatbot")
         llm_progress = gr.Textbox(value="Not initialized", show_label=False)
-        gr.Markdown("<b>Chat with Your Document</b>")
         chatbot = gr.Chatbot(height=505)
-        doc_source1 = gr.Textbox(label="Reference 1", lines=2, interactive=False)
-        source1_page = gr.Number(label="Page", interactive=False)
-        doc_source2 = gr.Textbox(label="Reference 2", lines=2, interactive=False)
-        source2_page = gr.Number(label="Page", interactive=False)
-        doc_source3 = gr.Textbox(label="Reference 3", lines=2, interactive=False)
-        source3_page = gr.Number(label="Page", interactive=False)
         msg = gr.Textbox(placeholder="Ask a question", container=True)
         submit_btn = gr.Button("Submit")
         clear_btn = gr.ClearButton([msg, chatbot], value="Clear")
-        # Bind the events
         db_btn.click(initialize_database, inputs=[document], outputs=[vector_db, db_progress])
         qachain_btn.click(initialize_LLM, inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db], outputs=[qa_chain, llm_progress]).then(lambda: [None, "", 0, "", 0, "", 0],
             inputs=None,
             outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
             queue=False)
         msg.submit(conversation, inputs=[qa_chain, msg, chatbot, persona_text], outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
         submit_btn.click(conversation, inputs=[qa_chain, msg, chatbot, persona_text], outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
-        clear_btn.click(lambda: [None, "", 0, "", 0, "", 0], inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page])
     demo.queue().launch(debug=True)
 if __name__ == "__main__":
-    demo()

 import gradio as gr
 import os
+from langchain.vectorstores import FAISS
+from langchain.document_loaders import PyPDFLoader
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.llms import HuggingFaceEndpoint
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from dotenv import load_dotenv
 import torch
 load_dotenv()
 api_token = os.getenv("HF_TOKEN")
 list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 # Load and split PDF document
+def load_doc(list_file_path, chunk_size=512, chunk_overlap=64):
     loaders = [PyPDFLoader(x) for x in list_file_path]
     pages = []
     for loader in loaders:
 def upload_file(file_obj):
     """Collect the ``name`` attribute of every uploaded file object.

     Args:
         file_obj: Iterable of uploaded-file objects, each exposing a
             ``name`` attribute (presumably the file's path on disk --
             confirm against the upload component in use). ``None`` or an
             empty value is treated as "nothing uploaded".

     Returns:
         list: One ``name`` value per element of ``file_obj``, in order.
     """
     if not file_obj:
         return []
     # Read ``name`` from each element; the container itself has no such
     # attribute, so ``file_obj.name`` would raise AttributeError.
     return [file.name for file in file_obj]
     with gr.Blocks(theme=gr.themes.Default(primary_hue="sky")) as demo:
         vector_db = gr.State()
         qa_chain = gr.State()
+        gr.HTML("<center><h1>RAG PDF chatbot</h1><center>")
+        gr.Markdown("""<b>Query your PDF documents!</b> This AI agent is designed to perform retrieval augmented generation (RAG) on PDF documents. The app is hosted on Hugging Face Hub for the sole purpose of demonstration. <b>Please do not upload confidential documents.</b>""")
         # Interface for static pre-selected documents
         gr.Markdown("<b>Pre-Selected Documents</b>")
+        gr.Textbox(value="Document 1: ...", show_label=False, interactive=False)
+        gr.Textbox(value="Document 2: ...", show_label=False, interactive=False)
+        gr.Markdown("<b>Select Large Language Model (LLM) and Input Parameters</b>")
         llm_btn = gr.Radio(list_llm_simple, label="Available LLMs", value=list_llm_simple[0], type="index")
         slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.5, step=0.1, label="Temperature", info="Controls randomness in token generation", interactive=True)
         slider_maxtokens = gr.Slider(minimum=128, maximum=9192, value=4096, step=128, label="Max New Tokens", info="Maximum number of tokens to be generated", interactive=True)
+        slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k", info="Number of tokens to select the next token from", interactive=True)
         qachain_btn = gr.Button("Initialize Question Answering Chatbot")
         llm_progress = gr.Textbox(value="Not initialized", show_label=False)
+        gr.Markdown("<b>Chat with your Document</b>")
         chatbot = gr.Chatbot(height=505)
+        with gr.Accordion("Relevant context from the source document", open=False):
+            with gr.Row():
+                doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
+                source1_page = gr.Number(label="Page", scale=1)
+            with gr.Row():
+                doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
+                source2_page = gr.Number(label="Page", scale=1)
+            with gr.Row():
+                doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
+                source3_page = gr.Number(label="Page", scale=1)
         msg = gr.Textbox(placeholder="Ask a question", container=True)
         submit_btn = gr.Button("Submit")
         clear_btn = gr.ClearButton([msg, chatbot], value="Clear")
+        # Preprocessing events
+        db_btn = gr.Button("Create vector database")
+        db_progress = gr.Textbox(value="Not initialized", show_label=False)
         db_btn.click(initialize_database, inputs=[document], outputs=[vector_db, db_progress])
         qachain_btn.click(initialize_LLM, inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db], outputs=[qa_chain, llm_progress]).then(lambda: [None, "", 0, "", 0, "", 0],
             inputs=None,
             outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
             queue=False)
+        # Chatbot events
         msg.submit(conversation, inputs=[qa_chain, msg, chatbot, persona_text], outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
         submit_btn.click(conversation, inputs=[qa_chain, msg, chatbot, persona_text], outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
+        clear_btn.click(lambda: [None, "", 0, "", 0, "", 0], inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
     demo.queue().launch(debug=True)
 if __name__ == "__main__":
+    demo()