umangchaudhry committed on
Commit
bfabbde
1 Parent(s): 974df2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -33
app.py CHANGED
@@ -1,8 +1,10 @@
 
1
  import openai
2
  import random
3
  import time
4
- import gradio as gr
5
- import os
 
6
  from langchain.embeddings.openai import OpenAIEmbeddings
7
  from langchain.vectorstores import DeepLake
8
  from langchain.chat_models import ChatOpenAI
@@ -13,114 +15,145 @@ from langchain.document_loaders import PyPDFDirectoryLoader
13
  from langchain.memory import ConversationBufferMemory
14
  from langchain.llms import OpenAI
15
 
 
16
  def set_api_key(key):
17
- os.environ["OPENAI_API_KEY"] = key
18
- return f"Your API Key has been set to: {key}"
19
 
 
20
  def reset_api_key():
21
- os.environ["OPENAI_API_KEY"] = ""
22
- return "Your API Key has been reset"
23
 
 
24
  def get_api_key():
25
- api_key = os.getenv("OPENAI_API_KEY")
26
  return api_key
27
 
 
28
  def set_model(model):
29
- os.environ["OPENAI_MODEL"] = model
30
- return f"{model} selected"
31
 
 
32
  def get_model():
33
- model = os.getenv("OPENAI_MODEL")
34
- return model
35
 
 
36
  def upload_file(files):
37
- file_paths = [file.name for file in files]
38
  return file_paths
39
 
 
40
  def create_vectorstore(files):
41
- pdf_dir = files.name
42
- pdf_loader = PyPDFDirectoryLoader(pdf_dir)
43
- pdf_docs = pdf_loader.load_and_split()
44
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
45
- texts = text_splitter.split_documents(pdf_docs)
46
- embeddings = OpenAIEmbeddings()
 
 
47
  db = DeepLake.from_documents(texts, dataset_path="./documentation_db", embedding=embeddings, overwrite=True)
48
- return "Vectorstore Successfully Created"
49
 
 
50
  def respond(message, chat_history):
51
 
52
  # Get embeddings
53
  embeddings = OpenAIEmbeddings()
54
 
55
- #Connect to existing vectorstore
56
  db = DeepLake(dataset_path="./documentation_db", embedding_function=embeddings, read_only=True)
57
- #Set retriever settings
58
  retriever = db.as_retriever(search_kwargs={"distance_metric":'cos',
59
  "fetch_k":10,
60
  "maximal_marginal_relevance":True,
61
  "k":10})
62
 
 
63
  if len(chat_history) != 0:
64
  chat_history = [(chat_history[0][0], chat_history[0][1])]
65
 
 
66
  model = get_model()
67
  # Create ChatOpenAI and ConversationalRetrievalChain
68
  model = ChatOpenAI(model=model)
69
  qa = ConversationalRetrievalChain.from_llm(model, retriever)
70
 
 
71
  bot_message = qa({"question": message, "chat_history": chat_history})
 
72
  chat_history = [(message, bot_message["answer"])]
73
- time.sleep(1)
74
- return "", chat_history
75
 
 
76
  with gr.Blocks() as demo:
77
 
 
78
  gr.Markdown("<h1 style='text-align: center;'>Langchain Coding Assistant</h1>")
79
 
 
80
  gr.Markdown("## This Gradio app is powered by ChatGPT and LangChain. You can submit your OpenAI API key and use the chatbot to get assistance with grant writing. \n ### 1. Enter your OpenAI API key. \n ### 2. Click 'Submit' to set your API key.\n ### 3. Upload the documents that you would like the model to be aware of and then create the vectorstore. Please note that once the vectorstore is created, it will persist and the documents will remain in the vectorstore. \n 4. Provide your prompt.")
81
 
 
82
  with gr.Row():
83
- #create textbox for API input
84
  api_input = gr.Textbox(label = "API Key",
85
  placeholder = "Please provide your OpenAI API key here.")
86
- #create textbox to validate API
87
  api_key_status = gr.Textbox(label = "API Key Status",
88
  placeholder = "Your API Key has not be set yet. Please enter your key.",
89
  interactive = False)
90
- #create button to submit API key
91
  api_submit_button = gr.Button("Submit")
92
- #set api_submit_button functionality
93
  api_submit_button.click(set_api_key, inputs=api_input, outputs=api_key_status)
94
- #create button to reset API key
95
  api_reset_button = gr.Button("Clear API Key from session")
96
- #set api_reset_button functionality
97
  api_reset_button.click(reset_api_key, outputs=api_key_status)
98
 
 
99
  with gr.Row():
100
  with gr.Column():
101
- #create dropdown to select model (gpt-3.5-turbo or gpt4)
102
  model_selection = gr.Dropdown(
103
  ["gpt-3.5-turbo", "gpt-4"], label="Model Selection", info="Please ensure you provide the API Key that corresponds to the Model you select!"
104
  )
105
- #create button to submit model selection
106
  model_submit_button = gr.Button("Submit Model Selection")
 
107
  model_status = gr.Textbox(label = "Selected Model", interactive = False, lines=4)
108
- #set model_submit_button functionality
109
  model_submit_button.click(set_model, inputs=model_selection, outputs=model_status)
110
 
 
111
  file_output = gr.File(label = "Uploaded files - Please note these files are persistent and will not be automatically deleted")
 
112
  upload_button = gr.UploadButton("Click to Upload a PDF File", file_types=["pdf"], file_count="multiple")
 
113
  upload_button.upload(upload_file, upload_button, file_output)
 
114
  create_vectorstore_button = gr.Button("Click to create the vectorstore for your uploaded documents")
 
115
  db_output = gr.Textbox(label = "Vectorstore Status")
 
116
  create_vectorstore_button.click(create_vectorstore, inputs=file_output, outputs = db_output)
117
 
 
118
  chatbot = gr.Chatbot(label="ChatGPT Powered Grant Writing Assistant")
 
119
  msg = gr.Textbox(label="User Prompt", placeholder="Your Query Here")
 
120
  clear = gr.Button("Clear")
121
 
 
122
  msg.submit(respond, inputs = [msg, chatbot], outputs = [msg, chatbot])
 
123
  clear.click(lambda: None, None, chatbot, queue=False)
124
 
125
-
126
  demo.launch()
 
1
+ # Import necessary libraries
2
  import openai
3
  import random
4
  import time
5
+ import gradio as gr # Gradio is a library for creating UIs for ML models
6
+ import os # This module provides functions to interact with the operating system
7
+ # Importing various classes and functions from the langchain package
8
  from langchain.embeddings.openai import OpenAIEmbeddings
9
  from langchain.vectorstores import DeepLake
10
  from langchain.chat_models import ChatOpenAI
 
15
  from langchain.memory import ConversationBufferMemory
16
  from langchain.llms import OpenAI
17
 
18
# Function to set the OpenAI API key
def set_api_key(key):
    """Store the OpenAI API key in the environment for this session.

    Returns a confirmation message with the key partially masked, so the
    full secret is not echoed back onto the screen (screenshots,
    shoulder-surfing, shared sessions).
    """
    os.environ["OPENAI_API_KEY"] = key
    # Show only a short prefix/suffix of the key; never display the whole secret.
    masked = f"{key[:3]}...{key[-4:]}" if len(key) > 7 else "***"
    return f"Your API Key has been set to: {masked}"
22
 
23
# Function to reset the OpenAI API key
def reset_api_key():
    """Blank out the session's OpenAI API key.

    The variable is overwritten with an empty string rather than removed
    from the environment.
    """
    confirmation = "Your API Key has been reset"
    os.environ["OPENAI_API_KEY"] = ""
    return confirmation
27
 
28
# Function to get the current OpenAI API key
def get_api_key():
    """Return the OpenAI API key currently held in the environment (or None if unset)."""
    return os.getenv("OPENAI_API_KEY")
32
 
33
# Function to set the model (GPT-3.5-turbo or GPT-4)
def set_model(model):
    """Persist the chosen model name in an environment variable and confirm it."""
    os.environ["OPENAI_MODEL"] = model
    return "%s selected" % model
37
 
38
# Function to get the current model
def get_model():
    """Look up the model name previously stored by set_model (None if never set)."""
    return os.getenv("OPENAI_MODEL")
42
 
43
# Function to get file paths of uploaded files
def upload_file(files):
    """Return the on-disk path of every uploaded file object, in upload order."""
    return [uploaded.name for uploaded in files]
47
 
48
# Function to create a Vectorstore (a searchable store of embedded text chunks).
def create_vectorstore(files):
    # NOTE(review): `files` is fed from a gr.File component populated by an
    # UploadButton with file_count="multiple"; it may arrive as a list, in
    # which case `files.name` raises AttributeError — confirm against Gradio docs.
    pdf_dir = files.name
    # NOTE(review): PyPDFDirectoryLoader expects a DIRECTORY path; passing a
    # single file's path looks suspicious — verify intended input here.
    pdf_loader = PyPDFDirectoryLoader(pdf_dir)
    pdf_docs = pdf_loader.load_and_split()  # load PDFs and split into documents
    # Chunk the loaded text into ~1000-character pieces with no overlap.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(pdf_docs)
    embeddings = OpenAIEmbeddings()  # uses OPENAI_API_KEY from the environment
    # Persist the embedded chunks to a local DeepLake store, overwriting any
    # existing store at this path.
    db = DeepLake.from_documents(texts, dataset_path="./documentation_db", embedding=embeddings, overwrite=True)
    return "Vectorstore Successfully Created"  # status message for the UI
60
 
61
# Generate a chatbot reply for `message`, given the Gradio chat history.
def respond(message, chat_history):

    # Get embeddings (uses OPENAI_API_KEY from the environment)
    embeddings = OpenAIEmbeddings()

    # Open the persisted vectorstore read-only (created by create_vectorstore).
    db = DeepLake(dataset_path="./documentation_db", embedding_function=embeddings, read_only=True)
    # Retriever settings: cosine distance, MMR re-ranking, fetch 10, return 10.
    retriever = db.as_retriever(search_kwargs={"distance_metric":'cos',
                                               "fetch_k":10,
                                               "maximal_marginal_relevance":True,
                                               "k":10})

    # NOTE(review): this keeps only the FIRST (message, answer) pair of the
    # history, discarding every later turn — confirm whether the most recent
    # turn (chat_history[-1]) was intended instead.
    if len(chat_history) != 0:
        chat_history = [(chat_history[0][0], chat_history[0][1])]

    # Fetch the model name stashed in OPENAI_MODEL by set_model, then rebind
    # `model` from the name string to a ChatOpenAI instance.
    model = get_model()
    # Create ChatOpenAI and ConversationalRetrievalChain
    model = ChatOpenAI(model=model)
    qa = ConversationalRetrievalChain.from_llm(model, retriever)

    # Run the retrieval-augmented QA chain for this question.
    bot_message = qa({"question": message, "chat_history": chat_history})
    # Replace the history with just this exchange (see NOTE above).
    chat_history = [(message, bot_message["answer"])]
    time.sleep(1)  # brief pause so the UI update feels natural
    return "", chat_history  # clear the input box, show updated history
91
 
92
# Assemble the Gradio UI; `demo` is the top-level Blocks app.
with gr.Blocks() as demo:

    # Page header.
    gr.Markdown("<h1 style='text-align: center;'>Langchain Coding Assistant</h1>")

    # Usage instructions. NOTE(review): this text says "grant writing" while
    # the header says "Coding Assistant" — confirm which description is intended.
    gr.Markdown("## This Gradio app is powered by ChatGPT and LangChain. You can submit your OpenAI API key and use the chatbot to get assistance with grant writing. \n ### 1. Enter your OpenAI API key. \n ### 2. Click 'Submit' to set your API key.\n ### 3. Upload the documents that you would like the model to be aware of and then create the vectorstore. Please note that once the vectorstore is created, it will persist and the documents will remain in the vectorstore. \n 4. Provide your prompt.")

    # --- API key controls ---
    with gr.Row():
        # Textbox for the user's OpenAI API key.
        api_input = gr.Textbox(label = "API Key",
                               placeholder = "Please provide your OpenAI API key here.")
        # Read-only status display. (Fixed placeholder typo: "has not be" -> "has not been".)
        api_key_status = gr.Textbox(label = "API Key Status",
                                    placeholder = "Your API Key has not been set yet. Please enter your key.",
                                    interactive = False)
        # Submit stores the key via set_api_key and shows the confirmation.
        api_submit_button = gr.Button("Submit")
        api_submit_button.click(set_api_key, inputs=api_input, outputs=api_key_status)
        # Reset clears the key for this session.
        api_reset_button = gr.Button("Clear API Key from session")
        api_reset_button.click(reset_api_key, outputs=api_key_status)

    # --- Model selection ---
    with gr.Row():
        with gr.Column():
            # Choose between the supported chat models.
            model_selection = gr.Dropdown(
                ["gpt-3.5-turbo", "gpt-4"], label="Model Selection", info="Please ensure you provide the API Key that corresponds to the Model you select!"
            )
            model_submit_button = gr.Button("Submit Model Selection")
            # Read-only confirmation of the selected model.
            model_status = gr.Textbox(label = "Selected Model", interactive = False, lines=4)
            model_submit_button.click(set_model, inputs=model_selection, outputs=model_status)

    # --- Document upload and vectorstore creation ---
    file_output = gr.File(label = "Uploaded files - Please note these files are persistent and will not be automatically deleted")
    upload_button = gr.UploadButton("Click to Upload a PDF File", file_types=["pdf"], file_count="multiple")
    # Uploading files lists their paths in file_output via upload_file.
    upload_button.upload(upload_file, upload_button, file_output)
    create_vectorstore_button = gr.Button("Click to create the vectorstore for your uploaded documents")
    db_output = gr.Textbox(label = "Vectorstore Status")
    create_vectorstore_button.click(create_vectorstore, inputs=file_output, outputs = db_output)

    # --- Chat interface ---
    chatbot = gr.Chatbot(label="ChatGPT Powered Grant Writing Assistant")
    msg = gr.Textbox(label="User Prompt", placeholder="Your Query Here")
    clear = gr.Button("Clear")

    # Submitting the prompt runs respond(); its return value clears the input
    # box and refreshes the chat history.
    msg.submit(respond, inputs = [msg, chatbot], outputs = [msg, chatbot])
    # Clear wipes the chatbot display.
    clear.click(lambda: None, None, chatbot, queue=False)

# Launch the Gradio interface
demo.launch()