File size: 8,364 Bytes
bfabbde
858002b
 
 
0fd6500
bfabbde
 
858002b
 
 
 
 
 
 
 
 
 
bfabbde
858002b
bfabbde
 
858002b
bfabbde
858002b
bfabbde
 
858002b
bfabbde
858002b
bfabbde
858002b
 
bfabbde
858002b
bfabbde
 
858002b
bfabbde
858002b
bfabbde
 
858002b
bfabbde
858002b
bfabbde
858002b
 
bfabbde
858002b
bfabbde
 
 
 
 
 
 
 
858002b
bfabbde
858002b
bfabbde
858002b
 
 
 
 
bfabbde
858002b
bfabbde
858002b
 
 
 
 
bfabbde
858002b
 
 
bfabbde
858002b
 
4cd0c7a
858002b
 
bfabbde
858002b
bfabbde
858002b
bfabbde
 
858002b
bfabbde
858002b
8c214fb
bfabbde
216a6ee
8c214fb
bfabbde
b7e909d
8c214fb
bfabbde
858002b
bfabbde
858002b
 
bfabbde
858002b
 
 
bfabbde
858002b
bfabbde
858002b
bfabbde
858002b
bfabbde
858002b
 
bfabbde
858002b
 
bfabbde
858002b
03254f5
858002b
bfabbde
858002b
bfabbde
858002b
bfabbde
858002b
 
bfabbde
858002b
bfabbde
858002b
bfabbde
858002b
bfabbde
858002b
bfabbde
858002b
bfabbde
858002b
 
bfabbde
858002b
bfabbde
858002b
bfabbde
858002b
 
bfabbde
858002b
bfabbde
858002b
 
bfabbde
5c8569f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Import necessary libraries
import openai
import random
import time
import gradio as gr  # Gradio is a library for creating UIs
import os  # This module provides functions to interact with the operating system
# Importing various classes and functions from the langchain package
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI

# Function to set the OpenAI API key
def set_api_key(key):
    """Store the user's OpenAI API key in the process environment.

    Returns a confirmation message with the key masked, so the secret is
    never echoed back onto the screen (the original displayed it verbatim).
    """
    os.environ["OPENAI_API_KEY"] = key  # Sets an environment variable with the key
    # Show at most the last 4 characters; anything shorter is fully hidden.
    masked = f"...{key[-4:]}" if len(key) > 4 else "..."
    return f"Your API Key has been set to: {masked}"  # Returns a confirmation message

# Function to reset the OpenAI API key
def reset_api_key():
    """Clear the OpenAI API key stored in the environment.

    The variable is overwritten with an empty string rather than deleted,
    so later reads of ``OPENAI_API_KEY`` still succeed.
    """
    os.environ["OPENAI_API_KEY"] = ""
    return "Your API Key has been reset"

# Function to get the current OpenAI API key
def get_api_key():
    """Return the OpenAI API key currently stored in the environment (or None)."""
    return os.getenv("OPENAI_API_KEY")

# Function to set the model (GPT-3.5-turbo or GPT-4)
def set_model(model):
    """Record the chosen chat model name in the environment and confirm it."""
    os.environ["OPENAI_MODEL"] = model
    return model + " selected"

# Function to get the current model
def get_model():
    """Return the model name currently stored in the environment (or None)."""
    return os.environ.get("OPENAI_MODEL")

# Function to get file paths of uploaded files
def upload_file(files):
    """Collect the temp-file path (``.name``) of each uploaded file wrapper."""
    paths = []
    for uploaded in files:
        paths.append(uploaded.name)
    return paths

# Function to create a Vectorstore
def create_vectorstore(files):
    """Build the DeepLake vectorstore from the uploaded PDF files.

    ``files`` is the current value of the ``gr.File`` component: a single file
    wrapper, or a list of wrappers when several PDFs were uploaded (the upload
    button uses ``file_count="multiple"``, so the original ``files.name`` would
    raise AttributeError on a list). ``PyPDFDirectoryLoader`` expects a
    directory, not a file path, so we load from the directory that holds the
    uploaded temp files.
    """
    # Normalise to a list so single- and multi-file uploads are handled alike.
    if not isinstance(files, list):
        files = [files]
    # Gradio stores uploads as temp files; hand the loader their directory.
    pdf_dir = os.path.dirname(files[0].name)
    pdf_loader = PyPDFDirectoryLoader(pdf_dir)  # Load the PDFs in the directory
    pdf_docs = pdf_loader.load_and_split()  # Load and split the PDFs into sections
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)  # Set up a text splitter
    texts = text_splitter.split_documents(pdf_docs)  # Split the documents into chunks
    embeddings = OpenAIEmbeddings()  # Set up the OpenAI embeddings
    # Create the Vectorstore from the documents, using the specified path, embeddings, and overwrite if it exists.
    db = DeepLake.from_documents(texts, dataset_path="./documentation_db", embedding=embeddings, overwrite=True)
    return "Vectorstore Successfully Created"  # Returns a confirmation message

# Function to generate a response given a user's message and previous chat history
def respond(message, chat_history):
    """Answer ``message`` against the persisted vectorstore, keeping full history.

    Returns ``("", updated_history)``: the empty string clears the input
    textbox, and the history (a list of ``(user, bot)`` pairs) refreshes the
    Chatbot. The original truncated the incoming history to its first exchange
    and returned only the newest one, so the Chatbot forgot the conversation.
    """
    # Get embeddings
    embeddings = OpenAIEmbeddings()

    # Connect to the existing, persisted Vectorstore in read-only mode
    db = DeepLake(dataset_path="./documentation_db", embedding_function=embeddings, read_only=True)
    # Retriever settings: cosine distance with MMR re-ranking over the fetched chunks
    retriever = db.as_retriever(search_kwargs={"distance_metric": 'cos',
                                               "fetch_k": 10,
                                               "maximal_marginal_relevance": True,
                                               "k": 10})

    # The chain expects the history as a list of (user, bot) tuples;
    # gr.Chatbot may deliver it as a list of two-element lists.
    history_tuples = [(user_msg, bot_msg) for user_msg, bot_msg in chat_history]

    # Create ChatOpenAI and ConversationalRetrievalChain with the selected model
    model = ChatOpenAI(model_name=get_model())
    qa = ConversationalRetrievalChain.from_llm(model, retriever)

    # Generate a bot message with the FULL conversation as context
    bot_message = qa({"question": message, "chat_history": history_tuples})
    # Append the new exchange instead of discarding earlier turns
    chat_history = history_tuples + [(message, bot_message["answer"])]
    time.sleep(1)  # Wait for a second to simulate real-time interaction
    return "", chat_history  # Clear the textbox and return updated chat history

# Start building the Gradio UI. This top-level script wires the helper
# functions above to UI widgets; it runs on import and blocks on launch().
with gr.Blocks() as demo:

    # Write some HTML for a header
    gr.Markdown("<h1 style='text-align: center;'>Grant Writing Assistant</h1>")
    
    # Write some explanatory text
    gr.Markdown("## This Gradio app is powered by ChatGPT and LangChain. You can submit your OpenAI API key and use the chatbot to get assistance with grant writing. \n ### 1. Enter your OpenAI API key. \n ### 2. Click 'Submit' to set your API key.\n ### 3. Upload the documents that you would like the model to be aware of and then create the vectorstore. Please note that once the vectorstore is created, it will persist and the documents will remain in the vectorstore. \n ### 4. Provide your prompt.")
    
    # Set up a row of UI elements
    with gr.Row():
        # Create a textbox for API key input
        api_input = gr.Textbox(label = "API Key",
                               placeholder = "Please provide your OpenAI API key here.")
        # Create a non-interactive textbox to display API key status
        # (fixed typo: "has not be set" -> "has not been set")
        api_key_status = gr.Textbox(label = "API Key Status",
                                             placeholder = "Your API Key has not been set yet. Please enter your key.",
                                             interactive = False)
    # Create a button to submit API key
    api_submit_button = gr.Button("Submit")
    # Set the button to call set_api_key when clicked, updating the API key status
    api_submit_button.click(set_api_key, inputs=api_input, outputs=api_key_status)
    # Create a button to reset API key
    api_reset_button = gr.Button("Clear API Key from session")
    # Set the button to call reset_api_key when clicked, updating the API key status
    api_reset_button.click(reset_api_key, outputs=api_key_status)
    
    # Set up a row of UI elements
    with gr.Row():
        with gr.Column():
            # Create a dropdown to select a model
            model_selection = gr.Dropdown(
            ["gpt-3.5-turbo", "gpt-4"], label="Model Selection", info="Please ensure you provide the API Key that corresponds to the Model you select!"
        )
            # Create a button to submit model selection
            model_submit_button = gr.Button("Submit Model Selection")
        # Create a non-interactive textbox to display model selection status
        model_status = gr.Textbox(label = "Selected Model", interactive = False, lines=4)
        # Set the button to call set_model when clicked, updating the model status
        model_submit_button.click(set_model, inputs=model_selection, outputs=model_status)
        
    # Create a File output UI element for displaying uploaded files
    file_output = gr.File(label = "Uploaded files - Please note these files are persistent and will not be automatically deleted")
    # Create an Upload button for PDF file(s)
    upload_button = gr.UploadButton("Click to Upload a PDF File", file_types=["pdf"], file_count="multiple")
    # Set the button to call upload_file when clicked, updating the file_output with the uploaded files
    upload_button.upload(upload_file, upload_button, file_output)
    # Create a button to create the vectorstore
    create_vectorstore_button = gr.Button("Click to create the vectorstore for your uploaded documents")
    # Create a textbox to display Vectorstore status
    db_output = gr.Textbox(label = "Vectorstore Status")
    # Set the button to call create_vectorstore when clicked, updating the Vectorstore status
    create_vectorstore_button.click(create_vectorstore, inputs=file_output, outputs = db_output)
    
    # Create a Chatbot UI element
    chatbot = gr.Chatbot(label="ChatGPT Powered Grant Writing Assistant")
    # Create a textbox for user's prompt
    msg = gr.Textbox(label="User Prompt", placeholder="Your Query Here")
    # Create a button to clear the chat history
    clear = gr.Button("Clear")
    
    # Set the textbox to call respond when submitted, updating the chatbot with the response
    msg.submit(respond, inputs = [msg, chatbot], outputs = [msg, chatbot])
    # Set the button to clear the chat history when clicked
    clear.click(lambda: None, None, chatbot, queue=False)
    
# Launch the Gradio interface
demo.launch()