Spaces:

vanderbilt-dsi
/

grant-writing-assistant

Runtime error

App Files Files Community

grant-writing-assistant / app.py

umangchaudhry

Update app.py

4cd0c7a about 2 years ago

raw

history blame

8.37 kB

	# Import necessary libraries
	import openai
	import random
	import time
	import gradio as gr # Gradio is a library for creating UIs
	import os # This module provides functions to interact with the operating system
	# Importing various classes and functions from the langchain package
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.vectorstores import DeepLake
	from langchain.chat_models import ChatOpenAI
	from langchain.chains import ConversationalRetrievalChain
	from langchain.document_loaders import TextLoader
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.document_loaders import PyPDFDirectoryLoader
	from langchain.memory import ConversationBufferMemory
	from langchain.llms import OpenAI

	# Function to set the OpenAI API key
	def set_api_key(key):
	    """Store the OpenAI API key in the process environment.

	    Args:
	        key: The API key text entered by the user. Surrounding whitespace
	            (for example a trailing newline from a paste) is removed.

	    Returns:
	        A status message. On success the key is shown masked, with only the
	        last four characters visible, so the secret is not echoed back to
	        the UI. If the key is missing or blank, nothing is stored and a
	        message asking for a key is returned.
	    """
	    cleaned = (key or "").strip()
	    if not cleaned:
	        return "No API key provided. Please enter your key."

	    os.environ["OPENAI_API_KEY"] = cleaned

	    # Never echo the full secret; very short values are hidden entirely.
	    masked = f"****{cleaned[-4:]}" if len(cleaned) > 8 else "****"
	    return f"Your API Key has been set to: {masked}"

	# Function to reset the OpenAI API key
	def reset_api_key():
	    """Blank out the stored OpenAI API key.

	    The ``OPENAI_API_KEY`` environment variable is overwritten with an empty
	    string (it is not removed from the environment).

	    Returns:
	        A fixed confirmation message.
	    """
	    confirmation = "Your API Key has been reset"
	    os.environ.update(OPENAI_API_KEY="")
	    return confirmation

	# Function to get the current OpenAI API key
	def get_api_key():
	    """Return the value of ``OPENAI_API_KEY`` from the environment.

	    Returns:
	        The stored key, or ``None`` when the variable is not set.
	    """
	    return os.environ.get("OPENAI_API_KEY")

	# Function to set the model (GPT-3.5-turbo or GPT-4)
	def set_model(model):
	    """Record the chosen chat model in the process environment.

	    Args:
	        model: Name of the model to use (the UI offers "gpt-3.5-turbo" and
	            "gpt-4"). May be ``None`` or empty when nothing was selected.

	    Returns:
	        A status message: ``"<model> selected"`` on success, or a prompt to
	        choose a model when no selection was made (in which case the stored
	        value is left untouched).
	    """
	    # Assigning None to os.environ raises TypeError, so reject it up front.
	    if not model:
	        return "No model selected. Please choose a model first."

	    os.environ["OPENAI_MODEL"] = model
	    return f"{model} selected"

	# Function to get the current model
	def get_model():
	    """Return the value of ``OPENAI_MODEL`` from the environment.

	    Returns:
	        The stored model name, or ``None`` when the variable is not set.
	    """
	    return os.environ.get("OPENAI_MODEL")

	# Function to get file paths of uploaded files
	def upload_file(files):
	    """Collect the ``name`` attribute of every uploaded file object.

	    Args:
	        files: Iterable of objects exposing a ``name`` attribute.

	    Returns:
	        A list with one ``name`` value per input item, in input order.
	    """
	    collected = []
	    for uploaded in files:
	        collected.append(uploaded.name)
	    return collected

	# Function to create a Vectorstore
	def create_vectorstore(files):
	    """Build the DeepLake vectorstore from the uploaded PDF file(s).

	    Each PDF is loaded and split, re-chunked into pieces of at most 1000
	    characters with no overlap, embedded with OpenAI embeddings, and written
	    to ``./documentation_db`` (overwriting any existing dataset there).

	    Args:
	        files: A single uploaded file or a list/tuple of them. Each item may
	            be a path string or an object exposing the path as ``.name``.

	    Returns:
	        A status message describing the outcome.
	    """
	    if not files:
	        return "No files provided. Please upload at least one PDF first."

	    # Accept both a single file and a collection of files.
	    if not isinstance(files, (list, tuple)):
	        files = [files]
	    pdf_paths = [item if isinstance(item, str) else item.name for item in files]

	    # The uploads are individual files, not a directory, so a per-file loader
	    # is required here. Imported locally so the module-level imports stay
	    # untouched.
	    from langchain.document_loaders import PyPDFLoader

	    pdf_docs = []
	    for path in pdf_paths:
	        pdf_docs.extend(PyPDFLoader(path).load_and_split())

	    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
	    texts = text_splitter.split_documents(pdf_docs)
	    if not texts:
	        return "No text could be extracted from the uploaded PDFs."

	    embeddings = OpenAIEmbeddings()
	    # overwrite=True replaces any dataset already stored at this path.
	    DeepLake.from_documents(
	        texts,
	        dataset_path="./documentation_db",
	        embedding=embeddings,
	        overwrite=True,
	    )
	    return "Vectorstore Successfully Created"

	# Function to generate a response given a user's message and previous chat history
	def respond(message, chat_history):
	    """Answer a user prompt using the vectorstore and update the chat log.

	    Args:
	        message: The user's prompt text.
	        chat_history: Sequence of ``(user, assistant)`` exchanges currently
	            shown in the chatbot; may be empty or ``None``.

	    Returns:
	        A 2-tuple ``("", history)``: an empty string (used to clear the
	        prompt textbox) and the chat history with the new exchange appended.
	        When the prompt is blank, the history is returned unchanged and no
	        model call is made.
	    """
	    history = list(chat_history or [])

	    # Nothing to ask: skip the retrieval and model calls entirely.
	    if not message or not message.strip():
	        return "", history

	    embeddings = OpenAIEmbeddings()

	    # Open the existing vectorstore read-only.
	    db = DeepLake(
	        dataset_path="./documentation_db",
	        embedding_function=embeddings,
	        read_only=True,
	    )
	    retriever = db.as_retriever(
	        search_kwargs={
	            "distance_metric": 'cos',
	            "fetch_k": 10,
	            "maximal_marginal_relevance": True,
	            "k": 10,
	        }
	    )

	    # Only the most recent exchange is forwarded to the chain as context;
	    # it is passed as a tuple rather than the list the UI may supply.
	    if history:
	        last_user, last_bot = history[-1][0], history[-1][1]
	        recent_turns = [(last_user, last_bot)]
	    else:
	        recent_turns = []

	    # Fall back to a default when no model has been selected yet.
	    model_name = get_model() or "gpt-3.5-turbo"
	    llm = ChatOpenAI(model_name=model_name)
	    qa = ConversationalRetrievalChain.from_llm(llm, retriever)

	    result = qa({"question": message, "chat_history": recent_turns})

	    # Append instead of replacing so earlier turns stay visible in the UI.
	    history.append((message, result["answer"]))
	    return "", history

	# Start building the Gradio UI
	with gr.Blocks() as demo:
	    # NOTE(review): the nesting of widgets inside the Row/Column containers
	    # was reconstructed; confirm the intended layout.

	    # Page header
	    gr.Markdown("<h1 style='text-align: center;'>Grant Writing Assistant</h1>")

	    # Usage instructions
	    gr.Markdown("## This Gradio app is powered by ChatGPT and LangChain. You can submit your OpenAI API key and use the chatbot to get assistance with grant writing. \n ### 1. Enter your OpenAI API key. \n ### 2. Click 'Submit' to set your API key.\n ### 3. Upload the documents that you would like the model to be aware of and then create the vectorstore. Please note that once the vectorstore is created, it will persist and the documents will remain in the vectorstore. \n ### 4. Provide your prompt.")

	    # Row 1: API key entry, status display, submit and reset buttons
	    with gr.Row():
	        # Password-type input so the key is not shown in clear text
	        api_input = gr.Textbox(
	            label="API Key",
	            placeholder="Please provide your OpenAI API key here.",
	            type="password",
	        )
	        # Read-only status display for the API key
	        api_key_status = gr.Textbox(
	            label="API Key Status",
	            placeholder="Your API Key has not been set yet. Please enter your key.",
	            interactive=False,
	        )
	        api_submit_button = gr.Button("Submit")
	        api_submit_button.click(set_api_key, inputs=api_input, outputs=api_key_status)
	        api_reset_button = gr.Button("Clear API Key from session")
	        api_reset_button.click(reset_api_key, outputs=api_key_status)

	    # Row 2: model selection next to document upload / vectorstore creation
	    with gr.Row():
	        with gr.Column():
	            model_selection = gr.Dropdown(
	                ["gpt-3.5-turbo", "gpt-4"],
	                label="Model Selection",
	                info="Please ensure you provide the API Key that corresponds to the Model you select!",
	            )
	            model_submit_button = gr.Button("Submit Model Selection")
	            # Read-only display of the currently selected model
	            model_status = gr.Textbox(label="Selected Model", interactive=False, lines=4)
	            model_submit_button.click(set_model, inputs=model_selection, outputs=model_status)

	        with gr.Column():
	            # NOTE(review): confirm that every uploaded PDF (not only the
	            # first) is handed to create_vectorstore by this File component.
	            file_output = gr.File(label="Uploaded files - Please note these files are persistent and will not be automatically deleted")
	            # File-type filters are given as extensions with a leading dot
	            upload_button = gr.UploadButton("Click to Upload a PDF File", file_types=[".pdf"], file_count="multiple")
	            upload_button.upload(upload_file, upload_button, file_output)
	            create_vectorstore_button = gr.Button("Click to create the vectorstore for your uploaded documents")
	            db_output = gr.Textbox(label="Vectorstore Status")
	            create_vectorstore_button.click(create_vectorstore, inputs=file_output, outputs=db_output)

	    # Chat area: conversation display, prompt box and clear button
	    chatbot = gr.Chatbot(label="ChatGPT Powered Grant Writing Assistant")
	    msg = gr.Textbox(label="User Prompt", placeholder="Your Query Here")
	    clear = gr.Button("Clear")

	    # Submitting the prompt calls respond, which clears the prompt box and
	    # updates the chatbot with the returned history
	    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
	    # Returning None to the chatbot empties the displayed conversation
	    clear.click(lambda: None, None, chatbot, queue=False)

	# Launch the Gradio interface
	demo.launch()