"""Gradio app: upload a PDF, index it with FAISS/OpenAI embeddings, then chat over it."""
import os
import pickle
import time  # NOTE(review): appears unused; kept in case another consumer relies on it

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Where the pickled FAISS vectorstore is persisted between upload and chat.
VECTORSTORE_PATH = "vectorstore.pkl"


def upload_file(file, key):
    """Index the uploaded PDF into a FAISS vectorstore and persist it to disk.

    Args:
        file: Gradio file object for the uploaded PDF (has a ``.name`` path).
        key: OpenAI API key entered by the user.

    Returns:
        The path of the uploaded file, so Gradio can display it.
    """
    # Set the environment variable so the OpenAI embedding/LLM clients pick up the key.
    os.environ["OPENAI_API_KEY"] = key

    # Load the document.
    loader = PyPDFLoader(file.name)
    documents = loader.load()

    # Split the documents into overlapping chunks so retrieval returns passages,
    # not whole pages.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)

    # OpenAI embeddings.
    embeddings = OpenAIEmbeddings()

    # BUG FIX: index the split chunks (`texts`), not the raw page `documents` --
    # the original passed `documents`, which silently made the splitting step a no-op.
    db = FAISS.from_documents(texts, embeddings)

    # NOTE(review): pickling a FAISS store is fragile across library versions and
    # unpickling is unsafe on untrusted files; FAISS.save_local/load_local is the
    # supported persistence path. Kept as-is for compatibility with existing files.
    with open(VECTORSTORE_PATH, "wb") as f:
        pickle.dump(db, f)

    return file.name


with gr.Blocks() as demo:
    openai_key = gr.Textbox(label="OPENAI API KEY")
    file_output = gr.File(label="Please select a pdf file wait for the document to be displayed here")
    upload_button = gr.UploadButton("Click to upload a pdf document", file_types=["pdf"], file_count="single")
    upload_button.upload(upload_file, inputs=[upload_button, openai_key], outputs=file_output)

    chatbot = gr.Chatbot(label="Chat")
    msg = gr.Textbox(label="Enter your query")
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Append the new user turn (answer pending) and clear the input box."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Answer the latest question against the persisted vectorstore.

        ``history`` is a list of ``[user_message, bot_answer]`` pairs; the last
        entry's answer slot is filled in place and the updated history returned.
        """
        user_message = history[-1][0]

        with open(VECTORSTORE_PATH, "rb") as f:
            vectorstore = pickle.load(f)

        llm = OpenAI(temperature=0)
        qa = ConversationalRetrievalChain.from_llm(
            llm, vectorstore.as_retriever(), return_source_documents=True
        )

        # BUG FIX: pass the *previous* completed turns as chat history.
        # The original tested `history[-1][1] != None`, which is never true here
        # (user() has just appended [msg, None]), so the chain always received an
        # empty history and follow-up questions lost all conversational context.
        chat_history = [(u, a) for u, a in history[:-1] if a is not None]

        result = qa({"question": user_message, "chat_history": chat_history})
        history[-1][1] = result["answer"]
        return history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()