import gradio as gr from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.llms import HuggingFaceHub from langchain.embeddings import HuggingFaceEmbeddings from langchain.vectorstores import Chroma from langchain.chains import RetrievalQA from langchain.document_loaders import PyMuPDFLoader from dotenv import load_dotenv import os load_dotenv() # Set Hugging Face API token from environment variable os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN", "default_value_if_not_found") def load_doc(pdf_doc): loader = PyMuPDFLoader(pdf_doc.name) documents = loader.load() embedding = HuggingFaceEmbeddings() text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) text = text_splitter.split_documents(documents) db = Chroma.from_documents(text, embedding) llm = HuggingFaceHub(repo_id="OpenAssistant/oasst-sft-1-pythia-12b", model_kwargs={"temperature": 1.0, "max_length": 256}) global chain chain = RetrievalQA.from_chain_type(llm=llm,chain_type="stuff",retriever=db.as_retriever()) return 'Document has successfully been loaded' def answer_query(query): question = query return chain.run(question) html = """

ChatPDF

Upload a PDF File, then click on Load PDF File
Once the document has been loaded you can begin chatting with the PDF =)

""" css = """container{max-width:700px; margin-left:auto; margin-right:auto,padding:20px}""" with gr.Blocks(css=css,theme=gr.themes.Monochrome()) as demo: gr.HTML(html) with gr.Column(): gr.Markdown('ChatPDF') pdf_doc = gr.File(label="Load a pdf",file_types=['.pdf','.docx'],type='file') with gr.Row(): load_pdf = gr.Button('Load pdf file') status = gr.Textbox(label="Status",placeholder='',interactive=False) with gr.Row(): input = gr.Textbox(label="type in your question") output = gr.Textbox(label="output") submit_query = gr.Button("submit") load_pdf.click(load_doc,inputs=pdf_doc,outputs=status) submit_query.click(answer_query,input,output) demo.launch()