fffiloni committed on
Commit
e348efe
1 Parent(s): ceaca7e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -0
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.document_loaders import OnlinePDFLoader
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Shared, module-level building blocks reused by every call to `infer`.

# Splits loaded documents into chunks of 350 characters with no overlap.
text_splitter = CharacterTextSplitter(
    chunk_size=350,
    chunk_overlap=0,
)

# LLM handle for the "google/flan-ul2" repo on the Hugging Face Hub.
flan_ul2 = HuggingFaceHub(
    repo_id="google/flan-ul2",
    model_kwargs={
        "temperature": 0.1,
        "max_new_tokens": 300,
    },
)

# Embedding client built with the library's default settings.
embeddings = HuggingFaceHubEmbeddings()

def infer(pdf_doc):
    """Ask a fixed question about the PDF referenced by ``pdf_doc``.

    The PDF is loaded with ``OnlinePDFLoader``, split into chunks by the
    module-level ``text_splitter``, indexed in a Chroma vector store using
    the module-level ``embeddings``, and queried through a "stuff"
    ``RetrievalQA`` chain backed by ``flan_ul2``.

    Args:
        pdf_doc: Location of the PDF, passed straight to ``OnlinePDFLoader``
            (the UI supplies a URL string by default).

    Returns:
        Whatever the ``RetrievalQA`` chain call returns, unmodified.
        NOTE(review): with ``return_source_documents=True`` this is
        presumably a dict holding the answer plus the source documents,
        not a plain string - confirm this is what the output Textbox
        should display.
    """
    # Load and chunk the document in one go.
    chunks = text_splitter.split_documents(OnlinePDFLoader(pdf_doc).load())

    # A new vector store is built from the chunks on every call.
    vector_store = Chroma.from_documents(chunks, embeddings)

    chain = RetrievalQA.from_chain_type(
        llm=flan_ul2,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )

    # The question is hard-coded; only the document varies between calls.
    return chain({"query": "What is the title of this paper?"})


# Wire `infer` to a one-textbox-in / one-textbox-out UI and start serving it.
pdf_url_input = gr.Textbox(value="https://arxiv.org/pdf/2304.03757.pdf")
answer_output = gr.Textbox()
demo = gr.Interface(fn=infer, inputs=[pdf_url_input], outputs=[answer_output])
demo.launch()