Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -247,28 +247,28 @@ with gr.Blocks() as app:
|
|
247 |
gr.Markdown("# VisRAG Pipeline: Vision-based Retrieval-augmented Generation on Multi-modality Documents")
|
248 |
|
249 |
gr.Markdown("""
|
250 |
-
- A Vision Language Model Dense Retriever ([VisRAG-Ret](https://huggingface.co/openbmb/VisRAG-Ret)) **directly reads** your PDFs **without need
|
251 |
|
252 |
-
- **Ask a question**, it retrieve most
|
253 |
|
254 |
-
- It
|
255 |
|
256 |
-
- It
|
257 |
|
258 |
-
- It works like a human:
|
259 |
""")
|
260 |
|
261 |
-
gr.Markdown("-
|
262 |
|
263 |
with gr.Row():
|
264 |
file_input = gr.File(file_types=["pdf"], label="Step 1: Upload PDF")
|
265 |
file_result = gr.Text(label="Knowledge Base ID (remember it, it is re-usable!)")
|
266 |
-
process_button = gr.Button("Process PDF (Don't click until PDF
|
267 |
|
268 |
process_button.click(add_pdf_gradio, inputs=[file_input], outputs=file_result)
|
269 |
|
270 |
with gr.Row():
|
271 |
-
kb_id_input = gr.Text(label="Your Knowledge Base ID (paste your Knowledge Base ID here, it is re-usable:
|
272 |
query_input = gr.Text(label="Your Question")
|
273 |
topk_input = inputs=gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of pages to retrieve")
|
274 |
retrieve_button = gr.Button("Step2: Retrieve Pages")
|
@@ -276,10 +276,10 @@ with gr.Blocks() as app:
|
|
276 |
with gr.Row():
|
277 |
gr.Examples(
|
278 |
examples=[
|
279 |
-
["main_figure.pdf", "What is RAG-V?"],
|
280 |
["main_figure.pdf", "How does RAG-V perform?"]
|
281 |
],
|
282 |
-
inputs=[file_input, query_input],
|
283 |
)
|
284 |
|
285 |
with gr.Row():
|
@@ -301,7 +301,7 @@ with gr.Blocks() as app:
|
|
301 |
upvote_button.click(upvote, inputs=[kb_id_input, query_input], outputs=None)
|
302 |
downvote_button.click(downvote, inputs=[kb_id_input, query_input], outputs=None)
|
303 |
|
304 |
-
gr.Markdown("By using this demo, you agree to share your
|
305 |
|
306 |
|
307 |
app.launch()
|
|
|
247 |
gr.Markdown("# VisRAG Pipeline: Vision-based Retrieval-augmented Generation on Multi-modality Documents")
|
248 |
|
249 |
gr.Markdown("""
|
250 |
+
- A Vision Language Model Dense Retriever ([VisRAG-Ret](https://huggingface.co/openbmb/VisRAG-Ret)) **directly reads** your PDFs **without the need for OCR**, generates **multimodal dense representations** and assists in building your personal library.
|
251 |
|
252 |
+
- **Ask a question**, and it will retrieve the most relevant pages. Then, [MiniCPM-V-2.6](https://huggingface.co/spaces/openbmb/MiniCPM-V-2_6) will answer your question based on the recalled pages, utilizing its strong multi-image understanding capabilities.
|
253 |
|
254 |
+
- It assists you in reading **lengthy**, **visually-intensive** or **text-oriented** PDF documents, helping you locate pages that answer your questions.
|
255 |
|
256 |
+
- It enables you to build a personal library and retrieve book pages from a large collection of books.
|
257 |
|
258 |
+
- It works like a human: reading, storing, retrieving, and answering with full visual comprehension.
|
259 |
""")
|
260 |
|
261 |
+
gr.Markdown("- The current online demo supports PDF documents with fewer than 50 pages due to GPU time limitations. For longer PDFs and books, consider deploying it on your own machine.")
|
262 |
|
263 |
with gr.Row():
|
264 |
file_input = gr.File(file_types=["pdf"], label="Step 1: Upload PDF")
|
265 |
file_result = gr.Text(label="Knowledge Base ID (remember it, it is re-usable!)")
|
266 |
+
process_button = gr.Button("Process PDF (Don't click until PDF uploaded successfully)")
|
267 |
|
268 |
process_button.click(add_pdf_gradio, inputs=[file_input], outputs=file_result)
|
269 |
|
270 |
with gr.Row():
|
271 |
+
kb_id_input = gr.Text(label="Your Knowledge Base ID (paste your Knowledge Base ID here, it is re-usable):")
|
272 |
query_input = gr.Text(label="Your Question")
|
273 |
topk_input = inputs=gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of pages to retrieve")
|
274 |
retrieve_button = gr.Button("Step2: Retrieve Pages")
|
|
|
276 |
with gr.Row():
|
277 |
gr.Examples(
|
278 |
examples=[
|
279 |
+
["main_figure.pdf", "What is RAG-V?"],
|
280 |
["main_figure.pdf", "How does RAG-V perform?"]
|
281 |
],
|
282 |
+
inputs=[file_input, file_result, query_input],
|
283 |
)
|
284 |
|
285 |
with gr.Row():
|
|
|
301 |
upvote_button.click(upvote, inputs=[kb_id_input, query_input], outputs=None)
|
302 |
downvote_button.click(downvote, inputs=[kb_id_input, query_input], outputs=None)
|
303 |
|
304 |
+
gr.Markdown("By using this demo, you agree to share your usage data with us for research purposes, helping us improve the user experience.")
|
305 |
|
306 |
|
307 |
app.launch()
|