tcy6 committed on
Commit
bace9e3
1 Parent(s): 558ab5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -247,28 +247,28 @@ with gr.Blocks() as app:
247
  gr.Markdown("# VisRAG Pipeline: Vision-based Retrieval-augmented Generation on Multi-modality Documents")
248
 
249
  gr.Markdown("""
250
- - A Vision Language Model Dense Retriever ([VisRAG-Ret](https://huggingface.co/openbmb/VisRAG-Ret)) **directly reads** your PDFs **without need of OCR**, produce **multimodal dense representations** and build your personal library.
251
 
252
- - **Ask a question**, it retrieve most relavant pages, then [MiniCPM-V-2.6](https://huggingface.co/spaces/openbmb/MiniCPM-V-2_6) will answer your question based on pages recalled, with strong multi-image understanding capability.
253
 
254
- - It helps you read a long **visually-intensive** or **text-oriented** PDF document and find the pages that answer your question.
255
 
256
- - It helps you build a personal library and retireve book pages from a large collection of books.
257
 
258
- - It works like a human: read, store, retrieve, and answer with full vision.
259
  """)
260
 
261
- gr.Markdown("- Currently online demo support PDF document with less than 50 pages due to GPU time limit. Deploy on your own machine for longer PDFs and books.")
262
 
263
  with gr.Row():
264
  file_input = gr.File(file_types=["pdf"], label="Step 1: Upload PDF")
265
  file_result = gr.Text(label="Knowledge Base ID (remember it, it is re-usable!)")
266
- process_button = gr.Button("Process PDF (Don't click until PDF upload success)")
267
 
268
  process_button.click(add_pdf_gradio, inputs=[file_input], outputs=file_result)
269
 
270
  with gr.Row():
271
- kb_id_input = gr.Text(label="Your Knowledge Base ID (paste your Knowledge Base ID here, it is re-usable:)")
272
  query_input = gr.Text(label="Your Queston")
273
  topk_input = inputs=gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of pages to retrieve")
274
  retrieve_button = gr.Button("Step2: Retrieve Pages")
@@ -276,10 +276,10 @@ with gr.Blocks() as app:
276
  with gr.Row():
277
  gr.Examples(
278
  examples=[
279
- ["main_figure.pdf", "What is RAG-V?"],
280
  ["main_figure.pdf", "How does RAG-V perform?"]
281
  ],
282
- inputs=[file_input, query_input],
283
  )
284
 
285
  with gr.Row():
@@ -301,7 +301,7 @@ with gr.Blocks() as app:
301
  upvote_button.click(upvote, inputs=[kb_id_input, query_input], outputs=None)
302
  downvote_button.click(downvote, inputs=[kb_id_input, query_input], outputs=None)
303
 
304
- gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
305
 
306
 
307
  app.launch()
 
247
  gr.Markdown("# VisRAG Pipeline: Vision-based Retrieval-augmented Generation on Multi-modality Documents")
248
 
249
  gr.Markdown("""
250
+ - A Vision Language Model Dense Retriever ([VisRAG-Ret](https://huggingface.co/openbmb/VisRAG-Ret)) **directly reads** your PDFs **without need for OCR**, generates **multimodal dense representations** and assists in building your personal library.
251
 
252
+ - **Ask a question**, and it will retrieve the most relevant pages. Then, [MiniCPM-V-2.6](https://huggingface.co/spaces/openbmb/MiniCPM-V-2_6) will answer your question based on the recalled pages, utilizing its strong multi-image understanding capabilities.
253
 
254
+ - It assists you in reading **lengthy**, **visually-intensive** or **text-oriented** PDF documents, helping you locate pages that answer your questions.
255
 
256
+ - It enables you to build a personal library and retrieve book pages from a large collection of books.
257
 
258
+ - It works like a human: reading, storing, retrieving, and answering with full visual comprehension.
259
  """)
260
 
261
+ gr.Markdown("- The current online demo supports PDF documents with fewer than 50 pages due to GPU time limitations. For longer PDFs and books, consider deploying it on your own machine.")
262
 
263
  with gr.Row():
264
  file_input = gr.File(file_types=["pdf"], label="Step 1: Upload PDF")
265
  file_result = gr.Text(label="Knowledge Base ID (remember it, it is re-usable!)")
266
+ process_button = gr.Button("Process PDF (Don't click until PDF uploaded successfully)")
267
 
268
  process_button.click(add_pdf_gradio, inputs=[file_input], outputs=file_result)
269
 
270
  with gr.Row():
271
+ kb_id_input = gr.Text(label="Your Knowledge Base ID (paste your Knowledge Base ID here, it is re-usable):")
272
  query_input = gr.Text(label="Your Queston")
273
  topk_input = inputs=gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of pages to retrieve")
274
  retrieve_button = gr.Button("Step2: Retrieve Pages")
 
276
  with gr.Row():
277
  gr.Examples(
278
  examples=[
279
+ ["main_figure.pdf", "What is RAG-V?"],
280
  ["main_figure.pdf", "How does RAG-V perform?"]
281
  ],
282
+ inputs=[file_input, file_result, query_input],
283
  )
284
 
285
  with gr.Row():
 
301
  upvote_button.click(upvote, inputs=[kb_id_input, query_input], outputs=None)
302
  downvote_button.click(downvote, inputs=[kb_id_input, query_input], outputs=None)
303
 
304
+ gr.Markdown("By using this demo, you agree to share your usage data with us for research purposes, helping us improve the user experience.")
305
 
306
 
307
  app.launch()