Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -204,7 +204,7 @@ def answer_question(images, question):
     global gen_model, gen_tokenizer
     # here each element of images is a tuple of (image_path, None).
     images_ = [Image.open(image[0]).convert('RGB') for image in images]
-    msgs = [{'role': 'user', 'content': [*images_
+    msgs = [{'role': 'user', 'content': [question, *images_]}]
     answer = gen_model.chat(
         image=None,
         msgs=msgs,
@@ -212,26 +212,23 @@ def answer_question(images, question):
     )
     print(answer)
     return answer
-
 
 
 with gr.Blocks() as app:
     gr.Markdown("# MiniCPMV-RAG-PDFQA: Two Vision Language Models Enable End-to-End RAG")
 
     gr.Markdown("""
-    - A Vision Language Model Dense Retriever ([
+    - A Vision Language Model Dense Retriever ([minicpm-visual-embedding-v0](https://huggingface.co/RhapsodyAI/minicpm-visual-embedding-v0)) **directly reads** your PDFs **without the need for OCR**, produces **multimodal dense representations**, and builds your personal library.
 
     - **Ask a question**: it retrieves the most relevant pages, then [MiniCPM-V-2.6](https://huggingface.co/spaces/openbmb/MiniCPM-V-2_6) will answer your question based on the recalled pages, with strong multi-image understanding capability.
 
-
+    - It helps you read a long **visually-intensive** or **text-oriented** PDF document and find the pages that answer your question.
 
-
+    - It helps you build a personal library and retrieve book pages from a large collection of books.
 
-
+    - It works like a human: read, store, retrieve, and answer with full vision.
     """)
 
-    gr.Markdown("- We **open-sourced** our visual embedding model at [RhapsodyAI/minicpm-visual-embedding-v0](https://huggingface.co/RhapsodyAI/minicpm-visual-embedding-v0)")
-
     gr.Markdown("- Currently, the online demo supports PDF documents with fewer than 50 pages.")
 
     with gr.Row():
@@ -251,8 +248,6 @@ with gr.Blocks() as app:
         images_output = gr.Gallery(label="Retrieved Pages")
 
     retrieve_button.click(retrieve_gradio, inputs=[kb_id_input, query_input, topk_input], outputs=images_output)
-
-    gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
 
     with gr.Row():
         button = gr.Button("Answer Question with Retrieved Pages")
@@ -267,7 +262,8 @@ with gr.Blocks() as app:
 
     upvote_button.click(upvote, inputs=[kb_id_input, query_input], outputs=None)
     downvote_button.click(downvote, inputs=[kb_id_input, query_input], outputs=None)
-
+
+    gr.Markdown("By using this demo, you agree to share your usage data with us for research purposes, to help improve the user experience.")
 
 
 app.launch()
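
For context, the substantive fix in this commit is the msgs payload: MiniCPM-V's chat method takes a list of chat turns whose content interleaves text and PIL images, and the question text was missing from the user turn. Below is a minimal sketch of the fixed call path; the AutoModel/AutoTokenizer loading with trust_remote_code=True is the usual pattern for this model family and is an assumption here, since the diff itself only shows the chat call.

from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Assumed loading pattern for MiniCPM-V-2.6 (not shown in this diff).
model = AutoModel.from_pretrained("openbmb/MiniCPM-V-2_6", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6", trust_remote_code=True)

def answer_question(images, question):
    # Gradio's Gallery yields (image_path, caption) tuples; open each page as RGB.
    images_ = [Image.open(path).convert('RGB') for path, _ in images]
    # The fix: include the question text in the user turn, interleaved with the
    # retrieved page images, instead of passing the images alone.
    msgs = [{'role': 'user', 'content': [question, *images_]}]
    return model.chat(image=None, msgs=msgs, tokenizer=tokenizer)

With this structure the model sees the query and all recalled pages in a single turn, which is what enables the multi-image answering described in the demo's intro text.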