|
|
|
import gradio as gr |
|
import torch |
|
from PIL import Image |
|
from transformers import AutoModel, AutoTokenizer |
|
import spaces |
|
|
|
# Device the model weights are moved to before serving requests.
device = "cuda"

# Load the MiniCPM-Llama3-V 2.5 checkpoint with float16 weights and move it
# to the target device in one step.
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-Llama3-V-2_5',
    trust_remote_code=True,
    torch_dtype=torch.float16,
).to(device=device)

# Tokenizer from the same checkpoint; it is passed to model.chat() by the
# request handler.
tokenizer = AutoTokenizer.from_pretrained(
    'openbmb/MiniCPM-Llama3-V-2_5',
    trust_remote_code=True,
)

# Switch the model to evaluation mode; this app only runs inference.
model.eval()
|
|
|
|
|
@spaces.GPU
def generate_response(image, question):
    """Answer a question about an image using the loaded chat model.

    Args:
        image: Image forwarded unchanged to ``model.chat``. The UI supplies
            a PIL image, or ``None`` when no image is currently set.
        question: Text of the user's question; sent as a single user turn.

    Returns:
        The complete generated answer as one string, or a short hint asking
        for an image when ``image`` is ``None``.
    """
    # This handler is attached to the image component's change event, which
    # may deliver None (e.g. after the image is cleared). Skip the model call
    # instead of letting it fail on a missing image.
    if image is None:
        return "Please upload an image to get a response."

    msgs = [{'role': 'user', 'content': question}]
    res = model.chat(
        image=image,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.7,
        stream=True,
    )
    # With stream=True the result is consumed as an iterable of text chunks;
    # join them into the full answer before returning.
    return "".join(res)
|
|
|
|
|
# HTML snippet rendered at the bottom of the page (author links and credit).
footer = """
<div style="text-align: center; margin-top: 20px;">
<a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
<a href="https://github.com/arad1367/Visual_QA_MiniCPM-Llama3-V-2_5_GradioApp" target="_blank">GitHub</a> |
<a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
<br>
Made with π by Pejman Ebrahimi
</div>
"""

# Build the UI: an image input, a question box, a submit button and a
# read-only style response box, followed by the footer.
with gr.Blocks(theme='abidlabs/dracula_revamped') as demo:
    gr.Markdown("Visual Question Answering - Complete chart and image analysis")
    gr.Markdown("Input an image and a question related to the image to receive a response.")

    image_input = gr.Image(type="pil", label="Image")
    question_input = gr.Textbox(label="Question")
    submit_button = gr.Button("Submit")
    output_text = gr.Textbox(label="Response")

    # Original trigger, kept for backward compatibility: run as soon as the
    # image changes, using whatever question is currently typed.
    image_input.change(
        generate_response,
        inputs=[image_input, question_input],
        outputs=output_text,
    )
    # Without these two listeners a question typed (or edited) after the
    # image was uploaded would never reach the model.
    question_input.submit(
        generate_response,
        inputs=[image_input, question_input],
        outputs=output_text,
    )
    submit_button.click(
        generate_response,
        inputs=[image_input, question_input],
        outputs=output_text,
    )

    gr.HTML(footer)

# Start the Gradio server; debug=True is kept from the original script.
demo.launch(debug=True)
|
|