|
|
|
import base64
import io

import gradio as gr
from huggingface_hub import InferenceClient
|
|
|
|
|
# Identifier of the hosted vision-language model that answers every request.
# Kept as a named constant so the model can be swapped in one place.
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Single module-level inference client, created once and reused by all calls.
client = InferenceClient(MODEL_ID)
|
|
|
def _to_png_data_url(image):
    """Encode *image* as PNG and return it as a base64 ``data:`` URL.

    The image is serialised into an in-memory buffer rather than a file, so
    nothing is left on disk and simultaneous requests cannot overwrite each
    other's data.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=...)`` method.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    buffer = io.BytesIO()
    # A buffer has no file name to infer the format from, so name it here.
    image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"


def imageDescription(image, prompt):
    """Ask the vision model a question about an image and return its answer.

    Args:
        image: The image to describe (an object with a PIL-style ``save``
            method), or ``None`` when nothing has been uploaded.
        prompt: The user's question or instruction about the image.

    Returns:
        The text content of the model's first completion choice, or a short
        hint asking for an upload when ``image`` is ``None``.
    """
    # Guard against the button being pressed before an image was provided;
    # without it the call would fail with AttributeError on ``None.save``.
    if image is None:
        return "Please upload an image first."

    image_url = _to_png_data_url(image)

    # One user turn carrying both the inline image and the text prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": image_url},
                },
                {
                    "type": "text",
                    "text": prompt,
                },
            ],
        },
    ]
    output = client.chat.completions.create(messages=messages)
    return output.choices[0].message.content
|
|
|
|
|
# Build the UI: one row with two columns. The first column holds the image
# upload; the second holds the question box, the trigger button and the answer.
with gr.Blocks(theme=gr.themes.Citrus()) as demo:
    with gr.Row():
        with gr.Column():
            # type="pil" makes Gradio hand the upload to the callback as a
            # PIL image object.
            image = gr.Image(type="pil", label="upload an image")
        with gr.Column():
            prompt = gr.Textbox(
                label="What would you like to know about this picture?",
                scale=1,
            )
            describe_btn = gr.Button("Describe the image", scale=1)
            output = gr.Textbox(label="Description", scale=1)

    # Event wiring must happen inside the Blocks context: clicking the button
    # sends the image and the question to imageDescription and shows the
    # returned text in the "Description" box.
    describe_btn.click(fn=imageDescription, inputs=[image, prompt], outputs=output)

# Start the web server; debug=True is passed through as in the original script.
demo.launch(debug=True)
|
|