artificialguybr committed on
Commit
5038429
·
1 Parent(s): f336ef5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ from PIL import Image
5
+ import requests
6
+ from io import BytesIO
7
+
8
# Load the Qwen-VL tokenizer and model once at startup.
# trust_remote_code=True is required because Qwen-VL ships custom
# modeling/tokenization code on the Hub.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True)
# Fall back to CPU when no CUDA device is available so the app can still
# start on CPU-only hosts (behavior on CUDA machines is unchanged).
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-VL",
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    trust_remote_code=True,
).eval()
11
+
12
def generate_predictions(image_input, text_input):
    """Run Qwen-VL on an uploaded image plus a text prompt.

    Args:
        image_input: PIL image supplied by the Gradio "image" component.
        text_input: prompt string supplied by the Gradio "text" component.

    Returns:
        A ``(image, response)`` tuple: the image with any predicted bounding
        boxes drawn on it (the original image when the model emitted no
        boxes), and the raw decoded model response string.
    """
    # Qwen-VL's from_list_format expects an image *path*, so persist the
    # uploaded image to disk first. (The original also re-opened the saved
    # file into an unused local; that dead reassignment has been removed.)
    user_image_path = "/tmp/user_input_test_image.jpg"
    image_input.save(user_image_path)

    # Build the multimodal query in the tokenizer's list format.
    query = tokenizer.from_list_format([
        {'image': user_image_path},
        {'text': text_input},
    ])
    inputs = tokenizer(query, return_tensors='pt')
    inputs = inputs.to(model.device)

    # Generate the response. Special tokens are kept because Qwen-VL encodes
    # bounding boxes with special markers that draw_bbox_on_latest_picture
    # needs to parse.
    pred = model.generate(**inputs)
    response = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False)

    # draw_bbox_on_latest_picture returns None when the response contains no
    # boxes; fall back to the input image so the Gradio image output always
    # receives a valid image instead of None.
    image_with_boxes = tokenizer.draw_bbox_on_latest_picture(response)
    if image_with_boxes is None:
        image_with_boxes = image_input

    return image_with_boxes, response
34
+
35
# Wire the prediction function into a Gradio UI: an image and a text prompt
# go in; the (possibly box-annotated) image and the raw model response come out.
demo_inputs = ["image", "text"]
demo_outputs = ["image", "text"]
iface = gr.Interface(
    fn=generate_predictions,
    inputs=demo_inputs,
    outputs=demo_outputs,
)

# Start the web server.
iface.launch()