sunsmarterjieleaf committed
Commit 5f3ef47 · verified · 1 Parent(s): 0fb4f04

Create app.py


A Gradio demo for YOLOv12 object detection on images and videos.
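The app added below is a thin wrapper around the ultralytics predict API; per image it does roughly the following (a minimal sketch, assuming the ultralytics package shipped with the YOLOv12 repo is installed and that the yolov12n.pt weights are available locally or resolvable by its loader):

from ultralytics import YOLO

model = YOLO("yolov12n.pt")  # any of the yolov12{n,s,m,l,x}.pt checkpoints offered in the demo's dropdown
results = model.predict(source="ultralytics/assets/bus.jpg", imgsz=640, conf=0.25)
annotated = results[0].plot()   # numpy array (BGR) with boxes and labels drawn
rgb = annotated[:, :, ::-1]     # flip channels to RGB, as the app does before display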

Files changed (1)
  app.py  +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
import gradio as gr
import cv2
import tempfile
from ultralytics import YOLO


def yolov12_inference(image, video, model_id, image_size, conf_threshold):
    # Run YOLOv12 on either a single image or a video, depending on which input is provided.
    model = YOLO(model_id)
    if image:
        results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
        annotated_image = results[0].plot()
        # plot() returns a BGR array; flip channels to RGB for display in Gradio.
        return annotated_image[:, :, ::-1], None
    else:
        # Copy the uploaded video to a temporary file, then annotate it frame by frame.
        video_path = tempfile.mktemp(suffix=".webm")
        with open(video_path, "wb") as f:
            with open(video, "rb") as g:
                f.write(g.read())

        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        output_video_path = tempfile.mktemp(suffix=".webm")
        out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'vp80'), fps, (frame_width, frame_height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
            annotated_frame = results[0].plot()
            out.write(annotated_frame)

        cap.release()
        out.release()

        return None, output_video_path


def yolov12_inference_for_examples(image, model_path, image_size, conf_threshold):
    # Wrapper used by gr.Examples, which only exercises the image path.
    annotated_image, _ = yolov12_inference(image, None, model_path, image_size, conf_threshold)
    return annotated_image


def app():
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                # Input widgets: only one of image/video is visible at a time.
                image = gr.Image(type="pil", label="Image", visible=True)
                video = gr.Video(label="Video", visible=False)
                input_type = gr.Radio(
                    choices=["Image", "Video"],
                    value="Image",
                    label="Input Type",
                )
                model_id = gr.Dropdown(
                    label="Model",
                    choices=[
                        "yolov12n.pt",
                        "yolov12s.pt",
                        "yolov12m.pt",
                        "yolov12l.pt",
                        "yolov12x.pt",
                    ],
                    value="yolov12m.pt",
                )
                image_size = gr.Slider(
                    label="Image Size",
                    minimum=320,
                    maximum=1280,
                    step=32,
                    value=640,
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.25,
                )
                yolov12_infer = gr.Button(value="Detect Objects")

            with gr.Column():
                output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
                output_video = gr.Video(label="Annotated Video", visible=False)

        def update_visibility(input_type):
            # Toggle which input/output widgets are shown based on the selected input type.
            image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
            video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
            output_image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
            output_video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)

            return image, video, output_image, output_video

        input_type.change(
            fn=update_visibility,
            inputs=[input_type],
            outputs=[image, video, output_image, output_video],
        )

        def run_inference(image, video, model_id, image_size, conf_threshold, input_type):
            if input_type == "Image":
                return yolov12_inference(image, None, model_id, image_size, conf_threshold)
            else:
                return yolov12_inference(None, video, model_id, image_size, conf_threshold)

        yolov12_infer.click(
            fn=run_inference,
            inputs=[image, video, model_id, image_size, conf_threshold, input_type],
            outputs=[output_image, output_video],
        )

        gr.Examples(
            examples=[
                [
                    "ultralytics/assets/bus.jpg",
                    "yolov12s.pt",
                    640,
                    0.25,
                ],
                [
                    "ultralytics/assets/zidane.jpg",
                    "yolov12x.pt",
                    640,
                    0.25,
                ],
            ],
            fn=yolov12_inference_for_examples,
            inputs=[
                image,
                model_id,
                image_size,
                conf_threshold,
            ],
            outputs=[output_image],
            cache_examples='lazy',
        )


gradio_app = gr.Blocks()
with gradio_app:
    gr.HTML(
        """
        <h1 style='text-align: center'>
        YOLOv12: Attention-Centric Real-Time Object Detectors
        </h1>
        """)
    gr.HTML(
        """
        <h3 style='text-align: center'>
        <a href='https://arxiv.org/abs/2503.xxxxx' target='_blank'>arXiv</a> | <a href='https://github.com/sunsmarterjie/yolov12' target='_blank'>github</a>
        </h3>
        """)
    with gr.Row():
        with gr.Column():
            app()

if __name__ == '__main__':
    gradio_app.launch()
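For a quick check outside the UI, the same helper can be called directly (a hedged sketch: it assumes the file above is saved as app.py in the working directory, that the example image from the ultralytics assets folder exists at this relative path, and it uses PIL because the Gradio image input is type="pil"):

from PIL import Image
from app import yolov12_inference  # importing builds the Gradio UI but does not launch it

img = Image.open("ultralytics/assets/bus.jpg")
annotated, _ = yolov12_inference(img, None, "yolov12s.pt", 640, 0.25)
print(annotated.shape)  # RGB numpy array with detections drawn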