zhwang4ai commited on
Commit
5d301c2
·
verified ·
1 Parent(s): 12502a1

Update score.json

Browse files
Files changed (1) hide show
  1. score.json +187 -1045
score.json CHANGED
@@ -1,1182 +1,324 @@
1
  {
2
- "llama3-llava-next-8b-hf-24": {
3
- "visual-basic": 0.4186046511627907,
4
- "knowledge": 0.1896551724137931,
5
- "reason": 0.23076923076923078,
6
- "visual-advance": 0.25
7
- },
8
- "gpt-4o-24": {
9
- "knowledge": 0.9655172413793104,
10
- "reason": 0.7692307692307693,
11
- "visual-advance": 0.85,
12
- "visual-basic": 0.7674418604651163
13
- },
14
- "gpt-4o-mini-24": {
15
- "knowledge": 0.7586206896551724,
16
- "reason": 0.5384615384615384,
17
- "visual-advance": 0.6,
18
- "visual-basic": 0.627906976744186
19
- },
20
- "fuyu-8b-24": {
21
- "reason": 0.0,
22
- "knowledge": 0.017241379310344827
23
- },
24
- "llava-1.5-13b-hf-24": {
25
- "reason": 0.0,
26
- "visual-basic": 0.3023255813953488,
27
- "knowledge": 0.0,
28
- "visual-advance": 0.65
29
- },
30
- "llava-1.5-7b-hf-24": {
31
- "visual-advance": 0.45,
32
- "visual-basic": 0.32558139534883723,
33
- "reason": 0.0,
34
- "knowledge": 0.0
35
- },
36
- "llava-v1.6-mistral-7b-hf-24": {
37
- "visual-basic": 0.37209302325581395,
38
- "visual-advance": 0.55,
39
- "reason": 0.15384615384615385,
40
- "knowledge": 0.15517241379310345
41
- },
42
- "llava-v1.6-vicuna-13b-hf-24": {
43
- "visual-advance": 0.35,
44
- "visual-basic": 0.4186046511627907,
45
- "knowledge": 0.1724137931034483,
46
- "reason": 0.15384615384615385
47
- },
48
- "llava-v1.6-vicuna-7b-hf-24": {
49
- "reason": 0.15384615384615385,
50
- "visual-basic": 0.23255813953488372,
51
- "visual-advance": 0.2,
52
- "knowledge": 0.06896551724137931
53
- },
54
- "MiniCPM-V-2_6-24": {
55
- "reason": 0.15384615384615385,
56
- "visual-basic": 0.5116279069767442,
57
- "visual-advance": 0.4,
58
- "knowledge": 0.15517241379310345
59
- },
60
- "10003-2024-09-02 09:46:53-24": {
61
- "reason": 0.3076923076923077,
62
- "knowledge": 0.29310344827586204
63
- },
64
- "llava-gemma-2b-24": {
65
- "reason": 0.0,
66
- "visual-basic": 0.3023255813953488,
67
- "visual-advance": 0.2,
68
- "knowledge": 0.034482758620689655
69
- },
70
- "molmo-7b-d-0924-24": {
71
- "visual-basic": 0.5813953488372093,
72
- "visual-advance": 0.15,
73
- "knowledge": 0.1206896551724138,
74
- "reason": 0.15384615384615385,
75
- "gui": {
76
  "100": 0.65,
77
  "20": 0.19,
78
  "50": 0.46,
79
  "200": 0.84
80
  },
81
- "embodied": {
82
  "100": 0.30916030534351147,
83
  "20": 0.03435114503816794,
84
  "50": 0.16412213740458015,
85
  "200": 0.5
86
  }
87
  },
88
- "molmo-72b-0924-24": {
89
- "reason": 0.46153846153846156,
90
- "visual-advance": 0.55,
91
- "knowledge": 0.3620689655172414,
92
- "visual-basic": 0.5581395348837209
93
  },
94
- "mc-llava_next_llama3_8b-LORA-embodied_v4_8_28-8_29-A800-c8-e3-b4-a4-24": {
95
- "visual-advance": 0.35,
96
- "reason": 0.3076923076923077,
97
- "knowledge": 0.3275862068965517,
98
- "visual-basic": 0.37209302325581395
99
  },
100
- "mc-sft-llava_next_8b-mcqa_v3_12_25_277k-11_15-epoch1-24": {
101
- "reason": 0.23076923076923078,
102
- "visual-advance": 0.25,
103
- "knowledge": 0.6724137931034483,
104
- "visual-basic": 0.11627906976744186
105
- },
106
- "qwen2-vl-7b-instruct-24": {
107
- "gui": {
108
  "100": 0.41,
109
  "20": 0.08,
110
  "200": 0.76,
111
  "50": 0.21
112
  },
113
- "embodied": {
114
  "100": 0.40458015267175573,
115
  "20": 0.13740458015267176,
116
  "200": 0.5572519083969466,
117
  "50": 0.25190839694656486
118
  },
119
- "knowledge": 0.06896551724137931,
120
- "visual-basic": 0.4186046511627907,
121
- "visual-advance": 0.375,
122
- "reason": 0.15384615384615385
123
  },
124
- "llama-3.2-11b-vision-instruct-24": {
125
- "visual-basic": 0.4418604651162791,
126
- "visual-advance": 0.25,
127
- "reason": 0.23076923076923078,
128
- "knowledge": 0.20689655172413793
129
  },
130
- "qwen2-vl-72b-instruct-24": {
131
- "gui": {
132
  "100": 0.0,
133
  "20": 0.0,
134
  "200": 0.0,
135
  "50": 0.0
136
  },
137
- "embodied": {
138
  "100": 0.35877862595419846,
139
  "20": 0.09541984732824428,
140
  "200": 0.5038167938931297,
141
  "50": 0.22137404580152673
142
  }
143
  },
144
- "mc-sft-llava_next_8b-mcqa_v3_12_25_277k-12_13-A100-c8-e1-b8-a1-4312-24": {
145
- "knowledge": 0.6724137931034483,
146
- "reason": 0.38461538461538464,
147
- "visual-basic": 0.20930232558139536,
148
- "visual-advance": 0.45
149
- },
150
- "mc-vsft-sft-llama3_llava_next_8b-mcvqa_v4_11_21_80k-12-15-A100-c8-e1-b4-a4-631-24": {
151
- "knowledge": 0.603448275862069,
152
- "reason": 0.3076923076923077,
153
- "visual-basic": 0.5581395348837209,
154
- "visual-advance": 0.55
155
- },
156
- "mc-vsft-sft-llava_v1.6_vicuna_13b-mcvqa_v4_11_21_80k-12_17-A100-c8-e2-b4-a4-631-24": {
157
- "knowledge": 0.6206896551724138,
158
- "reason": 0.5384615384615384,
159
- "visual-basic": 0.7441860465116279,
160
- "visual-advance": 0.7
161
- },
162
- "mc-sft-qwen2_vl_7b-mcqa_v3_12_25_277k-12_13-A100-c8-e1-b8-a1-24": {
163
- "knowledge": 0.6551724137931034,
164
- "visual-advance": 0.45,
165
- "reason": 0.38461538461538464,
166
- "visual-basic": 0.46511627906976744
167
- },
168
- "vsft_qwen_7b-A800-c8-e1-b1-a4-24": {
169
- "visual-basic": 0.6511627906976745,
170
- "reason": 0.23076923076923078,
171
- "knowledge": 0.6206896551724138,
172
- "visual-advance": 0.75
173
- },
174
- "vsft_qwen_2b-A800-c8-e1-b1-a4-24": {
175
- "reason": 0.15384615384615385,
176
- "visual-advance": 0.7,
177
- "visual-basic": 0.5813953488372093,
178
- "knowledge": 0.1724137931034483
179
- },
180
- "point-qwen2-vl-7b-1000-250116_old-24": {
181
- "gui": {
182
- "100": 0.65,
183
- "20": 0.02,
184
- "50": 0.2,
185
- "200": 0.97
186
- },
187
- "embodied": {
188
- "100": 0.4198473282442748,
189
- "20": 0.030534351145038167,
190
- "50": 0.16030534351145037,
191
- "200": 0.6450381679389313
192
- }
193
- },
194
- "mc-point-qwen2-vl-7b-6000+2676-250118-24": {
195
- "gui": {
196
- "100": 0.11,
197
- "20": 0.0,
198
- "50": 0.0,
199
- "200": 0.35
200
- },
201
- "embodied": {
202
- "100": 0.48091603053435117,
203
- "20": 0.04961832061068702,
204
- "50": 0.2099236641221374,
205
- "200": 0.7137404580152672
206
- }
207
- },
208
- "point-qwen2-vl-7b-7000-250116-24": {
209
- "gui": {
210
- "100": 0.19,
211
- "20": 0.01,
212
- "50": 0.04,
213
- "200": 0.35
214
- },
215
- "embodied": {
216
- "100": 0.4389312977099237,
217
- "20": 0.03816793893129771,
218
- "50": 0.16030534351145037,
219
- "200": 0.648854961832061
220
- }
221
- },
222
- "point-qwen2-vl-7b-5000-250116-24": {
223
- "gui": {
224
- "100": 0.0,
225
- "20": 0.0,
226
- "50": 0.0,
227
- "200": 0.0
228
- },
229
- "embodied": {
230
- "100": 0.1297709923664122,
231
- "20": 0.015267175572519083,
232
- "50": 0.08015267175572519,
233
- "200": 0.16793893129770993
234
- }
235
- },
236
- "mc-point-qwen2-vl-7b-919-250118-24": {
237
- "gui": {
238
- "100": 0.04,
239
- "20": 0.0,
240
- "50": 0.0,
241
- "200": 0.12
242
- },
243
- "embodied": {
244
- "100": 0.4312977099236641,
245
- "20": 0.04198473282442748,
246
- "50": 0.17938931297709923,
247
- "200": 0.5801526717557252
248
- }
249
- },
250
- "mc-point-qwen2-vl-7b-6000+1200-250118_old-24": {
251
- "gui": {
252
- "100": 0.21,
253
- "20": 0.0,
254
- "50": 0.02,
255
- "200": 0.45
256
- },
257
- "embodied": {
258
- "100": 0.41603053435114506,
259
- "20": 0.04961832061068702,
260
- "50": 0.183206106870229,
261
- "200": 0.6641221374045801
262
- }
263
- },
264
- "point-qwen2-vl-7b-13885-250116_old-24": {
265
- "gui": {
266
- "100": 0.0,
267
- "20": 0.0,
268
- "50": 0.0,
269
- "200": 0.0
270
- },
271
- "embodied": {
272
- "100": 0.4961832061068702,
273
- "20": 0.03435114503816794,
274
- "50": 0.21374045801526717,
275
- "200": 0.6984732824427481
276
- }
277
- },
278
- "mc-point-toy-qwen2-vl-7b-2812-24": {
279
- "gui": {
280
- "100": 0.8,
281
- "20": 0.0,
282
- "50": 0.36,
283
- "200": 0.99
284
- },
285
- "embodied": {
286
- "100": 0.061068702290076333,
287
- "20": 0.0,
288
- "50": 0.019083969465648856,
289
- "200": 0.14885496183206107
290
- }
291
- },
292
- "mc-point-toy-qwen2-vl-7b-703-24": {
293
- "gui": {
294
- "100": 0.74,
295
- "20": 0.02,
296
- "50": 0.43,
297
- "200": 0.98
298
- },
299
- "embodied": {
300
- "100": 0.12595419847328243,
301
- "20": 0.007633587786259542,
302
- "50": 0.03816793893129771,
303
- "200": 0.2786259541984733
304
- }
305
- },
306
- "point-qwen2-vl-7b-8000-250116-24": {
307
- "gui": {
308
- "100": 0.03,
309
- "20": 0.0,
310
- "50": 0.0,
311
- "200": 0.03
312
- },
313
- "embodied": {
314
- "100": 0.5458015267175572,
315
- "20": 0.05343511450381679,
316
- "50": 0.21755725190839695,
317
- "200": 0.732824427480916
318
- }
319
- },
320
- "mc-point-toy-qwen2-vl-7b-2109-24": {
321
- "gui": {
322
- "100": 0.81,
323
- "20": 0.03,
324
- "50": 0.35,
325
- "200": 0.99
326
- },
327
- "embodied": {
328
- "100": 0.05343511450381679,
329
- "20": 0.0,
330
- "50": 0.007633587786259542,
331
- "200": 0.13358778625954199
332
- }
333
- },
334
- "point-qwen2-vl-7b-3000-250116-24": {
335
- "gui": {
336
- "100": 0.04,
337
- "20": 0.0,
338
- "50": 0.01,
339
- "200": 0.08
340
- },
341
- "embodied": {
342
- "100": 0.3893129770992366,
343
- "20": 0.0648854961832061,
344
- "50": 0.2099236641221374,
345
- "200": 0.5610687022900763
346
- }
347
- },
348
- "point-qwen2-vl-7b-6000-250116-24": {
349
- "gui": {
350
- "100": 0.34,
351
- "20": 0.02,
352
- "50": 0.08,
353
- "200": 0.57
354
- },
355
- "embodied": {
356
- "100": 0.061068702290076333,
357
- "20": 0.011450381679389313,
358
- "50": 0.030534351145038167,
359
- "200": 0.09923664122137404
360
- }
361
- },
362
- "point-qwen2-vl-7b-9000-250116-24": {
363
- "gui": {
364
- "100": 0.0,
365
- "20": 0.0,
366
- "50": 0.0,
367
- "200": 0.0
368
- },
369
- "embodied": {
370
- "100": 0.4541984732824427,
371
- "20": 0.03816793893129771,
372
- "50": 0.23282442748091603,
373
- "200": 0.6259541984732825
374
- }
375
- },
376
- "point-qwen2-vl-7b-11000-250116-24": {
377
- "gui": {
378
- "100": 0.0,
379
- "20": 0.0,
380
- "50": 0.0,
381
- "200": 0.0
382
- },
383
- "embodied": {
384
- "100": 0.46946564885496184,
385
- "20": 0.07633587786259542,
386
- "50": 0.18702290076335878,
387
- "200": 0.6564885496183206
388
- }
389
- },
390
- "mc-point-toy-qwen2-vl-7b-175-24": {
391
- "gui": {
392
- "100": 0.69,
393
- "20": 0.04,
394
- "50": 0.44,
395
- "200": 0.96
396
- },
397
- "embodied": {
398
- "100": 0.0916030534351145,
399
- "20": 0.007633587786259542,
400
- "50": 0.015267175572519083,
401
- "200": 0.19083969465648856
402
- }
403
- },
404
- "mc-point-qwen2-vl-7b-300-250118-24": {
405
- "gui": {
406
- "100": 0.03,
407
- "20": 0.0,
408
- "50": 0.0,
409
- "200": 0.08
410
- },
411
- "embodied": {
412
- "100": 0.35877862595419846,
413
- "20": 0.030534351145038167,
414
- "50": 0.17557251908396945,
415
- "200": 0.5038167938931297
416
- }
417
- },
418
- "mc-point-qwen2-vl-7b-6000+2400-250118_old-24": {
419
- "gui": {
420
- "100": 0.11,
421
- "20": 0.0,
422
- "50": 0.0,
423
- "200": 0.34
424
- },
425
- "embodied": {
426
- "100": 0.5038167938931297,
427
- "20": 0.05343511450381679,
428
- "50": 0.1946564885496183,
429
- "200": 0.683206106870229
430
- }
431
- },
432
- "point-qwen2-vl-7b-13000-250116-24": {
433
- "gui": {
434
- "100": 0.0,
435
- "20": 0.0,
436
- "50": 0.0,
437
- "200": 0.0
438
- },
439
- "embodied": {
440
- "100": 0.5,
441
- "20": 0.05725190839694656,
442
- "50": 0.16412213740458015,
443
- "200": 0.6679389312977099
444
- }
445
- },
446
- "point-qwen2-vl-7b-12000-250116-24": {
447
- "gui": {
448
- "100": 0.0,
449
- "20": 0.0,
450
- "50": 0.0,
451
- "200": 0.0
452
- },
453
- "embodied": {
454
- "100": 0.4618320610687023,
455
- "20": 0.030534351145038167,
456
- "50": 0.1717557251908397,
457
- "200": 0.6641221374045801
458
- }
459
- },
460
- "point-qwen2-vl-7b-4000-250116-24": {
461
- "gui": {
462
- "100": 0.0,
463
- "20": 0.0,
464
- "50": 0.0,
465
- "200": 0.0
466
- },
467
- "embodied": {
468
- "100": 0.07251908396946564,
469
- "20": 0.007633587786259542,
470
- "50": 0.03816793893129771,
471
- "200": 0.11450381679389313
472
- }
473
- },
474
- "point-qwen2-vl-7b-2000-250116-24": {
475
- "gui": {
476
- "100": 0.02,
477
- "20": 0.0,
478
- "50": 0.02,
479
- "200": 0.1
480
- },
481
- "embodied": {
482
- "100": 0.3931297709923664,
483
- "20": 0.061068702290076333,
484
- "50": 0.183206106870229,
485
- "200": 0.5877862595419847
486
- }
487
- },
488
- "point-qwen2-vl-7b-10000-250116-24": {
489
- "gui": {
490
- "100": 0.0,
491
- "20": 0.0,
492
- "50": 0.0,
493
- "200": 0.0
494
- },
495
- "embodied": {
496
- "100": 0.48854961832061067,
497
- "20": 0.04198473282442748,
498
- "50": 0.20610687022900764,
499
- "200": 0.6526717557251909
500
- }
501
- },
502
- "mc-point-qwen2-vl-7b-25021002-A800-c32-e1-b8-a1_checkpoint-800-24": {
503
- "gui": {
504
  "100": 0.69,
505
  "20": 0.4,
506
  "200": 0.95,
507
  "50": 0.51
508
  },
509
- "embodied": {
510
  "100": 0.6603053435114504,
511
  "20": 0.22137404580152673,
512
  "200": 0.7862595419847328,
513
  "50": 0.4732824427480916
514
  },
515
- "reason": 0.38461538461538464,
516
- "knowledge": 0.6724137931034483,
517
- "visual-basic": 0.5116279069767442,
518
- "visual-advance": 0.475
519
- },
520
- "mc-point-qwen2-vl-7b-25021002-A800-c32-e1-b8-a1_checkpoint-1000-24": {
521
- "gui": {
522
- "100": 0.69,
523
- "20": 0.38,
524
- "200": 0.96,
525
- "50": 0.5
526
- },
527
- "embodied": {
528
- "100": 0.6641221374045801,
529
- "20": 0.25190839694656486,
530
- "200": 0.7786259541984732,
531
- "50": 0.48091603053435117
532
- },
533
- "reason": 0.3076923076923077,
534
- "knowledge": 0.6206896551724138,
535
- "visual-basic": 0.6046511627906976,
536
- "visual-advance": 0.525
537
- },
538
- "mc-point-qwen2-vl-7b-25021002-A800-c32-e1-b8-a1_checkpoint-400-24": {
539
- "gui": {
540
- "100": 0.61,
541
- "20": 0.33,
542
- "200": 0.96,
543
- "50": 0.5
544
- },
545
- "embodied": {
546
- "100": 0.6335877862595419,
547
- "20": 0.20229007633587787,
548
- "200": 0.7977099236641222,
549
- "50": 0.4389312977099237
550
- },
551
- "reason": 0.3076923076923077,
552
- "knowledge": 0.7413793103448276,
553
- "visual-basic": 0.5348837209302325,
554
- "visual-advance": 0.45
555
- },
556
- "mc-point-qwen2-vl-7b-25021002-A800-c32-e1-b8-a1_checkpoint-600-24": {
557
- "gui": {
558
- "100": 0.67,
559
- "20": 0.34,
560
- "200": 0.96,
561
- "50": 0.51
562
- },
563
- "embodied": {
564
- "100": 0.6374045801526718,
565
- "20": 0.20610687022900764,
566
- "200": 0.7900763358778626,
567
- "50": 0.44656488549618323
568
- },
569
- "reason": 0.23076923076923078,
570
- "knowledge": 0.7068965517241379,
571
- "visual-basic": 0.46511627906976744,
572
- "visual-advance": 0.55
573
- },
574
- "mc-anneal-qwen2-vl-7b-25021003-A800-c16-e1-b8-a1_checkpoint-200-24": {
575
- "gui": {
576
- "100": 0.41,
577
- "20": 0.2,
578
- "200": 0.66,
579
- "50": 0.29
580
- },
581
- "embodied": {
582
- "100": 0.20229007633587787,
583
- "20": 0.03816793893129771,
584
- "200": 0.40458015267175573,
585
- "50": 0.10305343511450382
586
- }
587
- },
588
- "mc-bbox-qwen2-vl-7b-25020805-A800-c32-e1-b2-a1_checkpoint-2000-24": {
589
- "gui": {
590
- "100": 0.56,
591
- "20": 0.36,
592
- "200": 0.71,
593
- "50": 0.45
594
- },
595
- "embodied": {
596
- "100": 0.08015267175572519,
597
- "20": 0.022900763358778626,
598
- "200": 0.14885496183206107,
599
- "50": 0.03816793893129771
600
- }
601
- },
602
- "mc-bbox-qwen2-vl-7b-25020805-A800-c32-e1-b2-a1_checkpoint-400-24": {
603
- "gui": {
604
- "100": 0.52,
605
- "20": 0.23,
606
- "200": 0.84,
607
- "50": 0.36
608
- },
609
- "embodied": {
610
- "100": 0.12213740458015267,
611
- "20": 0.019083969465648856,
612
- "200": 0.1984732824427481,
613
- "50": 0.0648854961832061
614
- }
615
  },
616
- "mc-bbox-qwen2-vl-7b-25020805-A800-c32-e1-b2-a1_checkpoint-3000-24": {
617
- "gui": {
618
  "100": 0.57,
619
  "20": 0.38,
620
  "200": 0.74,
621
  "50": 0.47
622
  },
623
- "embodied": {
624
  "100": 0.0916030534351145,
625
  "20": 0.015267175572519083,
626
  "200": 0.15648854961832062,
627
  "50": 0.03435114503816794
628
  }
629
  },
630
- "mc-bbox-qwen2-vl-7b-25020805-A800-c32-e1-b2-a1_checkpoint-2400-24": {
631
- "gui": {
632
- "100": 0.55,
633
- "20": 0.33,
634
- "200": 0.72,
635
- "50": 0.47
636
- },
637
- "embodied": {
638
- "100": 0.0916030534351145,
639
- "20": 0.019083969465648856,
640
- "200": 0.16412213740458015,
641
- "50": 0.04198473282442748
642
- }
643
- },
644
- "mc-bbox-qwen2-vl-7b-25020805-A800-c32-e1-b2-a1_checkpoint-2800-24": {
645
- "gui": {
646
- "100": 0.56,
647
- "20": 0.36,
648
- "200": 0.72,
649
- "50": 0.45
650
- },
651
- "embodied": {
652
- "100": 0.09923664122137404,
653
- "20": 0.019083969465648856,
654
- "200": 0.16412213740458015,
655
- "50": 0.04198473282442748
656
- }
657
- },
658
- "mc-bbox-qwen2-vl-7b-25020805-A800-c32-e1-b2-a1_checkpoint-1200-24": {
659
- "gui": {
660
- "100": 0.51,
661
- "20": 0.32,
662
- "200": 0.66,
663
- "50": 0.43
664
- },
665
- "embodied": {
666
- "100": 0.09541984732824428,
667
- "20": 0.030534351145038167,
668
- "200": 0.1717557251908397,
669
- "50": 0.05725190839694656
670
- }
671
- },
672
- "mc-bbox-qwen2-vl-7b-25020805-A800-c32-e1-b2-a1_checkpoint-1600-24": {
673
- "gui": {
674
- "100": 0.53,
675
- "20": 0.26,
676
- "200": 0.67,
677
- "50": 0.41
678
- },
679
- "embodied": {
680
- "100": 0.0916030534351145,
681
- "20": 0.022900763358778626,
682
- "200": 0.1717557251908397,
683
- "50": 0.04198473282442748
684
- }
685
- },
686
- "mc-bbox-qwen2-vl-7b-25020805-A800-c32-e1-b2-a1_checkpoint-800-24": {
687
- "gui": {
688
- "100": 0.48,
689
- "20": 0.23,
690
- "200": 0.7,
691
- "50": 0.35
692
- },
693
- "embodied": {
694
- "100": 0.12213740458015267,
695
- "20": 0.03435114503816794,
696
- "200": 0.24427480916030533,
697
- "50": 0.06870229007633588
698
- }
699
- },
700
- "mc-point-qwen2-vl-7b-6000+1200-250118-24": {
701
- "gui": {
702
- "100": 0.2,
703
- "20": 0.0,
704
- "200": 0.48,
705
- "50": 0.03
706
- },
707
- "embodied": {
708
- "100": 0.6374045801526718,
709
- "20": 0.05725190839694656,
710
- "200": 0.8015267175572519,
711
- "50": 0.2900763358778626
712
- }
713
- },
714
- "mc-point-qwen2-vl-7b-6000+1800-250118-24": {
715
- "gui": {
716
- "100": 0.17,
717
- "20": 0.0,
718
- "200": 0.44,
719
- "50": 0.02
720
- },
721
- "embodied": {
722
- "100": 0.6145038167938931,
723
- "20": 0.04198473282442748,
724
- "200": 0.7862595419847328,
725
- "50": 0.183206106870229
726
- }
727
- },
728
- "mc-point-qwen2-vl-7b-6000+2400-250118-24": {
729
- "gui": {
730
- "100": 0.13,
731
- "20": 0.0,
732
- "200": 0.39,
733
- "50": 0.0
734
- },
735
- "embodied": {
736
- "100": 0.6221374045801527,
737
- "20": 0.04961832061068702,
738
- "200": 0.7900763358778626,
739
- "50": 0.1984732824427481
740
- }
741
- },
742
- "point-qwen2-vl-7b-1000-250116-24": {
743
- "gui": {
744
- "100": 0.63,
745
- "20": 0.02,
746
- "200": 0.96,
747
- "50": 0.19
748
- },
749
- "embodied": {
750
- "100": 0.5458015267175572,
751
- "20": 0.0916030534351145,
752
- "200": 0.7061068702290076,
753
- "50": 0.29770992366412213
754
- }
755
- },
756
- "point-qwen2-vl-7b-13885-250116-24": {
757
- "gui": {
758
- "100": 0.09,
759
- "20": 0.01,
760
- "200": 0.2,
761
- "50": 0.01
762
- },
763
- "embodied": {
764
- "100": 0.6221374045801527,
765
- "20": 0.061068702290076333,
766
- "200": 0.7938931297709924,
767
- "50": 0.1984732824427481
768
- }
769
- },
770
- "point-qwen2-7b-step_6000-250113-24": {
771
- "gui": {
772
- "100": 0.46,
773
- "20": 0.14,
774
- "200": 0.84,
775
- "50": 0.19
776
- },
777
- "embodied": {
778
- "100": 0.026717557251908396,
779
- "20": 0.0,
780
- "200": 0.10687022900763359,
781
- "50": 0.003816793893129771
782
- }
783
- },
784
- "mc-point-qwen2-vl-7b-25021002-A800-c32-e1-b8-a1checkpoint-1123-24": {
785
- "gui": {
786
- "100": 0.73,
787
- "20": 0.39,
788
- "200": 0.95,
789
- "50": 0.51
790
- },
791
- "embodied": {
792
- "100": 0.6755725190839694,
793
- "20": 0.26717557251908397,
794
- "200": 0.7862595419847328,
795
- "50": 0.4732824427480916
796
- },
797
- "reason": 0.3076923076923077,
798
- "knowledge": 0.6379310344827587,
799
- "visual-basic": 0.4186046511627907,
800
- "visual-advance": 0.25
801
- },
802
- "qwen2-vl-2b-instruct-24": {
803
- "gui": {
804
- "100": 0.0,
805
- "20": 0.0,
806
- "200": 0.01,
807
- "50": 0.0
808
- },
809
- "embodied": {
810
- "100": 0.0,
811
- "20": 0.0,
812
- "200": 0.0,
813
- "50": 0.0
814
- }
815
- },
816
- "anneal-vla-qwen2-vl-7b-250211-A800-c32-e1-b4-a1-24": {
817
- "gui": {
818
- "100": 0.43,
819
- "20": 0.06,
820
- "200": 0.93,
821
- "50": 0.17
822
- },
823
- "embodied": {
824
- "100": 0.24045801526717558,
825
- "20": 0.019083969465648856,
826
- "200": 0.6183206106870229,
827
- "50": 0.08778625954198473
828
- },
829
- "reason": 0.46153846153846156,
830
- "knowledge": 0.6896551724137931,
831
- "visual-basic": 0.4186046511627907,
832
- "visual-advance": 0.45
833
- },
834
- "anneal-point-qwen2-vl-7b-250211-A800-c32-e1-b4-a1-24": {
835
- "gui": {
836
  "100": 0.74,
837
  "20": 0.38,
838
  "200": 0.93,
839
  "50": 0.56
840
  },
841
- "embodied": {
842
  "100": 0.648854961832061,
843
  "20": 0.2099236641221374,
844
  "200": 0.7786259541984732,
845
  "50": 0.4618320610687023
846
  },
847
- "reason": 0.38461538461538464,
848
- "knowledge": 0.6982758620689655,
849
- "visual-basic": 0.5348837209302325,
850
- "visual-advance": 0.55
851
  },
852
- "mc-qwen2-vl-7b-250215-A800-c32-e1-b4-a1checkpoint-400-24": {
853
- "gui": {
854
- "100": 0.85,
855
- "20": 0.67,
856
- "200": 0.98,
857
- "50": 0.75
858
- },
859
- "embodied": {
860
- "100": 0.5114503816793893,
861
- "20": 0.09923664122137404,
862
- "200": 0.7442748091603053,
863
- "50": 0.32061068702290074
864
- },
865
- "knowledge": 0.7586206896551724,
866
- "visual-basic": 0.5581395348837209,
867
- "visual-advance": 0.4,
868
- "reason": 0.5384615384615384
869
- },
870
- "mc-qwen2-vl-7b-250215-A800-c32-e1-b4-a1checkpoint-800-24": {
871
- "gui": {
872
- "100": 0.87,
873
- "20": 0.7,
874
- "200": 0.98,
875
- "50": 0.79
876
- },
877
- "embodied": {
878
- "100": 0.5458015267175572,
879
- "20": 0.1450381679389313,
880
- "200": 0.767175572519084,
881
- "50": 0.35877862595419846
882
- },
883
- "knowledge": 0.7068965517241379,
884
- "visual-basic": 0.5581395348837209,
885
- "visual-advance": 0.5,
886
- "reason": 0.38461538461538464
887
- },
888
- "mc-qwen2-vl-7b-250215-A800-c32-e1-b4-a1checkpoint-1600-24": {
889
- "gui": {
890
- "100": 0.84,
891
- "20": 0.73,
892
- "200": 0.97,
893
- "50": 0.82
894
- },
895
- "embodied": {
896
- "100": 0.6145038167938931,
897
- "20": 0.183206106870229,
898
- "200": 0.7786259541984732,
899
- "50": 0.4083969465648855
900
- },
901
- "knowledge": 0.6896551724137931,
902
- "visual-basic": 0.6046511627906976,
903
- "visual-advance": 0.45,
904
- "reason": 0.46153846153846156
905
- },
906
- "mc-qwen2-vl-7b-250215-A800-c32-e1-b4-a1checkpoint-2400-24": {
907
- "gui": {
908
  "100": 0.92,
909
  "20": 0.79,
910
  "200": 0.99,
911
  "50": 0.88
912
  },
913
- "embodied": {
914
  "100": 0.6068702290076335,
915
  "20": 0.2099236641221374,
916
  "200": 0.7824427480916031,
917
  "50": 0.41603053435114506
918
  },
919
- "knowledge": 0.7068965517241379,
920
- "visual-basic": 0.7674418604651163,
921
- "visual-advance": 0.55,
922
- "reason": 0.46153846153846156
923
  },
924
- "mc-qwen2-vl-7b-250215-A800-c32-e1-b4-a1checkpoint-3200-24": {
925
- "gui": {
926
  "100": 0.93,
927
  "20": 0.84,
928
  "200": 0.99,
929
  "50": 0.9
930
  },
931
- "embodied": {
932
  "100": 0.6259541984732825,
933
  "20": 0.24427480916030533,
934
  "200": 0.7748091603053435,
935
  "50": 0.46946564885496184
936
  },
937
- "knowledge": 0.6810344827586207,
938
- "visual-basic": 0.6976744186046512,
939
- "visual-advance": 0.55,
940
- "reason": 0.38461538461538464
941
- },
942
- "mc-qwen2-vl-7b-250215-A800-c32-e1-b4-a1checkpoint-4000-24": {
943
- "gui": {
944
- "100": 0.92,
945
- "20": 0.83,
946
- "200": 0.98,
947
- "50": 0.88
948
- },
949
- "embodied": {
950
- "100": 0.6145038167938931,
951
- "20": 0.22900763358778625,
952
- "200": 0.7786259541984732,
953
- "50": 0.4580152671755725
954
- },
955
- "knowledge": 0.7068965517241379,
956
- "visual-basic": 0.6744186046511628,
957
- "visual-advance": 0.55,
958
- "reason": 0.46153846153846156
959
- },
960
- "mc-point-qwen2-vl-2b-250217-A800-c32-e1-b4-a1_checkpoint-800-24": {
961
- "gui": {
962
- "100": 0.52,
963
- "20": 0.32,
964
- "200": 0.9,
965
- "50": 0.4
966
- },
967
- "embodied": {
968
- "100": 0.42748091603053434,
969
- "20": 0.0916030534351145,
970
- "200": 0.6717557251908397,
971
- "50": 0.27099236641221375
972
- },
973
- "knowledge": 0.3103448275862069,
974
- "visual-basic": 0.5116279069767442,
975
- "visual-advance": 0.4,
976
- "reason": 0.15384615384615385
977
  },
978
- "mc-point-qwen2-vl-2b-250217-A800-c32-e1-b4-a1_checkpoint-1600-24": {
979
- "gui": {
980
  "100": 0.54,
981
  "20": 0.32,
982
  "200": 0.93,
983
  "50": 0.38
984
  },
985
- "embodied": {
986
  "100": 0.5038167938931297,
987
  "20": 0.08015267175572519,
988
  "200": 0.7175572519083969,
989
  "50": 0.2748091603053435
990
  },
991
- "knowledge": 0.3103448275862069,
992
- "visual-basic": 0.5116279069767442,
993
- "visual-advance": 0.4,
994
- "reason": 0.38461538461538464
995
- },
996
- "mc-point-qwen2-vl-2b-250217-A800-c32-e1-b4-a1_checkpoint-2400-24": {
997
- "gui": {
998
- "100": 0.56,
999
- "20": 0.34,
1000
- "200": 0.94,
1001
- "50": 0.4
1002
- },
1003
- "embodied": {
1004
- "100": 0.4961832061068702,
1005
- "20": 0.14885496183206107,
1006
- "200": 0.7480916030534351,
1007
- "50": 0.31679389312977096
1008
- }
1009
- },
1010
- "mc-point-qwen2-vl-2b-250217-A800-c32-e1-b4-a1_checkpoint-3200-24": {
1011
- "gui": {
1012
- "100": 0.54,
1013
- "20": 0.4,
1014
- "200": 0.96,
1015
- "50": 0.44
1016
- },
1017
- "embodied": {
1018
- "100": 0.5381679389312977,
1019
- "20": 0.15648854961832062,
1020
- "200": 0.7480916030534351,
1021
- "50": 0.3549618320610687
1022
- }
1023
  },
1024
- "mc-point-qwen2-vl-2b-250217-A800-c32-e1-b4-a1_checkpoint-4000-24": {
1025
- "gui": {
1026
  "100": 0.59,
1027
  "20": 0.39,
1028
  "200": 0.94,
1029
  "50": 0.45
1030
  },
1031
- "embodied": {
1032
  "100": 0.5305343511450382,
1033
  "20": 0.16793893129770993,
1034
  "200": 0.7557251908396947,
1035
  "50": 0.3473282442748092
1036
  }
1037
  },
1038
- "mc-llava-next-vicuna-13b-250217-A800-c32-e1-b4-a1_checkpoint-4000-24": {
1039
- "gui": {
1040
  "100": 0.74,
1041
  "20": 0.6,
1042
  "200": 0.99,
1043
  "50": 0.68
1044
  },
1045
- "embodied": {
1046
  "100": 0.6259541984732825,
1047
  "20": 0.15267175572519084,
1048
  "200": 0.7748091603053435,
1049
  "50": 0.4122137404580153
1050
  },
1051
- "knowledge": 0.603448275862069,
1052
- "visual-basic": 0.4883720930232558,
1053
- "visual-advance": 0.4,
1054
- "reason": 0.46153846153846156
1055
- },
1056
- "mc-llava-next-vicuna-13b-250217-A800-c32-e1-b4-a1_checkpoint-3200-24": {
1057
- "gui": {
1058
- "100": 0.77,
1059
- "20": 0.58,
1060
- "200": 0.99,
1061
- "50": 0.69
1062
- },
1063
- "embodied": {
1064
- "100": 0.6412213740458015,
1065
- "20": 0.14122137404580154,
1066
- "200": 0.7862595419847328,
1067
- "50": 0.3931297709923664
1068
- },
1069
- "knowledge": 0.6551724137931034,
1070
- "visual-basic": 0.4883720930232558,
1071
- "visual-advance": 0.45,
1072
- "reason": 0.3076923076923077
1073
- },
1074
- "mc-llava-next-vicuna-13b-250217-A800-c32-e1-b4-a1_checkpoint-2400-24": {
1075
- "gui": {
1076
- "100": 0.75,
1077
- "20": 0.49,
1078
- "200": 0.98,
1079
- "50": 0.67
1080
- },
1081
- "embodied": {
1082
- "100": 0.6030534351145038,
1083
- "20": 0.13740458015267176,
1084
- "200": 0.767175572519084,
1085
- "50": 0.3702290076335878
1086
- },
1087
- "knowledge": 0.6206896551724138,
1088
- "visual-basic": 0.5348837209302325,
1089
- "visual-advance": 0.45,
1090
- "reason": 0.3076923076923077
1091
- },
1092
- "mc-llava-next-vicuna-13b-250217-A800-c32-e1-b4-a1_checkpoint-1600-24": {
1093
- "gui": {
1094
- "100": 0.76,
1095
- "20": 0.33,
1096
- "200": 1.0,
1097
- "50": 0.65
1098
- },
1099
- "embodied": {
1100
- "100": 0.5687022900763359,
1101
- "20": 0.12213740458015267,
1102
- "200": 0.7748091603053435,
1103
- "50": 0.366412213740458
1104
- },
1105
- "knowledge": 0.5689655172413793,
1106
- "visual-basic": 0.5581395348837209,
1107
- "visual-advance": 0.35,
1108
- "reason": 0.3076923076923077
1109
- },
1110
- "mc-llava-next-vicuna-13b-250217-A800-c32-e1-b4-a1_checkpoint-800-24": {
1111
- "gui": {
1112
- "100": 0.74,
1113
- "20": 0.07,
1114
- "200": 0.93,
1115
- "50": 0.42
1116
- },
1117
- "embodied": {
1118
- "100": 0.5305343511450382,
1119
- "20": 0.08015267175572519,
1120
- "200": 0.7709923664122137,
1121
- "50": 0.31297709923664124
1122
- },
1123
- "knowledge": 0.5344827586206896,
1124
- "visual-basic": 0.4883720930232558,
1125
- "visual-advance": 0.4,
1126
- "reason": 0.38461538461538464
1127
- },
1128
- "mc-reason-qwen2-vl-7b-250224-A800-c32-e1-b4-a1_checkpoint-800-24": {
1129
- "gui": {
1130
- "100": 0.0,
1131
- "20": 0.0,
1132
- "200": 0.0,
1133
- "50": 0.0
1134
- },
1135
- "embodied": {
1136
- "100": 0.0,
1137
- "20": 0.0,
1138
- "200": 0.0,
1139
- "50": 0.0
1140
- },
1141
- "knowledge": 0.603448275862069,
1142
- "visual-basic": 0.5813953488372093,
1143
- "visual-advance": 0.45,
1144
- "reason": 0.42528735632183906
1145
- },
1146
- "mc-reason-qwen2-vl-7b-250224-A800-c32-e1-b4-a1_checkpoint-1600-24": {
1147
- "gui": {
1148
- "100": 0.0,
1149
- "20": 0.0,
1150
- "200": 0.0,
1151
- "50": 0.0
1152
- },
1153
- "embodied": {
1154
- "100": 0.0,
1155
- "20": 0.0,
1156
- "200": 0.003816793893129771,
1157
- "50": 0.0
1158
- },
1159
- "knowledge": 0.6379310344827587,
1160
- "visual-basic": 0.5813953488372093,
1161
- "visual-advance": 0.3,
1162
- "reason": 0.45977011494252873
1163
  },
1164
- "mc-reason-qwen2-vl-7b-250224-A800-c32-e1-b4-a1_checkpoint-2400-24": {
1165
- "gui": {
1166
  "100": 0.0,
1167
  "20": 0.0,
1168
  "200": 0.0,
1169
  "50": 0.0
1170
  },
1171
- "embodied": {
1172
  "100": 0.0,
1173
  "20": 0.0,
1174
  "200": 0.003816793893129771,
1175
  "50": 0.0
1176
  },
1177
- "knowledge": 0.6551724137931034,
1178
- "visual-basic": 0.6046511627906976,
1179
- "visual-advance": 0.325,
1180
- "reason": 0.5229885057471264
1181
  }
1182
  }
 
1
  {
2
+ "llama3-llava-next-8b-hf-2412": {
3
+ "VQA": 0.4186046511627907,
4
+ "QA": 0.1896551724137931,
5
+ "Reason": 0.23076923076923078,
6
+ "VQA_Reasoning": 0.25
7
+ },
8
+ "gpt-4o-2412": {
9
+ "QA": 0.9655172413793104,
10
+ "Reason": 0.7692307692307693,
11
+ "VQA_Reasoning": 0.85,
12
+ "VQA": 0.7674418604651163
13
+ },
14
+ "gpt-4o-mini-2412": {
15
+ "QA": 0.7586206896551724,
16
+ "Reason": 0.5384615384615384,
17
+ "VQA_Reasoning": 0.6,
18
+ "VQA": 0.627906976744186
19
+ },
20
+ "fuyu-8b-2412": {
21
+ "Reason": 0.0,
22
+ "QA": 0.017241379310344827
23
+ },
24
+ "llava-1.5-13b-hf-2412": {
25
+ "Reason": 0.0,
26
+ "VQA": 0.3023255813953488,
27
+ "QA": 0.0,
28
+ "VQA_Reasoning": 0.65
29
+ },
30
+ "llava-1.5-7b-hf-2412": {
31
+ "VQA_Reasoning": 0.45,
32
+ "VQA": 0.32558139534883723,
33
+ "Reason": 0.0,
34
+ "QA": 0.0
35
+ },
36
+ "llava-v1.6-mistral-7b-hf-2412": {
37
+ "VQA": 0.37209302325581395,
38
+ "VQA_Reasoning": 0.55,
39
+ "Reason": 0.15384615384615385,
40
+ "QA": 0.15517241379310345
41
+ },
42
+ "llava-v1.6-vicuna-13b-hf-2412": {
43
+ "VQA_Reasoning": 0.35,
44
+ "VQA": 0.4186046511627907,
45
+ "QA": 0.1724137931034483,
46
+ "Reason": 0.15384615384615385
47
+ },
48
+ "llava-v1.6-vicuna-7b-hf-2412": {
49
+ "Reason": 0.15384615384615385,
50
+ "VQA": 0.23255813953488372,
51
+ "VQA_Reasoning": 0.2,
52
+ "QA": 0.06896551724137931
53
+ },
54
+ "MiniCPM-V-2_6-2412": {
55
+ "Reason": 0.15384615384615385,
56
+ "VQA": 0.5116279069767442,
57
+ "VQA_Reasoning": 0.4,
58
+ "QA": 0.15517241379310345
59
+ },
60
+ "llava-gemma-2b-2412": {
61
+ "Reason": 0.0,
62
+ "VQA": 0.3023255813953488,
63
+ "VQA_Reasoning": 0.2,
64
+ "QA": 0.034482758620689655
65
+ },
66
+ "molmo-7b-d-0924-2412": {
67
+ "VQA": 0.5813953488372093,
68
+ "VQA_Reasoning": 0.15,
69
+ "QA": 0.1206896551724138,
70
+ "Reason": 0.15384615384615385,
71
+ "Gui_Grounding": {
 
 
 
 
72
  "100": 0.65,
73
  "20": 0.19,
74
  "50": 0.46,
75
  "200": 0.84
76
  },
77
+ "Embodied_Grounding": {
78
  "100": 0.30916030534351147,
79
  "20": 0.03435114503816794,
80
  "50": 0.16412213740458015,
81
  "200": 0.5
82
  }
83
  },
84
+ "molmo-72b-0924-2412": {
85
+ "Reason": 0.46153846153846156,
86
+ "VQA_Reasoning": 0.55,
87
+ "QA": 0.3620689655172414,
88
+ "VQA": 0.5581395348837209
89
  },
90
+ "mc-sft-llava_next_8b-mcqa_v3_12_25_277k-2411": {
91
+ "Reason": 0.23076923076923078,
92
+ "VQA_Reasoning": 0.25,
93
+ "QA": 0.6724137931034483,
94
+ "VQA": 0.11627906976744186
95
  },
96
+ "qwen2-vl-7b-instruct-2412": {
97
+ "Gui_Grounding": {
 
 
 
 
 
 
98
  "100": 0.41,
99
  "20": 0.08,
100
  "200": 0.76,
101
  "50": 0.21
102
  },
103
+ "Embodied_Grounding": {
104
  "100": 0.40458015267175573,
105
  "20": 0.13740458015267176,
106
  "200": 0.5572519083969466,
107
  "50": 0.25190839694656486
108
  },
109
+ "QA": 0.06896551724137931,
110
+ "VQA": 0.4186046511627907,
111
+ "VQA_Reasoning": 0.375,
112
+ "Reason": 0.15384615384615385
113
  },
114
+ "llama-3.2-11b-vision-instruct-2412": {
115
+ "VQA": 0.4418604651162791,
116
+ "VQA_Reasoning": 0.25,
117
+ "Reason": 0.23076923076923078,
118
+ "QA": 0.20689655172413793
119
  },
120
+ "qwen2-vl-72b-instruct-2412": {
121
+ "Gui_Grounding": {
122
  "100": 0.0,
123
  "20": 0.0,
124
  "200": 0.0,
125
  "50": 0.0
126
  },
127
+ "Embodied_Grounding": {
128
  "100": 0.35877862595419846,
129
  "20": 0.09541984732824428,
130
  "200": 0.5038167938931297,
131
  "50": 0.22137404580152673
132
  }
133
  },
134
+ "mc-sft-llava_next_8b-mcqa_v3_12_25_277k-2412": {
135
+ "QA": 0.6724137931034483,
136
+ "Reason": 0.38461538461538464,
137
+ "VQA": 0.20930232558139536,
138
+ "VQA_Reasoning": 0.45
139
+ },
140
+ "mc-vsft-llama3_llava_next_8b-mcvqa_v4_11_21_80k-2412": {
141
+ "QA": 0.603448275862069,
142
+ "Reason": 0.3076923076923077,
143
+ "VQA": 0.5581395348837209,
144
+ "VQA_Reasoning": 0.55
145
+ },
146
+ "mc-vsft-llava_v1.6_vicuna_13b-mcvqa_v4_11_21_80k-2412": {
147
+ "QA": 0.6206896551724138,
148
+ "Reason": 0.5384615384615384,
149
+ "VQA": 0.7441860465116279,
150
+ "VQA_Reasoning": 0.7
151
+ },
152
+ "mc-sft-qwen2_vl_7b-mcqa_v3_12_25_277k-2412": {
153
+ "QA": 0.6551724137931034,
154
+ "VQA_Reasoning": 0.45,
155
+ "Reason": 0.38461538461538464,
156
+ "VQA": 0.46511627906976744
157
+ },
158
+ "mc-vsft-qwen2_vl_7b-2412": {
159
+ "VQA": 0.6511627906976745,
160
+ "Reason": 0.23076923076923078,
161
+ "QA": 0.6206896551724138,
162
+ "VQA_Reasoning": 0.75
163
+ },
164
+ "mc-vsft-qwen2_vl_2b-2412": {
165
+ "Reason": 0.15384615384615385,
166
+ "VQA_Reasoning": 0.7,
167
+ "VQA": 0.5813953488372093,
168
+ "QA": 0.1724137931034483
169
+ },
170
+ "mc-point-qwen2_vl_7b-2502": {
171
+ "Gui_Grounding": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  "100": 0.69,
173
  "20": 0.4,
174
  "200": 0.95,
175
  "50": 0.51
176
  },
177
+ "Embodied_Grounding": {
178
  "100": 0.6603053435114504,
179
  "20": 0.22137404580152673,
180
  "200": 0.7862595419847328,
181
  "50": 0.4732824427480916
182
  },
183
+ "Reason": 0.38461538461538464,
184
+ "QA": 0.6724137931034483,
185
+ "VQA": 0.5116279069767442,
186
+ "VQA_Reasoning": 0.475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  },
188
+ "mc-bbox-qwen2_vl_7b-2502": {
189
+ "Gui_Grounding": {
190
  "100": 0.57,
191
  "20": 0.38,
192
  "200": 0.74,
193
  "50": 0.47
194
  },
195
+ "Embodied_Grounding": {
196
  "100": 0.0916030534351145,
197
  "20": 0.015267175572519083,
198
  "200": 0.15648854961832062,
199
  "50": 0.03435114503816794
200
  }
201
  },
202
+ "mc-base-qwen2_vl_7b-2502": {
203
+ "Gui_Grounding": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  "100": 0.74,
205
  "20": 0.38,
206
  "200": 0.93,
207
  "50": 0.56
208
  },
209
+ "Embodied_Grounding": {
210
  "100": 0.648854961832061,
211
  "20": 0.2099236641221374,
212
  "200": 0.7786259541984732,
213
  "50": 0.4618320610687023
214
  },
215
+ "Reason": 0.38461538461538464,
216
+ "QA": 0.6982758620689655,
217
+ "VQA": 0.5348837209302325,
218
+ "VQA_Reasoning": 0.55
219
  },
220
+ "mc-base-qwen2_vl_7b-v2-ckpt2400-2502": {
221
+ "Gui_Grounding": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  "100": 0.92,
223
  "20": 0.79,
224
  "200": 0.99,
225
  "50": 0.88
226
  },
227
+ "Embodied_Grounding": {
228
  "100": 0.6068702290076335,
229
  "20": 0.2099236641221374,
230
  "200": 0.7824427480916031,
231
  "50": 0.41603053435114506
232
  },
233
+ "QA": 0.7068965517241379,
234
+ "VQA": 0.7674418604651163,
235
+ "VQA_Reasoning": 0.55,
236
+ "Reason": 0.46153846153846156
237
  },
238
+ "mc-base-qwen2_vl_7b-v2-ckpt3200-2502": {
239
+ "Gui_Grounding": {
240
  "100": 0.93,
241
  "20": 0.84,
242
  "200": 0.99,
243
  "50": 0.9
244
  },
245
+ "Embodied_Grounding": {
246
  "100": 0.6259541984732825,
247
  "20": 0.24427480916030533,
248
  "200": 0.7748091603053435,
249
  "50": 0.46946564885496184
250
  },
251
+ "QA": 0.6810344827586207,
252
+ "VQA": 0.6976744186046512,
253
+ "VQA_Reasoning": 0.55,
254
+ "Reason": 0.38461538461538464
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  },
256
+ "mc-base-qwen2_vl_2b-v2-ckpt1600-2502": {
257
+ "Gui_Grounding": {
258
  "100": 0.54,
259
  "20": 0.32,
260
  "200": 0.93,
261
  "50": 0.38
262
  },
263
+ "Embodied_Grounding": {
264
  "100": 0.5038167938931297,
265
  "20": 0.08015267175572519,
266
  "200": 0.7175572519083969,
267
  "50": 0.2748091603053435
268
  },
269
+ "QA": 0.3103448275862069,
270
+ "VQA": 0.5116279069767442,
271
+ "VQA_Reasoning": 0.4,
272
+ "Reason": 0.38461538461538464
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  },
274
+ "mc-base-qwen2_vl_2b-v2-ckpt4000-2502": {
275
+ "Gui_Grounding": {
276
  "100": 0.59,
277
  "20": 0.39,
278
  "200": 0.94,
279
  "50": 0.45
280
  },
281
+ "Embodied_Grounding": {
282
  "100": 0.5305343511450382,
283
  "20": 0.16793893129770993,
284
  "200": 0.7557251908396947,
285
  "50": 0.3473282442748092
286
  }
287
  },
288
+ "mc-base-llava_next_vicuna_13b-ckpt4000-2502": {
289
+ "Gui_Grounding": {
290
  "100": 0.74,
291
  "20": 0.6,
292
  "200": 0.99,
293
  "50": 0.68
294
  },
295
+ "Embodied_Grounding": {
296
  "100": 0.6259541984732825,
297
  "20": 0.15267175572519084,
298
  "200": 0.7748091603053435,
299
  "50": 0.4122137404580153
300
  },
301
+ "QA": 0.603448275862069,
302
+ "VQA": 0.4883720930232558,
303
+ "VQA_Reasoning": 0.4,
304
+ "Reason": 0.46153846153846156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  },
306
+ "mc-reason-qwen2_vl_7b-ckpt2400-2502": {
307
+ "Gui_Grounding": {
308
  "100": 0.0,
309
  "20": 0.0,
310
  "200": 0.0,
311
  "50": 0.0
312
  },
313
+ "Embodied_Grounding": {
314
  "100": 0.0,
315
  "20": 0.0,
316
  "200": 0.003816793893129771,
317
  "50": 0.0
318
  },
319
+ "QA": 0.6551724137931034,
320
+ "VQA": 0.6046511627906976,
321
+ "VQA_Reasoning": 0.325,
322
+ "Reason": 0.5229885057471264
323
  }
324
  }