File size: 9,426 Bytes
2ae3b27
 
35a89f7
aeb0da7
8d72cac
f063aad
 
a635b9d
35a89f7
c4ab057
 
 
52ac625
 
c4ab057
 
7340433
 
 
 
 
 
 
 
c1f5a69
1452546
 
 
 
 
 
1659677
1452546
 
 
2ae3b27
1452546
2ae3b27
1452546
 
 
2ae3b27
c1f5a69
1452546
 
 
 
 
1659677
1452546
4d42795
2ae3b27
59f1f35
1452546
 
 
 
 
 
 
59f1f35
 
1452546
 
 
59f1f35
1452546
59f1f35
1452546
 
 
59f1f35
 
0f19bde
 
 
 
 
 
 
 
7340433
 
 
 
 
 
 
 
 
 
 
2d895b3
 
7340433
 
 
 
 
 
 
 
 
 
 
2d895b3
 
7340433
 
85ae27d
1452546
 
 
 
 
 
 
 
 
 
0988648
1452546
0988648
1452546
 
 
85ae27d
 
7340433
 
 
 
 
 
 
 
 
2d895b3
 
7340433
 
ba77b4b
 
7340433
 
ba77b4b
a995534
7340433
2ae3b27
 
 
 
1fa59b4
7340433
2ae3b27
 
c4ab057
7340433
2ae3b27
df9540e
2ae3b27
8aaab52
7919585
480768c
 
 
 
 
 
 
46446c7
8aaab52
 
 
 
df9540e
2ae3b27
 
 
481d9fe
 
 
2ae3b27
 
 
 
 
 
 
 
 
 
fa9b35a
2ae3b27
 
fa9b35a
 
c1f5a69
 
4301853
e89adb4
7340433
 
c1f5a69
1452546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7340433
 
 
 
 
 
 
 
 
 
 
2d895b3
 
c1f5a69
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import os
# This module stores the constants used by the VBench leaderboard.
# Leading column headers of the main table; COLUMN_NAMES is built as
# MODEL_INFO followed by TASK_INFO.
MODEL_INFO = [
    "Model Name (clickable)",
    "Total Score",
    "Quality Score",
    "Semantic Score",
    "Selected Score",
]

# Leading column headers of the quality-only table; COLUMN_NAMES_QUALITY is
# built as this list followed by QUALITY_TAB.
MODEL_INFO_TAB_QUALITY = [
    "Model Name (clickable)",
    "Quality Score",
    "Selected Score",
]

# Leading column headers of the image-to-video (I2V) table; COLUMN_NAMES_I2V
# is built as this list followed by I2V_TAB.
MODEL_INFO_TAB_I2V = [
    "Model Name (clickable)",
    "Total Score",
    "I2V Score",
    "Quality Score",
    "Selected Score",
]

# All 16 evaluation dimensions, in the column order used by COLUMN_NAMES:
# seven quality dimensions first, then nine semantic dimensions.
TASK_INFO = [
    "subject consistency",
    "background consistency",
    "temporal flickering",
    "motion smoothness",
    "aesthetic quality",
    "dynamic degree",
    "imaging quality",
    "object class",
    "multiple objects",
    "human action",
    "color",
    "spatial relationship",
    "scene",
    "appearance style",
    "temporal style",
    "overall consistency",
]

# Seven quality dimensions, identical to the first seven entries of TASK_INFO.
# NOTE(review): presumably the default dimension selection in the UI — confirm
# against the code that consumes this list.
DEFAULT_INFO = [
    "subject consistency",
    "background consistency",
    "temporal flickering",
    "motion smoothness",
    "aesthetic quality",
    "dynamic degree",
    "imaging quality",
]

# The seven quality dimensions. Note the order differs from DEFAULT_INFO:
# "imaging quality" comes before "dynamic degree" here.
QUALITY_LIST = [
    "subject consistency",
    "background consistency",
    "temporal flickering",
    "motion smoothness",
    "aesthetic quality",
    "imaging quality",
    "dynamic degree",
]

# The nine semantic dimensions (the last nine entries of TASK_INFO).
SEMANTIC_LIST = [
    "object class",
    "multiple objects",
    "human action",
    "color",
    "spatial relationship",
    "scene",
    "appearance style",
    "temporal style",
    "overall consistency",
]

# Dimension columns of the quality-only table (appended to
# MODEL_INFO_TAB_QUALITY in COLUMN_NAMES_QUALITY). Same entries as
# QUALITY_LIST minus "temporal flickering".
QUALITY_TAB = [
    "subject consistency",
    "background consistency",
    "motion smoothness",
    "aesthetic quality",
    "imaging quality",
    "dynamic degree",
]

# The two video-image consistency dimensions of the I2V track.
I2V_LIST = ["Video-Image Subject Consistency", "Video-Image Background Consistency"]

# The seven quality dimensions of the I2V track (title-cased, unlike the
# lower-case names in QUALITY_LIST).
I2V_QUALITY_LIST = [
    "Subject Consistency",
    "Background Consistency",
    "Motion Smoothness",
    "Dynamic Degree",
    "Aesthetic Quality",
    "Imaging Quality",
    "Temporal Flickering",
]

# Dimension columns of the I2V table (appended to MODEL_INFO_TAB_I2V in
# COLUMN_NAMES_I2V): camera motion, then the entries of I2V_LIST, then the
# entries of I2V_QUALITY_LIST.
I2V_TAB = [
    "Video-Text Camera Motion",
    "Video-Image Subject Consistency",
    "Video-Image Background Consistency",
    "Subject Consistency",
    "Background Consistency",
    "Motion Smoothness",
    "Dynamic Degree",
    "Aesthetic Quality",
    "Imaging Quality",
    "Temporal Flickering",
]

# Weight of each evaluation dimension, keyed by dimension name. Every
# dimension has weight 1 except "dynamic degree", which has weight 0.5.
DIM_WEIGHT = {
    dimension: (0.5 if dimension == "dynamic degree" else 1)
    for dimension in (
        "subject consistency",
        "background consistency",
        "temporal flickering",
        "motion smoothness",
        "aesthetic quality",
        "imaging quality",
        "dynamic degree",
        "object class",
        "multiple objects",
        "human action",
        "color",
        "spatial relationship",
        "scene",
        "appearance style",
        "temporal style",
        "overall consistency",
    )
}

# Weight of each I2V dimension, keyed by the names listed in I2V_TAB.
# Camera motion is weighted 0.1 and dynamic degree 0.5; all others are 1.
DIM_WEIGHT_I2V = {
    "Video-Text Camera Motion": 0.1,
    "Video-Image Subject Consistency": 1,
    "Video-Image Background Consistency": 1,
    "Subject Consistency": 1,
    "Background Consistency": 1,
    "Motion Smoothness": 1,
    "Dynamic Degree": 0.5,
    "Aesthetic Quality": 1,
    "Imaging Quality": 1,
    "Temporal Flickering": 1,
}

# Category-level weights: quality is weighted 4 against semantic's 1.
# NOTE(review): presumably used when combining category scores into the total
# score — confirm against the scoring code.
SEMANTIC_WEIGHT, QUALITY_WEIGHT = 1, 4
# Both I2V categories carry the same (float) weight.
I2V_WEIGHT = I2V_QUALITY_WEIGHT = 1.0

# Per-column datatypes: one 'markdown' column followed by twenty 'number'
# columns (21 entries in each list). The misspelled names ("TITILE") are kept
# because they are this module's public identifiers.
DATA_TITILE_TYPE = ['markdown'] + ['number'] * 20
I2V_TITILE_TYPE = ['markdown'] + ['number'] * 20

# Name of the submission dataset; it is both the last URL segment and the
# local directory that holds the result CSV files.
SUBMISSION_NAME = "vbench_leaderboard_submission"

# Build the URL with plain string concatenation. os.path.join applies
# filesystem rules and only produced a valid URL here because the prefix ends
# with "/"; without that slash it would insert "\\" on Windows.
SUBMISSION_URL = "https://huggingface.co/datasets/Vchitect/" + SUBMISSION_NAME

# Result files inside the local submission directory. They are derived from
# SUBMISSION_NAME so the directory name is spelled in exactly one place.
_SUBMISSION_DIR = "./" + SUBMISSION_NAME
CSV_DIR = _SUBMISSION_DIR + "/results.csv"
QUALITY_DIR = _SUBMISSION_DIR + "/quality.csv"
I2V_DIR = _SUBMISSION_DIR + "/i2v_results.csv"

# Full header row of each table: the model/score columns first, then the
# per-dimension columns. Each result is a new list, so the source lists are
# not aliased.
COLUMN_NAMES = [*MODEL_INFO, *TASK_INFO]
COLUMN_NAMES_QUALITY = [*MODEL_INFO_TAB_QUALITY, *QUALITY_TAB]
COLUMN_NAMES_I2V = [*MODEL_INFO_TAB_I2V, *I2V_TAB]

# Markdown/HTML banner for the top of the leaderboard page.
# The two emoji were stored as mojibake (UTF-8 bytes decoded with a legacy
# code page) and are restored here. The misspelled name ("LEADERBORAD") is
# kept because it is this module's public identifier.
LEADERBORAD_INTRODUCTION = """# VBench Leaderboard
    
    *"Which Video Generation Model is better?"*  
    🏆 Welcome to the leaderboard of the **VBench**! 🎦 *A Comprehensive Benchmark Suite for Video Generative Models* (**CVPR 2024**)   [![Code](https://img.shields.io/github/stars/Vchitect/VBench.svg?style=social&label=Official)](https://github.com/Vchitect/VBench) 
    <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
    <a href='https://arxiv.org/abs/2311.17982'><img src='https://img.shields.io/badge/cs.CV-Paper-b31b1b?logo=arxiv&logoColor=red'></a>
    <a href='https://vchitect.github.io/VBench-project/'><img src='https://img.shields.io/badge/VBench-Website-green?logo=googlechrome&logoColor=green'></a>
    <a href='https://pypi.org/project/vbench/'><img src='https://img.shields.io/pypi/v/vbench'></a>
    <a href='https://www.youtube.com/watch?v=7IhCC8Qqn8Y'><img src='https://img.shields.io/badge/YouTube-Video-c4302b?logo=youtube&logoColor=red'></a>
    <a href='https://hits.seeyoufarm.com'><img src='https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2FVchitect%2FVBench&count_bg=%23FFA500&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=visitors&edge_flat=false'></a>
    </div>
    
    - **Comprehensive Dimensions:** We carefully decompose video generation quality into 16 comprehensive dimensions to reveal individual model's strengths and weaknesses.
    - **Human Alignment:** We conducted extensive experiments and human annotations to validate robustness of VBench.
    - **Valuable Insights:** VBench provides multi-perspective insights useful for the community.  
    
    Please follow the instructions in [VBench](https://github.com/Vchitect/VBench?tab=readme-ov-file#usage) to upload the generated `result.json` file here. After clicking the `Submit Eval` button, click the `Refresh` button.
    """

# Markdown instructions for the submission form.
# Two fixes: the "VBench Github" link pointed at the bare scheme "https:" and
# now targets the repository, and the warning sign was stored as mojibake.
SUBMIT_INTRODUCTION = """# Submit on VBench Benchmark Introduction

## ⚠ Please note that you need to obtain the file `evaluation_results/*eval_results.json` by running [VBench Github](https://github.com/Vchitect/VBench) and upload the evaluation results. 
    Uploading generated videos or images of the model is invalid!
"""

# Placeholder table intro: a newline followed by four spaces, no visible text.
TABLE_INTRODUCTION = "\n    "

# Long-form description of the VBench benchmark: what it evaluates and how.
# The misspelled name ("LEADERBORAD") is kept because it is this module's
# public identifier.
LEADERBORAD_INFO = """
       VBench, a comprehensive benchmark suite for video generative models. We design a comprehensive and hierarchical Evaluation Dimension Suite to decompose "video generation quality" into multiple well-defined dimensions to facilitate fine-grained and objective evaluation. For each dimension and each content category, we carefully design a Prompt Suite as test cases, and sample Generated Videos from a set of video generation models. For each evaluation dimension, we specifically design an Evaluation Method Suite, which uses carefully crafted method or designated pipeline for automatic objective evaluation. We also conduct Human Preference Annotation for the generated videos for each dimension, and show that VBench evaluation results are well aligned with human perceptions. VBench can provide valuable insights from multiple perspectives.
"""

# Label text for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# BibTeX entry for the VBench paper. It is a raw string, so the braces and any
# backslashes are kept verbatim.
CITATION_BUTTON_TEXT = r"""@inproceedings{huang2023vbench,
     title={{VBench}: Comprehensive Benchmark Suite for Video Generative Models},
     author={Huang, Ziqi and He, Yinan and Yu, Jiashuo and Zhang, Fan and Si, Chenyang and Jiang, Yuming and Zhang, Yuanhan and Wu, Tianxing and Jin, Qingyang and Chanpaisit, Nattapol and Wang, Yaohui and Chen, Xinyuan and Wang, Limin and Lin, Dahua and Qiao, Yu and Liu, Ziwei},
     booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
     year={2024}
}"""

# Disclaimer about which videos were used for the Sora evaluation.
QUALITY_CLAIM_TEXT = (
    "We use all the videos on Sora website (https://openai.com/sora) "
    "for a preliminary evaluation, "
    "including the failure case videos Sora provided."
)

# Disclaimer explaining why `camera motion` is left out of the I2V total score.
I2V_CLAIM_TEXT = (
    "Since the open-sourced SVD models do not accept text input during the I2V stage, "
    "we are unable to evaluate its `camera motion` in terms of `video-text consistency`. "
    "The total score is calculated based on all dimensions except `camera motion`."
)

# Per-dimension "Min"/"Max" bounds, keyed by the same names as DIM_WEIGHT.
# NOTE(review): presumably used for min-max normalisation of raw scores —
# confirm against the scoring code.
NORMALIZE_DIC = {
    "subject consistency": {"Min": 0.1462, "Max": 1.0},
    "background consistency": {"Min": 0.2615, "Max": 1.0},
    "temporal flickering": {"Min": 0.6293, "Max": 1.0},
    "motion smoothness": {"Min": 0.706, "Max": 0.9975},
    "dynamic degree": {"Min": 0.0, "Max": 1.0},
    "aesthetic quality": {"Min": 0.0, "Max": 1.0},
    "imaging quality": {"Min": 0.0, "Max": 1.0},
    "object class": {"Min": 0.0, "Max": 1.0},
    "multiple objects": {"Min": 0.0, "Max": 1.0},
    "human action": {"Min": 0.0, "Max": 1.0},
    "color": {"Min": 0.0, "Max": 1.0},
    "spatial relationship": {"Min": 0.0, "Max": 1.0},
    "scene": {"Min": 0.0, "Max": 0.8222},
    "appearance style": {"Min": 0.0009, "Max": 0.2855},
    "temporal style": {"Min": 0.0, "Max": 0.364},
    "overall consistency": {"Min": 0.0, "Max": 0.364},
}

# Per-dimension "Min"/"Max" bounds for the I2V track, keyed by the same names
# as DIM_WEIGHT_I2V.
# NOTE(review): presumably used for min-max normalisation of raw scores —
# confirm against the scoring code.
NORMALIZE_DIC_I2V = {
    "Video-Text Camera Motion": {"Min": 0.0, "Max": 1.0},
    "Video-Image Subject Consistency": {"Min": 0.1462, "Max": 1.0},
    "Video-Image Background Consistency": {"Min": 0.2615, "Max": 1.0},
    "Subject Consistency": {"Min": 0.1462, "Max": 1.0},
    "Background Consistency": {"Min": 0.2615, "Max": 1.0},
    "Motion Smoothness": {"Min": 0.7060, "Max": 0.9975},
    "Dynamic Degree": {"Min": 0.0, "Max": 1.0},
    "Aesthetic Quality": {"Min": 0.0, "Max": 1.0},
    "Imaging Quality": {"Min": 0.0, "Max": 1.0},
    "Temporal Flickering": {"Min": 0.6293, "Max": 1.0},
}