File size: 6,360 Bytes
2ae3b27
 
35a89f7
aeb0da7
f063aad
8d72cac
f063aad
 
35a89f7
c1f5a69
1452546
 
 
 
 
 
 
 
 
 
2ae3b27
1452546
2ae3b27
1452546
 
 
2ae3b27
c1f5a69
1452546
 
 
 
 
 
4d42795
2ae3b27
59f1f35
1452546
 
 
 
 
 
 
59f1f35
 
1452546
 
 
59f1f35
1452546
59f1f35
1452546
 
 
59f1f35
 
85ae27d
1452546
 
 
 
 
 
 
 
 
 
0988648
1452546
0988648
1452546
 
 
85ae27d
 
ba77b4b
 
 
a995534
2ae3b27
 
 
 
 
 
 
df9540e
2ae3b27
8aaab52
480768c
 
 
 
 
 
 
 
46446c7
8aaab52
 
 
 
df9540e
2ae3b27
 
 
481d9fe
 
 
2ae3b27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1f5a69
 
 
1452546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1f5a69
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
# This module stores the constants used by the VBench leaderboard.
# Non-task columns: the model name followed by the four score columns.
# These come first in COLUMN_NAMES, ahead of the per-dimension task columns.
MODEL_INFO = [
    "Model Name (clickable)",  # the only non-score column
    "Selected Score",
    "Total Score",
    "Quality Score",
    "Semantic Score",
]
# Dimensions grouped under the "quality" heading.
QUALITY_LIST = [
    "subject consistency",
    "background consistency",
    "temporal flickering",
    "motion smoothness",
    "aesthetic quality",
    "imaging quality",
    "dynamic degree",
]

# Dimensions grouped under the "semantic" heading.
SEMANTIC_LIST = [
    "object class",
    "multiple objects",
    "human action",
    "color",
    "spatial relationship",
    "scene",
    "appearance style",
    "temporal style",
    "overall consistency",
]

# All 16 dimensions: the quality group first, then the semantic group.
# Concatenation builds a new list, so TASK_INFO stays an independent object.
TASK_INFO = QUALITY_LIST + SEMANTIC_LIST

# Subset of dimensions named as the default; kept as its own literal so it can
# change independently of QUALITY_LIST.
DEFAULT_INFO = [
    "subject consistency",
    "background consistency",
    "temporal flickering",
    "motion smoothness",
    "aesthetic quality",
    "imaging quality",
]

# Per-dimension weight. Every dimension weighs 1 except "dynamic degree",
# which is overridden to 0.5 below. Assigning to an existing key keeps its
# position, so the key order matches the tuple order.
DIM_WEIGHT = dict.fromkeys(
    (
        "subject consistency",
        "background consistency",
        "temporal flickering",
        "motion smoothness",
        "aesthetic quality",
        "imaging quality",
        "dynamic degree",
        "object class",
        "multiple objects",
        "human action",
        "color",
        "spatial relationship",
        "scene",
        "appearance style",
        "temporal style",
        "overall consistency",
    ),
    1,
)
DIM_WEIGHT["dynamic degree"] = 0.5

# NOTE(review): presumably the relative weights of the Quality and Semantic
# scores when they are combined into the Total score - confirm against the
# scoring code.
QUALITY_WEIGHT = 4
SEMANTIC_WEIGHT = 1

# Column datatypes: one "markdown" column followed by 20 "number" columns,
# 21 entries in total (the same count as MODEL_INFO + TASK_INFO).
DATA_TITILE_TYPE = ["markdown"] + ["number"] * 20

# Name of the submission dataset repository.
SUBMISSION_NAME = "vbench_leaderboard_submission"
# URL of the submission dataset. Built by plain string concatenation:
# os.path.join applies filesystem path rules (platform-dependent separators),
# which are not URL semantics.
SUBMISSION_URL = "https://huggingface.co/datasets/Vchitect/" + SUBMISSION_NAME
# Relative path of the results CSV inside the local submission directory.
CSV_DIR = "./vbench_leaderboard_submission/results.csv"

# Full column header list: model/score columns first, then one column per task.
COLUMN_NAMES = [*MODEL_INFO, *TASK_INFO]

LEADERBORAD_INTRODUCTION = """# VBench Leaderboard
    
    *"Which Video Generation Model is better?"*  
    πŸ† Welcome to the leaderboard of the **VBench**! 🎦 *A Comprehensive Benchmark Suite for Video Generative Models*    
    <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
    <a href='https://arxiv.org/abs/2311.17982'><img src='https://img.shields.io/badge/cs.CV-Paper-b31b1b?logo=arxiv&logoColor=red'></a>
    <a href='https://vchitect.github.io/VBench-project/'><img src='https://img.shields.io/badge/VBench-Website-green?logo=googlechrome&logoColor=green'></a>
    <a href='https://pypi.org/project/vbench/'><img src='https://img.shields.io/pypi/v/vbench'></a>
    <a href='https://www.youtube.com/watch?v=7IhCC8Qqn8Y'><img src='https://img.shields.io/badge/YouTube-Video-c4302b?logo=youtube&logoColor=red'></a>
    <a href='https://hits.seeyoufarm.com'><img src='https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2FVchitect%2FVBench&count_bg=%23FFA500&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=visitors&edge_flat=false'></a>
    </div>
    
    - **Comprehensive Dimensions:** We carefully decompose video generation quality into 16 comprehensive dimensions to reveal individual model's strengths and weaknesses.
    - **Human Alignment:** We conducted extensive experiments and human annotations to validate robustness of VBench.
    - **Valuable Insights:** VBench provides multi-perspective insights useful for the community.  
    
    Please follow the instructions in [VBench](https://github.com/Vchitect/VBench?tab=readme-ov-file#usage) to upload the generated `result.json` file here. After clicking the `Submit Eval` button, click the `Refresh` button.
    """

# Markdown shown with the submission form: tells submitters which file to
# upload. The warning sign was previously stored as mis-decoded UTF-8 bytes,
# and the "VBench Github" link had an empty target ("https:"); it now points
# at the VBench usage instructions.
SUBMIT_INTRODUCTION = """# Submit on VBench Benchmark Introduction

## ⚠ Please note that you need to obtain the file `evaluation_results/*eval_results.json` by running [VBench Github](https://github.com/Vchitect/VBench?tab=readme-ov-file#usage) and upload the evaluation results. 
    Uploading generated videos or images of the model is invalid!
"""

# Placeholder text for the table: only a newline followed by four spaces.
TABLE_INTRODUCTION = "\n    "

# One-paragraph description of VBench. Written as adjacent string literals,
# one sentence per literal; the leading newline, the seven-space indent and
# the trailing newline are part of the value.
LEADERBORAD_INFO = (
    "\n"
    "       VBench, a comprehensive benchmark suite for video generative models. "
    'We design a comprehensive and hierarchical Evaluation Dimension Suite to decompose "video generation quality" into multiple well-defined dimensions to facilitate fine-grained and objective evaluation. '
    "For each dimension and each content category, we carefully design a Prompt Suite as test cases, and sample Generated Videos from a set of video generation models. "
    "For each evaluation dimension, we specifically design an Evaluation Method Suite, which uses carefully crafted method or designated pipeline for automatic objective evaluation. "
    "We also conduct Human Preference Annotation for the generated videos for each dimension, and show that VBench evaluation results are well aligned with human perceptions. "
    "VBench can provide valuable insights from multiple perspectives."
    "\n"
)

# Label text for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# BibTeX entry for the VBench paper, assembled line by line. The text has no
# backslashes, so plain literals give the same value as a raw string.
# There is no trailing newline after the closing brace.
CITATION_BUTTON_TEXT = "\n".join(
    [
        "@article{huang2023vbench,",
        "     title={{VBench}: Comprehensive Benchmark Suite for Video Generative Models},",
        "     author={Huang, Ziqi and He, Yinan and Yu, Jiashuo and Zhang, Fan and Si, Chenyang and Jiang, Yuming and Zhang, Yuanhan and Wu, Tianxing and Jin, Qingyang and Chanpaisit, Nattapol and Wang, Yaohui and Chen, Xinyuan and Wang, Limin and Lin, Dahua and Qiao, Yu and Liu, Ziwei},",
        "     journal={arXiv preprint arXiv:2311.17982},",
        "     year={2023}",
        "}",
    ]
)

# Per-dimension "Min"/"Max" bounds, keyed by dimension name.
# NOTE(review): presumably used to min-max normalize raw scores - confirm
# against the scoring code.
# Each row below is (dimension, Min, Max); row order sets the dict key order.
NORMALIZE_DIC = {
    dimension: {"Min": lower, "Max": upper}
    for dimension, lower, upper in (
        ("subject consistency", 0.1462, 1.0),
        ("background consistency", 0.2615, 1.0),
        ("temporal flickering", 0.6293, 1.0),
        ("motion smoothness", 0.706, 0.9975),
        ("dynamic degree", 0.0, 1.0),
        ("aesthetic quality", 0.0, 1.0),
        ("imaging quality", 0.0, 1.0),
        ("object class", 0.0, 1.0),
        ("multiple objects", 0.0, 1.0),
        ("human action", 0.0, 1.0),
        ("color", 0.0, 1.0),
        ("spatial relationship", 0.0, 1.0),
        ("scene", 0.0, 0.8222),
        ("appearance style", 0.0009, 0.2855),
        ("temporal style", 0.0, 0.364),
        ("overall consistency", 0.0, 0.364),
    )
}