AtsuMiyai committed
Commit 62d48db • 1 Parent(s): 3e8020b

change input file format to json

Files changed:
- app.py (+13 -6)
- constants.py (+5 -5)
app.py
CHANGED

@@ -9,6 +9,7 @@ from collections import defaultdict
 from constants import *
 import os
 from huggingface_hub import Repository
+import json


 global data_component_aad, data_component_iasd, data_component_ivqd, filter_component
@@ -34,6 +35,12 @@ def upload_file(files):
     return file_paths


+def create_dual_df(input_file):
+    with open(input_file, 'r') as f:
+        data = json.load(f)
+    return pd.DataFrame(data)
+
+
 # Accuracy Report
 def report_acc(df, groupd='category', metric_type="dual"):
     assert 'split' in df
@@ -88,8 +95,8 @@ def eval_result_dual(data_main, metric_type="dual"):
     return overall, leaf


-def calculate_score(dual_df_path):
-    dual_df =
+def calculate_score(input_path):
+    dual_df = create_dual_df(input_path)
     overall_dual, leaf_dual = eval_result_dual(dual_df)
     overall_standard, leaf_standard = eval_result_dual(dual_df, metric_type="standard")
     overall_upd, leaf_upd = eval_result_dual(dual_df, metric_type="upd")
@@ -98,8 +105,8 @@ def calculate_score(dual_df_path):


 # add the new data into the queue
-def add_queue(base_df, dual_df_path, model_name):
-    dual_df =
+def add_queue(base_df, input_path, model_name):
+    dual_df = create_dual_df(input_path)
     base_df[f"{model_name}_prediction_standard"] = dual_df["prediction_standard"]
     base_df[f"{model_name}_hit_standard"] = dual_df["hit_standard"]
     base_df[f"{model_name}_prediction_upd"] = dual_df["prediction_upd"]
@@ -109,9 +116,9 @@ def add_queue(base_df, dual_df_path, model_name):


 # check whether the input file is correct or not
-def validity_check(
+def validity_check(input_path, UPD_type, question_type):

-    input_df =
+    input_df = create_dual_df(input_path)

     # check for the correct data size
     data_num_dict = {"AAD": 820, "IASD": 919, "IVQD": 356}
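For context, the new loading path replaces the previous spreadsheet read with a plain JSON-to-DataFrame conversion. Below is a minimal usage sketch of the helper added in this commit; the file name is hypothetical, and only the columns that appear elsewhere in this diff are accessed, while the rest of the `result_dual.json` schema is assumed to match the UPD repository's export.

```python
import json

import pandas as pd


def create_dual_df(input_file):
    # Load the submitted result_dual.json and convert it to a DataFrame,
    # mirroring the helper introduced in this commit. The JSON is assumed
    # to be a list of per-question records (anything pd.DataFrame accepts).
    with open(input_file, 'r') as f:
        data = json.load(f)
    return pd.DataFrame(data)


if __name__ == "__main__":
    # Hypothetical file name; a real submission follows the UPD repo instructions.
    dual_df = create_dual_df("llava1.5_13b_result_dual.json")
    # Columns referenced by add_queue in app.py; other columns are assumed.
    print(dual_df[["prediction_standard", "hit_standard",
                   "prediction_upd", "hit_upd"]].head())
```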
constants.py
CHANGED

@@ -39,18 +39,18 @@ LEADERBORAD_INTRODUCTION = """
 - **Ability-wise Evaluation:** We carefully decompose each benchmark into more than 10 abilities to reveal individual model's strengths and weaknesses.
 - **Valuable Insights:** MM-UPD Bench provides multi-perspective insights on trustworthiness and reliablitity for the community.

-Please follow the instructions in [UPD](https://github.com/AtsuMiyai/UPD) to upload the generated `result_dual.
+Please follow the instructions in [UPD](https://github.com/AtsuMiyai/UPD) to upload the generated `result_dual.json` file here. After clicking the `Submit Eval` button, click the `Refresh` button.
 """


 SUBMIT_INTRODUCTION = """# Submit on MM-UPD Benchmark Introduction
-1. Obtain Dual Result
+1. Obtain Dual Result JSON File from our [github repository](https://github.com/AtsuMiyai/UPD/tree/main/scripts/inference).
 2. If you want to update model performance by uploading new results, please ensure 'Model Name Revision' is the same as what's shown in the leaderboard. For example, if you want to modify LLaVA-1.5-13B's performance, you need to fill in 'LLaVA-1.5-13B' in 'Revision Model Name'.
 3. Please provide the correct link of your model's repository for each submission.
 4. After clicking 'Submit Eval', you can click 'Refresh' to obtain the latest result in the leaderboard.

-Note: The example of the submitted
-You need to care about whether (i) the
+Note: The example of the submitted JSON file is this url: [llava1.5_13b_result_dual_detail_submission.json](https://drive.google.com/file/d/1ILYlxcKC_a5Jrm7kyyqeHo0vo3WjkA1V/view?usp=sharing).
+You need to care about whether (i) the JSON file has the prediction for all data, (ii) the data on all options, "hit_upd", "hit_standard", and "hit" exist.

 ## Submit Example
 If you want to upload LLaVA-1.5-13B's result in the leaderboard, you need to:
@@ -63,7 +63,7 @@ SUBMIT_INTRODUCTION = """# Submit on MM-UPD Benchmark Introduction
 7. Fill in 'LLM model' if you select Others for 'LLM Type'.
 8. Select 'AAD', 'IASD', or 'IVQD' in 'UPD_Type'.
 9. Select 'Base', 'Option', or 'Instruction' in 'Question Type'.
-10. Upload results.
+10. Upload results.json.
 11. Click the 'Submit Eval' button.
 12. Click 'Refresh' to obtain the uploaded leaderboard.

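The submission note above asks that the JSON contain a prediction for every question and that the "hit_upd", "hit_standard", and "hit" fields exist, and app.py checks the record count per UPD type (820 for AAD, 919 for IASD, 356 for IVQD). A rough local pre-check is sketched below under those assumptions; it is not the actual `validity_check` in app.py, the required field list beyond the names quoted above is an assumption, and the JSON is assumed to be a list of per-question records.

```python
import json
import sys

# Expected number of records per UPD type, as hard-coded in app.py.
DATA_NUM = {"AAD": 820, "IASD": 919, "IVQD": 356}

# Fields called out in the submission note; any further required fields are assumed.
REQUIRED_FIELDS = ["prediction_standard", "prediction_upd",
                   "hit_standard", "hit_upd", "hit"]


def precheck(path, upd_type):
    # Load the candidate result_dual.json (assumed to be a list of records).
    with open(path, "r") as f:
        records = json.load(f)

    # Check the overall data size for the chosen UPD type.
    if len(records) != DATA_NUM[upd_type]:
        print(f"size mismatch: got {len(records)}, expected {DATA_NUM[upd_type]}")
        return False

    # Check that every record carries the fields named in the submission note.
    for i, rec in enumerate(records):
        missing = [k for k in REQUIRED_FIELDS if k not in rec]
        if missing:
            print(f"record {i} is missing fields: {missing}")
            return False
    return True


if __name__ == "__main__":
    # Usage (hypothetical file name): python precheck.py result_dual.json AAD
    ok = precheck(sys.argv[1], sys.argv[2])
    sys.exit(0 if ok else 1)
```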