AtsuMiyai committed on
Commit 62d48db
1 Parent(s): 3e8020b

change input file format to json

Files changed (2)
  1. app.py +13 -6
  2. constants.py +5 -5
app.py CHANGED
@@ -9,6 +9,7 @@ from collections import defaultdict
 from constants import *
 import os
 from huggingface_hub import Repository
+import json
 
 
 global data_component_aad, data_component_iasd, data_component_ivqd, filter_component
@@ -34,6 +35,12 @@ def upload_file(files):
     return file_paths
 
 
+def create_dual_df(input_file):
+    with open(input_file, 'r') as f:
+        data = json.load(f)
+    return pd.DataFrame(data)
+
+
 # Accuracy Report
 def report_acc(df, groupd='category', metric_type="dual"):
     assert 'split' in df
@@ -88,8 +95,8 @@ def eval_result_dual(data_main, metric_type="dual"):
     return overall, leaf
 
 
-def calculate_score(dual_df_path):
-    dual_df = pd.read_excel(dual_df_path)
+def calculate_score(input_path):
+    dual_df = create_dual_df(input_path)
     overall_dual, leaf_dual = eval_result_dual(dual_df)
     overall_standard, leaf_standard = eval_result_dual(dual_df, metric_type="standard")
     overall_upd, leaf_upd = eval_result_dual(dual_df, metric_type="upd")
@@ -98,8 +105,8 @@ def calculate_score(dual_df_path):
 
 
 # add the new data into the queue
-def add_queue(base_df, dual_df_path, model_name):
-    dual_df = pd.read_excel(dual_df_path)
+def add_queue(base_df, input_path, model_name):
+    dual_df = create_dual_df(input_path)
     base_df[f"{model_name}_prediction_standard"] = dual_df["prediction_standard"]
     base_df[f"{model_name}_hit_standard"] = dual_df["hit_standard"]
     base_df[f"{model_name}_prediction_upd"] = dual_df["prediction_upd"]
@@ -109,9 +116,9 @@ def add_queue(base_df, dual_df_path, model_name):
 
 
 # check whether the input file is correct or not
-def validity_check(input, UPD_type, question_type):
+def validity_check(input_path, UPD_type, question_type):
 
-    input_df = pd.read_excel(input)
+    input_df = create_dual_df(input_path)
 
     # check for the correct data size
     data_num_dict = {"AAD": 820, "IASD": 919, "IVQD": 356}
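
For context on the change above: `create_dual_df` replaces `pd.read_excel`, so the uploaded `result_dual.json` must deserialize into a table that pandas can build directly. A minimal sketch of that behavior, with illustrative record values; only the prediction/hit column names used by `add_queue` and the submission notes come from this commit, and the `index` field is a hypothetical extra:

```python
import json
import pandas as pd

# Illustrative records only: the real result_dual.json carries one entry per benchmark
# question, including the columns read later (prediction_standard, hit_standard,
# prediction_upd, hit_upd, hit, ...). The "index" field here is a hypothetical extra.
records = [
    {"index": 0, "prediction_standard": "A", "hit_standard": 1,
     "prediction_upd": "F", "hit_upd": 1, "hit": 1},
    {"index": 1, "prediction_standard": "B", "hit_standard": 0,
     "prediction_upd": "C", "hit_upd": 0, "hit": 0},
]
with open("result_dual.json", "w") as f:
    json.dump(records, f)

# Same shape of logic as the new create_dual_df: load the JSON and hand it
# straight to the DataFrame constructor.
with open("result_dual.json", "r") as f:
    dual_df = pd.DataFrame(json.load(f))

print(dual_df[["prediction_standard", "hit_upd"]])
```

Either a top-level array of per-question objects or a dict of column lists works with this constructor, so the rest of the scoring code can keep treating the submission as a DataFrame exactly as it did with the Excel input.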
constants.py CHANGED
@@ -39,18 +39,18 @@ LEADERBORAD_INTRODUCTION = """
 - **Ability-wise Evaluation:** We carefully decompose each benchmark into more than 10 abilities to reveal individual model's strengths and weaknesses.
 - **Valuable Insights:** MM-UPD Bench provides multi-perspective insights on trustworthiness and reliablitity for the community.
 
-Please follow the instructions in [UPD](https://github.com/AtsuMiyai/UPD) to upload the generated `result_dual.xlsx` file here. After clicking the `Submit Eval` button, click the `Refresh` button.
+Please follow the instructions in [UPD](https://github.com/AtsuMiyai/UPD) to upload the generated `result_dual.json` file here. After clicking the `Submit Eval` button, click the `Refresh` button.
 """
 
 
 SUBMIT_INTRODUCTION = """# Submit on MM-UPD Benchmark Introduction
-1. Obtain Dual Result Excel File from our [github repository](https://github.com/AtsuMiyai/UPD/tree/main/scripts/inference).
+1. Obtain Dual Result JSON File from our [github repository](https://github.com/AtsuMiyai/UPD/tree/main/scripts/inference).
 2. If you want to update model performance by uploading new results, please ensure 'Model Name Revision' is the same as what's shown in the leaderboard. For example, if you want to modify LLaVA-1.5-13B's performance, you need to fill in 'LLaVA-1.5-13B' in 'Revision Model Name'.
 3. Please provide the correct link of your model's repository for each submission.
 4. After clicking 'Submit Eval', you can click 'Refresh' to obtain the latest result in the leaderboard.
 
-Note: The example of the submitted excel file is this url: [llava1.5_13b_result_dual.xlsx](https://docs.google.com/spreadsheets/d/1Se0_iYHr6aktHFnCzwArU1ExTjL-UmeO/edit?usp=sharing&ouid=103623120947968158097&rtpof=true&sd=true).
-You need to care about whether (i) the excel file has the prediction for all data, (ii) the columns on hit_upd, hit_standard, and hit exist.
+Note: The example of the submitted JSON file is this url: [llava1.5_13b_result_dual_detail_submission.json](https://drive.google.com/file/d/1ILYlxcKC_a5Jrm7kyyqeHo0vo3WjkA1V/view?usp=sharing).
+You need to care about whether (i) the JSON file has the prediction for all data, (ii) the data on all options, "hit_upd", "hit_standard", and "hit" exist.
 
 ## Submit Example
 If you want to upload LLaVA-1.5-13B's result in the leaderboard, you need to:
@@ -63,7 +63,7 @@ SUBMIT_INTRODUCTION = """# Submit on MM-UPD Benchmark Introduction
 7. Fill in 'LLM model' if you select Others for 'LLM Type'.
 8. Select 'AAD', 'IASD', or 'IVQD' in 'UPD_Type'.
 9. Select 'Base', 'Option', or 'Instruction' in 'Question Type'.
-10. Upload results.xlsx.
+10. Upload results.json.
 11. Click the 'Submit Eval' button.
 12. Click 'Refresh' to obtain the uploaded leaderboard.
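
As a practical footnote to the submission notes above, a submitter could run a rough local check before uploading. The `precheck_submission` helper below is hypothetical, not part of this commit; it only reuses what appears in the diffs, namely app.py's `data_num_dict` row counts and the `hit_upd` / `hit_standard` / `hit` columns the note asks for:

```python
import json
import pandas as pd

# Row counts per UPD type, copied from validity_check's data_num_dict in app.py.
DATA_NUM = {"AAD": 820, "IASD": 919, "IVQD": 356}
# Columns the submission note above asks for.
REQUIRED_COLUMNS = ["hit_upd", "hit_standard", "hit"]


def precheck_submission(path, upd_type):
    """Hypothetical local sanity check to run before uploading results.json."""
    with open(path, "r") as f:
        df = pd.DataFrame(json.load(f))
    missing = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        raise ValueError(f"missing columns: {missing}")
    if len(df) != DATA_NUM[upd_type]:
        raise ValueError(f"expected {DATA_NUM[upd_type]} rows for {upd_type}, got {len(df)}")
    if df[REQUIRED_COLUMNS].isna().any().any():
        raise ValueError("some questions have no hit value recorded")


# Example: precheck_submission("results.json", "AAD")
```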