AtsuMiyai committed
Commit 62d48db • 1 Parent(s): 3e8020b

change input file format to json

Files changed:
- app.py (+13 -6)
- constants.py (+5 -5)
app.py
CHANGED

@@ -9,6 +9,7 @@ from collections import defaultdict
 from constants import *
 import os
 from huggingface_hub import Repository
+import json


 global data_component_aad, data_component_iasd, data_component_ivqd, filter_component
@@ -34,6 +35,12 @@ def upload_file(files):
     return file_paths


+def create_dual_df(input_file):
+    with open(input_file, 'r') as f:
+        data = json.load(f)
+    return pd.DataFrame(data)
+
+
 # Accuracy Report
 def report_acc(df, groupd='category', metric_type="dual"):
     assert 'split' in df
@@ -88,8 +95,8 @@ def eval_result_dual(data_main, metric_type="dual"):
     return overall, leaf


-def calculate_score(dual_df_path):
-    dual_df =
+def calculate_score(input_path):
+    dual_df = create_dual_df(input_path)
     overall_dual, leaf_dual = eval_result_dual(dual_df)
     overall_standard, leaf_standard = eval_result_dual(dual_df, metric_type="standard")
     overall_upd, leaf_upd = eval_result_dual(dual_df, metric_type="upd")
@@ -98,8 +105,8 @@ def calculate_score(dual_df_path):


 # add the new data into the queue
-def add_queue(base_df, dual_df_path, model_name):
-    dual_df =
+def add_queue(base_df, input_path, model_name):
+    dual_df = create_dual_df(input_path)
     base_df[f"{model_name}_prediction_standard"] = dual_df["prediction_standard"]
     base_df[f"{model_name}_hit_standard"] = dual_df["hit_standard"]
     base_df[f"{model_name}_prediction_upd"] = dual_df["prediction_upd"]
@@ -109,9 +116,9 @@ def add_queue(base_df, dual_df_path, model_name):


 # check whether the input file is correct or not
-def validity_check(
+def validity_check(input_path, UPD_type, question_type):

-    input_df =
+    input_df = create_dual_df(input_path)

     # check for the correct data size
     data_num_dict = {"AAD": 820, "IASD": 919, "IVQD": 356}
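For context, the new loading path replaces the previous spreadsheet read with a plain JSON-to-DataFrame conversion. Below is a minimal usage sketch of the helper added in this commit; the file name is hypothetical, and only the columns that appear elsewhere in this diff are accessed, while the rest of the `result_dual.json` schema is assumed to match the UPD repository's export.

```python
import json

import pandas as pd


def create_dual_df(input_file):
    # Load the submitted result_dual.json and convert it to a DataFrame,
    # mirroring the helper introduced in this commit. The JSON is assumed
    # to be a list of per-question records (anything pd.DataFrame accepts).
    with open(input_file, 'r') as f:
        data = json.load(f)
    return pd.DataFrame(data)


if __name__ == "__main__":
    # Hypothetical file name; a real submission follows the UPD repo instructions.
    dual_df = create_dual_df("llava1.5_13b_result_dual.json")
    # Columns referenced by add_queue in app.py; other columns are assumed.
    print(dual_df[["prediction_standard", "hit_standard",
                   "prediction_upd", "hit_upd"]].head())
```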
constants.py
CHANGED

@@ -39,18 +39,18 @@ LEADERBORAD_INTRODUCTION = """
 - **Ability-wise Evaluation:** We carefully decompose each benchmark into more than 10 abilities to reveal individual model's strengths and weaknesses.
 - **Valuable Insights:** MM-UPD Bench provides multi-perspective insights on trustworthiness and reliablitity for the community.

-Please follow the instructions in [UPD](https://github.com/AtsuMiyai/UPD) to upload the generated `result_dual.
+Please follow the instructions in [UPD](https://github.com/AtsuMiyai/UPD) to upload the generated `result_dual.json` file here. After clicking the `Submit Eval` button, click the `Refresh` button.
 """


 SUBMIT_INTRODUCTION = """# Submit on MM-UPD Benchmark Introduction
-1. Obtain Dual Result
+1. Obtain Dual Result JSON File from our [github repository](https://github.com/AtsuMiyai/UPD/tree/main/scripts/inference).
 2. If you want to update model performance by uploading new results, please ensure 'Model Name Revision' is the same as what's shown in the leaderboard. For example, if you want to modify LLaVA-1.5-13B's performance, you need to fill in 'LLaVA-1.5-13B' in 'Revision Model Name'.
 3. Please provide the correct link of your model's repository for each submission.
 4. After clicking 'Submit Eval', you can click 'Refresh' to obtain the latest result in the leaderboard.

-Note: The example of the submitted
-You need to care about whether (i) the
+Note: The example of the submitted JSON file is this url: [llava1.5_13b_result_dual_detail_submission.json](https://drive.google.com/file/d/1ILYlxcKC_a5Jrm7kyyqeHo0vo3WjkA1V/view?usp=sharing).
+You need to care about whether (i) the JSON file has the prediction for all data, (ii) the data on all options, "hit_upd", "hit_standard", and "hit" exist.

 ## Submit Example
 If you want to upload LLaVA-1.5-13B's result in the leaderboard, you need to:
@@ -63,7 +63,7 @@ SUBMIT_INTRODUCTION = """# Submit on MM-UPD Benchmark Introduction
 7. Fill in 'LLM model' if you select Others for 'LLM Type'.
 8. Select 'AAD', 'IASD', or 'IVQD' in 'UPD_Type'.
 9. Select 'Base', 'Option', or 'Instruction' in 'Question Type'.
-10. Upload results.
+10. Upload results.json.
 11. Click the 'Submit Eval' button.
 12. Click 'Refresh' to obtain the uploaded leaderboard.

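The submission note above asks that the JSON contain a prediction for every question and that the "hit_upd", "hit_standard", and "hit" fields exist, and app.py checks the record count per UPD type (820 for AAD, 919 for IASD, 356 for IVQD). A rough local pre-check is sketched below under those assumptions; it is not the actual `validity_check` in app.py, the required field list beyond the names quoted above is an assumption, and the JSON is assumed to be a list of per-question records.

```python
import json
import sys

# Expected number of records per UPD type, as hard-coded in app.py.
DATA_NUM = {"AAD": 820, "IASD": 919, "IVQD": 356}

# Fields called out in the submission note; any further required fields are assumed.
REQUIRED_FIELDS = ["prediction_standard", "prediction_upd",
                   "hit_standard", "hit_upd", "hit"]


def precheck(path, upd_type):
    # Load the candidate result_dual.json (assumed to be a list of records).
    with open(path, "r") as f:
        records = json.load(f)

    # Check the overall data size for the chosen UPD type.
    if len(records) != DATA_NUM[upd_type]:
        print(f"size mismatch: got {len(records)}, expected {DATA_NUM[upd_type]}")
        return False

    # Check that every record carries the fields named in the submission note.
    for i, rec in enumerate(records):
        missing = [k for k in REQUIRED_FIELDS if k not in rec]
        if missing:
            print(f"record {i} is missing fields: {missing}")
            return False
    return True


if __name__ == "__main__":
    # Usage (hypothetical file name): python precheck.py result_dual.json AAD
    ok = precheck(sys.argv[1], sys.argv[2])
    sys.exit(0 if ok else 1)
```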