Jimin Huang committed on
Commit
2f1ff79
1 Parent(s): b44eb8b

feat: modify leaderboard

Browse files
Files changed (2) hide show
  1. app.py +7 -0
  2. task1_result.csv +9 -0
app.py CHANGED
@@ -10,6 +10,7 @@ TASK1_COLS = [
10
  ("Acc", "number"),
11
  ("F1", "number"),
12
  ("MCC", "number"),
 
13
  ]
14
 
15
  TASK2_COLS = [
@@ -19,6 +20,7 @@ TASK2_COLS = [
19
  ("Rouge-L", "number"),
20
  ("BertScore", "number"),
21
  ("BartScore", "number"),
 
22
  ]
23
 
24
  TASK3_COLS = [
@@ -88,12 +90,17 @@ Our leaderboard incorporates a comprehensive evaluation using diverse metrics li
88
  - **Dataset:** 291 data points.
89
  - **Evaluation Metrics:** Sharpe Ratio (final ranking metric), Cumulative Return, Daily and Annualized Volatility, Maximum Drawdown.
90
 
 
 
 
 
91
  For more details, refer to our [Challenge page](https://sites.google.com/nlg.csie.ntu.edu.tw/finnlp-agentscen/shared-task-finllm?authuser=0).
92
  """
93
 
94
 
95
  def create_data_interface(df):
96
  headers = df.columns
 
97
  types = ["str"] + ["number"] * (len(headers) - 1)
98
 
99
  return gr.components.Dataframe(
 
10
  ("Acc", "number"),
11
  ("F1", "number"),
12
  ("MCC", "number"),
13
+ ("DTL", "number"),
14
  ]
15
 
16
  TASK2_COLS = [
 
20
  ("Rouge-L", "number"),
21
  ("BertScore", "number"),
22
  ("BartScore", "number"),
23
+ ("DTL", "number"),
24
  ]
25
 
26
  TASK3_COLS = [
 
90
  - **Dataset:** 291 data points.
91
  - **Evaluation Metrics:** Sharpe Ratio (final ranking metric), Cumulative Return, Daily and Annualized Volatility, Maximum Drawdown.
92
 
93
+ **Model Cheating Detection: Data Leakage Test (DLT)**
94
+
95
+ To measure the risk that test-set data leaked into a model's training data, we introduce the Data Leakage Test (DLT). The DLT calculates the difference between the model's perplexity on the training set and its perplexity on the test set. A larger difference indicates a lower likelihood of model cheating, while a smaller difference suggests a higher likelihood.
96
+
97
  For more details, refer to our [Challenge page](https://sites.google.com/nlg.csie.ntu.edu.tw/finnlp-agentscen/shared-task-finllm?authuser=0).
98
  """
99
 
100
 
101
  def create_data_interface(df):
102
  headers = df.columns
103
+ print (headers)
104
  types = ["str"] + ["number"] * (len(headers) - 1)
105
 
106
  return gr.components.Dataframe(
task1_result.csv CHANGED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [email protected],0.7626,0.5237,0.7427,38.9031
2
+ [email protected],0.7575,0.5174,0.7555
3
+ [email protected],0.7544,0.5149,0.7581,2.2565
4
+ [email protected],0.7513,0.5018,0.7406
5
+ [email protected],0.7286,0.4554,0.7008
6
+ catmemo,0.711,0.4199,0.6818
7
+ [email protected],0.709,0.4166,0.6941
8
+ [email protected],0.7079,0.4141,0.69
9
+ [email protected],0.4933,0.0141,0.5905