zhuohan-7 committed on
Commit
e025c3d
1 Parent(s): 43c31d2

Upload folder using huggingface_hub

Browse files
app/__pycache__/pages.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/pages.cpython-310.pyc and b/app/__pycache__/pages.cpython-310.pyc differ
 
app/pages.py CHANGED
@@ -79,11 +79,11 @@ def cross_lingual_consistency():
79
  filters_levelone = ['Zero Shot', 'Few Shot']
80
  filters_leveltwo = [
81
  'Cross-MMLU',
82
- 'Cross-MMLU-No-Prompt',
83
  'Cross-XQUAD',
84
- 'Cross-XQUAD-No-Prompt',
85
  'Cross-LogiQA',
86
- 'Cross-LogiQA-No-Prompt',
87
  ]
88
 
89
  category_one_dict = {
@@ -92,12 +92,12 @@ def cross_lingual_consistency():
92
  }
93
 
94
  category_two_dict = {
95
- 'Cross-MMLU' : 'cross_mmlu',
96
- 'Cross-MMLU-No-Prompt' : 'cross_mmlu_no_prompt',
97
- 'Cross-XQUAD' : 'cross_xquad',
98
- 'Cross-XQUAD-No-Prompt' : 'cross_xquad_no_prompt',
99
- 'Cross-LogiQA' : 'cross_logiqa',
100
- 'Cross-LogiQA-No-Prompt': 'cross_logiqa_no_prompt',
101
  }
102
 
103
  left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
@@ -126,14 +126,14 @@ def cultural_reasoning():
126
 
127
  filters_levelone = ['Zero Shot', 'Few Shot']
128
  filters_leveltwo = [
129
- 'SG EVAL V2 MCQ',
130
- 'SG EVAL V2 MCQ No Prompt',
131
- 'SG EVAL V2 Open Ended',
132
- 'SG EVAL',
133
- 'SG EVAL V1 Cleaned',
134
- 'CN EVAL',
135
- 'PH EVAL',
136
- 'US EVAL'
137
  ]
138
 
139
  category_one_dict = {'Zero Shot': 'zero_shot',
@@ -141,14 +141,14 @@ def cultural_reasoning():
141
  }
142
 
143
  category_two_dict = {
144
- 'SG EVAL' : 'sg_eval',
145
- 'SG EVAL V1 Cleaned' : 'sg_eval_v1_cleaned',
146
- 'SG EVAL V2 MCQ' : 'sg_eval_v2_mcq',
147
- 'SG EVAL V2 MCQ No Prompt': 'sg_eval_v2_mcq_no_prompt',
148
- 'SG EVAL V2 Open Ended' : 'sg_eval_v2_open',
149
- 'US EVAL' : 'us_eval',
150
- 'CN EVAL' : 'cn_eval',
151
- 'PH EVAL' : 'ph_eval'
152
  }
153
 
154
  left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
@@ -172,11 +172,11 @@ def general_reasoning():
172
 
173
  filters_levelone = ['Zero Shot', 'Few Shot']
174
  filters_leveltwo = [
 
175
  'MMLU',
176
- 'MMLU-No-Prompt',
177
  'CMMLU',
178
- 'IndoMMLU',
179
- 'IndoMMLU-No-Prompt',
180
  'C-Eval',
181
  'ZBench',
182
  ]
@@ -185,13 +185,13 @@ def general_reasoning():
185
  'Few Shot': 'few_shot'}
186
 
187
  category_two_dict = {
188
- 'MMLU': 'mmlu',
189
- 'MMLU-No-Prompt': 'mmlu_no_prompt',
 
190
  'C-Eval': 'c_eval',
191
  'CMMLU': 'cmmlu',
192
  'ZBench': 'zbench',
193
- 'IndoMMLU': 'indommlu',
194
- 'IndoMMLU-No-Prompt': 'indommlu_no_prompt',
195
  }
196
 
197
  left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
 
79
  filters_levelone = ['Zero Shot', 'Few Shot']
80
  filters_leveltwo = [
81
  'Cross-MMLU',
82
+ #'Cross-MMLU-No-Prompt',
83
  'Cross-XQUAD',
84
+ #'Cross-XQUAD-No-Prompt',
85
  'Cross-LogiQA',
86
+ #'Cross-LogiQA-No-Prompt',
87
  ]
88
 
89
  category_one_dict = {
 
92
  }
93
 
94
  category_two_dict = {
95
+ 'Cross-MMLU' : 'cross_mmlu_no_prompt',
96
+ #'Cross-MMLU-No-Prompt' : 'cross_mmlu_no_prompt',
97
+ 'Cross-XQUAD' : 'cross_xquad_no_prompt',
98
+ #'Cross-XQUAD-No-Prompt' : 'cross_xquad_no_prompt',
99
+ 'Cross-LogiQA' : 'cross_logiqa_no_prompt',
100
+ #'Cross-LogiQA-No-Prompt': 'cross_logiqa_no_prompt',
101
  }
102
 
103
  left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
 
126
 
127
  filters_levelone = ['Zero Shot', 'Few Shot']
128
  filters_leveltwo = [
129
+ 'SG-EVAL-v2-MCQ',
130
+ #'SG EVAL V2 MCQ No Prompt',
131
+ 'SG-EVAL-v2-Open-Ended',
132
+ 'SG-EVAL-v1-Cleaned',
133
+ 'SG-EVAL-v1',
134
+ 'CN-EVAL',
135
+ 'PH-EVAL',
136
+ 'US-EVAL'
137
  ]
138
 
139
  category_one_dict = {'Zero Shot': 'zero_shot',
 
141
  }
142
 
143
  category_two_dict = {
144
+ 'SG-EVAL-v2-MCQ' : 'sg_eval_v2_mcq_no_prompt',
145
+ 'SG-EVAL-v1' : 'sg_eval',
146
+ 'SG-EVAL-v1-Cleaned' : 'sg_eval_v1_cleaned',
147
+ # 'SG EVAL V2 MCQ No Prompt': 'sg_eval_v2_mcq_no_prompt',
148
+ 'SG-EVAL-v2-Open-Ended' : 'sg_eval_v2_open',
149
+ 'US-EVAL' : 'us_eval',
150
+ 'CN-EVAL' : 'cn_eval',
151
+ 'PH-EVAL' : 'ph_eval'
152
  }
153
 
154
  left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
 
172
 
173
  filters_levelone = ['Zero Shot', 'Few Shot']
174
  filters_leveltwo = [
175
+ 'IndoMMLU',
176
  'MMLU',
177
+ #'MMLU-No-Prompt',
178
  'CMMLU',
179
+ #'IndoMMLU-No-Prompt',
 
180
  'C-Eval',
181
  'ZBench',
182
  ]
 
185
  'Few Shot': 'few_shot'}
186
 
187
  category_two_dict = {
188
+ 'IndoMMLU': 'indommlu_no_prompt',
189
+ 'MMLU': 'mmlu_no_prompt',
190
+ #'MMLU-No-Prompt': 'mmlu_no_prompt',
191
  'C-Eval': 'c_eval',
192
  'CMMLU': 'cmmlu',
193
  'ZBench': 'zbench',
194
+ #'IndoMMLU-No-Prompt': 'indommlu_no_prompt',
 
195
  }
196
 
197
  left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])