Spaces:

MERaLiON
/

SeaEval_Leaderboard

Running

App Files Community

zhuohan-7 committed on Oct 15, 2024

Commit

a40ee94

verified ·

1 Parent(s): 4c054d2

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app/draw_diagram.py +1 -0
app/pages.py +27 -9

app/draw_diagram.py CHANGED Viewed

@@ -341,6 +341,7 @@ def draw_only_acc(folder_name, category_one, category_two, sorted):
                     .stMultiSelect [data-baseweb=select] span{
                         max-width: 800px;
                         font-size: 0.9rem;
                     }
                 </style>
                 """, unsafe_allow_html=True)

                     .stMultiSelect [data-baseweb=select] span{
                         max-width: 800px;
                         font-size: 0.9rem;
+                        color: blue; /* Change text color of selected options */
                     }
                 </style>
                 """, unsafe_allow_html=True)

app/pages.py CHANGED Viewed

@@ -15,7 +15,7 @@ def dashboard():
     seaeval_url = "https://seaeval.github.io/"
     st.divider()
-    st.markdown("#### What is [SeaEval](%s)" % seaeval_url)
     with st.container():
         left_co, cent_co,last_co = st.columns(3)
@@ -26,7 +26,7 @@ def dashboard():
         st.markdown('''
                     ''')
-        st.markdown("##### A new benchmark for multilingual foundation models consisting of 28 dataset.")
         st.markdown(''':star: How models understand and reason with natural language?
                     :balloon: Languages: English, Chinese, Malay, Spainish, Indonedian, Vietnamese, Filipino.
                     ''')
@@ -104,11 +104,15 @@ def cultural_reasoning():
     st.title("Cultural Reasoning")
     filters_levelone = ['Zero Shot', 'Few Shot']
-    filters_leveltwo = ['SG EVAL',
-                        'SG EVAL V1 Cleaned',
                         'SG EVAL V2 MCQ',
                         'SG EVAL V2 Open Ended',
-                        'CN EVAL', 'PH EVAL', 'US EVAL']
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}
@@ -132,7 +136,13 @@ def general_reasoning():
     st.title("General Reasoning")
     filters_levelone = ['Zero Shot', 'Few Shot']
-    filters_leveltwo = ['MMLU', 'C Eval', 'CMMLU', 'ZBench', 'IndoMMLU']
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}
@@ -158,7 +168,8 @@ def flores():
     filters_leveltwo = ['Indonesian to English',
                         'Vitenamese to English',
                         'Chinese to English',
-                        'Malay to English']
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}
@@ -182,7 +193,10 @@ def emotion():
     st.title("Emotion")
     filters_levelone = ['Zero Shot', 'Few Shot']
-    filters_leveltwo = ['Indonesian Emotion  Classification', 'SST2']
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}
@@ -205,7 +219,11 @@ def dialogue():
     st.title("Dialogue")
     filters_levelone = ['Zero Shot', 'Few Shot']
-    filters_leveltwo = ['DREAM', 'SAMSum', 'DialogSum']
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}

     seaeval_url = "https://seaeval.github.io/"
     st.divider()
+    st.markdown("#### What is [SeaEval](%s)?" % seaeval_url)
     with st.container():
         left_co, cent_co,last_co = st.columns(3)
         st.markdown('''
                     ''')
+        st.markdown("##### A new benchmark for multilingual, multicultral foundation model evaluation consisting of 28 dataset as the core and keep expanding over time.")
         st.markdown(''':star: How models understand and reason with natural language?
                     :balloon: Languages: English, Chinese, Malay, Spainish, Indonedian, Vietnamese, Filipino.
                     ''')
     st.title("Cultural Reasoning")
     filters_levelone = ['Zero Shot', 'Few Shot']
+    filters_leveltwo = [
                         'SG EVAL V2 MCQ',
                         'SG EVAL V2 Open Ended',
+                        'SG EVAL',
+                        'SG EVAL V1 Cleaned',
+                        'CN EVAL',
+                        'PH EVAL',
+                        'US EVAL'
+                        ]
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}
     st.title("General Reasoning")
     filters_levelone = ['Zero Shot', 'Few Shot']
+    filters_leveltwo = [
+                        'MMLU',
+                        'CMMLU',
+                        'IndoMMLU',
+                        'C Eval',
+                        'ZBench',
+                        ]
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}
     filters_leveltwo = ['Indonesian to English',
                         'Vitenamese to English',
                         'Chinese to English',
+                        'Malay to English'
+                        ]
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}
     st.title("Emotion")
     filters_levelone = ['Zero Shot', 'Few Shot']
+    filters_leveltwo = [
+                        'Indonesian Emotion  Classification',
+                        'SST2',
+                        ]
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}
     st.title("Dialogue")
     filters_levelone = ['Zero Shot', 'Few Shot']
+    filters_leveltwo = [
+                        'DREAM',
+                        'SAMSum',
+                        'DialogSum',
+                        ]
     category_one_dict = {'Zero Shot': 'zero_shot',
                          'Few Shot': 'few_shot'}