Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv +1 -0
- results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv +1 -1
- results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv +1 -1
- results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv +1 -1
- results/flores_translation/zero_shot/ind2eng.csv +0 -1
- results/flores_translation/zero_shot/vie2eng.csv +0 -1
- results/flores_translation/zero_shot/zho2eng.csv +0 -1
- results/flores_translation/zero_shot/zsm2eng.csv +0 -1
- results/general_reasoning/zero_shot/indommlu_no_prompt.csv +1 -1
- results/general_reasoning/zero_shot/mmlu_no_prompt.csv +1 -1
results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv
CHANGED
@@ -3,6 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.512987012987013,0.4394480519480519,0.47337850486110
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.45779220779220786,0.3751623376623376,0.412378792469608,0.5284090909090909,0.5170454545454546,0.5340909090909091,0.4602272727272727,0.4034090909090909,0.4431818181818182,0.3181818181818182
|
4 |
Qwen2_5_7B_Instruct,0.6047077922077922,0.47938311688311697,0.5348014705675028,0.6931818181818182,0.7102272727272727,0.6420454545454546,0.5795454545454546,0.6306818181818182,0.5340909090909091,0.4431818181818182
|
5 |
Sailor2-8B-Chat,0.5503246753246753,0.5363636363636365,0.5432544747850031,0.6136363636363636,0.625,0.5056818181818182,0.5625,0.5113636363636364,0.5511363636363636,0.48295454545454547
|
|
|
6 |
merged_llama3_8b_sg_inst_avg_diff,0.5105519480519481,0.4558441558441559,0.48164954476113636,0.5909090909090909,0.5284090909090909,0.5454545454545454,0.5,0.4943181818181818,0.48863636363636365,0.42613636363636365
|
7 |
SeaLLMs-v3-7B-Chat,0.5324675324675324,0.41266233766233773,0.46497164802104307,0.5681818181818182,0.5852272727272727,0.5738636363636364,0.5568181818181818,0.4943181818181818,0.5170454545454546,0.4318181818181818
|
8 |
gemma-2-9b-it,0.6006493506493508,0.5753246753246755,0.587714328691409,0.6590909090909091,0.6363636363636364,0.5511363636363636,0.6022727272727273,0.5852272727272727,0.6022727272727273,0.5681818181818182
|
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.45779220779220786,0.3751623376623376,0.412378792469608,0.5284090909090909,0.5170454545454546,0.5340909090909091,0.4602272727272727,0.4034090909090909,0.4431818181818182,0.3181818181818182
|
4 |
Qwen2_5_7B_Instruct,0.6047077922077922,0.47938311688311697,0.5348014705675028,0.6931818181818182,0.7102272727272727,0.6420454545454546,0.5795454545454546,0.6306818181818182,0.5340909090909091,0.4431818181818182
|
5 |
Sailor2-8B-Chat,0.5503246753246753,0.5363636363636365,0.5432544747850031,0.6136363636363636,0.625,0.5056818181818182,0.5625,0.5113636363636364,0.5511363636363636,0.48295454545454547
|
6 |
+
Meta-Llama-3-8B-Instruct,0.5,0.4426948051948052,0.46960564830561785,0.6022727272727273,0.5227272727272727,0.5454545454545454,0.5056818181818182,0.4375,0.48295454545454547,0.4034090909090909
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.5105519480519481,0.4558441558441559,0.48164954476113636,0.5909090909090909,0.5284090909090909,0.5454545454545454,0.5,0.4943181818181818,0.48863636363636365,0.42613636363636365
|
8 |
SeaLLMs-v3-7B-Chat,0.5324675324675324,0.41266233766233773,0.46497164802104307,0.5681818181818182,0.5852272727272727,0.5738636363636364,0.5568181818181818,0.4943181818181818,0.5170454545454546,0.4318181818181818
|
9 |
gemma-2-9b-it,0.6006493506493508,0.5753246753246755,0.587714328691409,0.6590909090909091,0.6363636363636364,0.5511363636363636,0.6022727272727273,0.5852272727272727,0.6022727272727273,0.5681818181818182
|
results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv
CHANGED
@@ -3,7 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.6876190476190477,0.5615238095238096,0.6182070607559
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.6676190476190476,0.5590476190476189,0.6085285418019147,0.7533333333333333,0.6666666666666666,0.68,0.6333333333333333,0.6933333333333334,0.64,0.6066666666666667
|
4 |
Qwen2_5_7B_Instruct,0.7742857142857141,0.6222857142857142,0.6900140284752591,0.8466666666666667,0.84,0.8266666666666667,0.74,0.7533333333333333,0.7133333333333334,0.7
|
5 |
Sailor2-8B-Chat,0.6923809523809524,0.6592380952380954,0.6754031781322388,0.7266666666666667,0.7066666666666667,0.7133333333333334,0.6733333333333333,0.6733333333333333,0.6466666666666666,0.7066666666666667
|
6 |
-
Meta-Llama-3-8B-Instruct,0.
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.6980952380952381,0.5891428571428572,0.6390081595918414,0.8466666666666667,0.6933333333333334,0.6933333333333334,0.6933333333333334,0.7133333333333334,0.6133333333333333,0.6333333333333333
|
8 |
SeaLLMs-v3-7B-Chat,0.7342857142857142,0.5765714285714287,0.6459409639562039,0.8333333333333334,0.7266666666666667,0.7866666666666666,0.7133333333333334,0.74,0.6866666666666666,0.6533333333333333
|
9 |
gemma-2-9b-it,0.781904761904762,0.747047619047619,0.7640788528690432,0.84,0.7933333333333333,0.7866666666666666,0.7466666666666667,0.78,0.7466666666666667,0.78
|
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.6676190476190476,0.5590476190476189,0.6085285418019147,0.7533333333333333,0.6666666666666666,0.68,0.6333333333333333,0.6933333333333334,0.64,0.6066666666666667
|
4 |
Qwen2_5_7B_Instruct,0.7742857142857141,0.6222857142857142,0.6900140284752591,0.8466666666666667,0.84,0.8266666666666667,0.74,0.7533333333333333,0.7133333333333334,0.7
|
5 |
Sailor2-8B-Chat,0.6923809523809524,0.6592380952380954,0.6754031781322388,0.7266666666666667,0.7066666666666667,0.7133333333333334,0.6733333333333333,0.6733333333333333,0.6466666666666666,0.7066666666666667
|
6 |
+
Meta-Llama-3-8B-Instruct,0.6647619047619048,0.5036190476190476,0.5730780815259353,0.7733333333333333,0.66,0.6666666666666666,0.66,0.6266666666666667,0.64,0.6266666666666667
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.6980952380952381,0.5891428571428572,0.6390081595918414,0.8466666666666667,0.6933333333333334,0.6933333333333334,0.6933333333333334,0.7133333333333334,0.6133333333333333,0.6333333333333333
|
8 |
SeaLLMs-v3-7B-Chat,0.7342857142857142,0.5765714285714287,0.6459409639562039,0.8333333333333334,0.7266666666666667,0.7866666666666666,0.7133333333333334,0.74,0.6866666666666666,0.6533333333333333
|
9 |
gemma-2-9b-it,0.781904761904762,0.747047619047619,0.7640788528690432,0.84,0.7933333333333333,0.7866666666666666,0.7466666666666667,0.78,0.7466666666666667,0.78
|
results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv
CHANGED
@@ -3,7 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.9168067226890756,0.8292016806722688,0.8708064334608
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.928781512605042,0.8592436974789917,0.892660412722869,0.9470588235294117,0.9084033613445378,0.9352941176470588,0.9243697478991597,,,
|
4 |
Qwen2_5_7B_Instruct,0.9069327731092437,0.8264705882352941,0.8648342089942876,0.9210084033613445,0.8991596638655462,0.9092436974789916,0.8983193277310925,,,
|
5 |
Sailor2-8B-Chat,0.9086134453781513,0.8378151260504201,0.8717792421413649,0.9252100840336135,0.8949579831932774,0.9117647058823529,0.9025210084033614,,,
|
6 |
-
Meta-Llama-3-8B-Instruct,0.
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9302521008403362,0.8899159663865546,0.9210084033613445,0.9058823529411765,,,
|
8 |
SeaLLMs-v3-7B-Chat,0.8943277310924369,0.7991596638655463,0.8440696412045011,0.9210084033613445,0.8773109243697479,0.9,0.8789915966386554,,,
|
9 |
gemma-2-9b-it,0.8668067226890757,0.7012605042016806,0.7752949732453414,0.8773109243697479,0.8529411764705882,0.8714285714285714,0.865546218487395,,,
|
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.928781512605042,0.8592436974789917,0.892660412722869,0.9470588235294117,0.9084033613445378,0.9352941176470588,0.9243697478991597,,,
|
4 |
Qwen2_5_7B_Instruct,0.9069327731092437,0.8264705882352941,0.8648342089942876,0.9210084033613445,0.8991596638655462,0.9092436974789916,0.8983193277310925,,,
|
5 |
Sailor2-8B-Chat,0.9086134453781513,0.8378151260504201,0.8717792421413649,0.9252100840336135,0.8949579831932774,0.9117647058823529,0.9025210084033614,,,
|
6 |
+
Meta-Llama-3-8B-Instruct,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9310924369747899,0.8848739495798319,0.9277310924369748,0.9033613445378151,,,
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9302521008403362,0.8899159663865546,0.9210084033613445,0.9058823529411765,,,
|
8 |
SeaLLMs-v3-7B-Chat,0.8943277310924369,0.7991596638655463,0.8440696412045011,0.9210084033613445,0.8773109243697479,0.9,0.8789915966386554,,,
|
9 |
gemma-2-9b-it,0.8668067226890757,0.7012605042016806,0.7752949732453414,0.8773109243697479,0.8529411764705882,0.8714285714285714,0.865546218487395,,,
|
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv
CHANGED
@@ -3,7 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.7418181818181818
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.7945454545454546
|
4 |
Qwen2_5_7B_Instruct,0.7654545454545455
|
5 |
Sailor2-8B-Chat,0.7145454545454546
|
6 |
-
Meta-Llama-3-8B-Instruct,0.
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.7854545454545454
|
8 |
SeaLLMs-v3-7B-Chat,0.7581818181818182
|
9 |
gemma-2-9b-it,0.7618181818181818
|
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.7945454545454546
|
4 |
Qwen2_5_7B_Instruct,0.7654545454545455
|
5 |
Sailor2-8B-Chat,0.7145454545454546
|
6 |
+
Meta-Llama-3-8B-Instruct,0.8290909090909091
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.7854545454545454
|
8 |
SeaLLMs-v3-7B-Chat,0.7581818181818182
|
9 |
gemma-2-9b-it,0.7618181818181818
|
results/flores_translation/zero_shot/ind2eng.csv
CHANGED
@@ -6,7 +6,6 @@ Qwen2_5_32B_Instruct,0.3923422946746861
|
|
6 |
Qwen2_5_7B_Instruct,0.36472669481333536
|
7 |
Qwen2_5_1_5B_Instruct,0.2624938515155373
|
8 |
Qwen2-72B-Instruct,0.4043588265556185
|
9 |
-
Sailor2-8B-Chat,0.2487972955646591
|
10 |
Meta-Llama-3-8B-Instruct,0.33079891679041123
|
11 |
merged_llama3_8b_sg_inst_avg_diff,0.38376586000725804
|
12 |
Meta-Llama-3.1-70B-Instruct,0.43366494500251235
|
|
|
6 |
Qwen2_5_7B_Instruct,0.36472669481333536
|
7 |
Qwen2_5_1_5B_Instruct,0.2624938515155373
|
8 |
Qwen2-72B-Instruct,0.4043588265556185
|
|
|
9 |
Meta-Llama-3-8B-Instruct,0.33079891679041123
|
10 |
merged_llama3_8b_sg_inst_avg_diff,0.38376586000725804
|
11 |
Meta-Llama-3.1-70B-Instruct,0.43366494500251235
|
results/flores_translation/zero_shot/vie2eng.csv
CHANGED
@@ -6,7 +6,6 @@ Qwen2_5_32B_Instruct,0.33791529833420336
|
|
6 |
Qwen2_5_7B_Instruct,0.3027564749728372
|
7 |
Qwen2_5_1_5B_Instruct,0.21935649300365245
|
8 |
Qwen2-72B-Instruct,0.33005323227052946
|
9 |
-
Sailor2-8B-Chat,0.1825857920682635
|
10 |
Meta-Llama-3-8B-Instruct,0.2647448190950291
|
11 |
merged_llama3_8b_sg_inst_avg_diff,0.30900856944791294
|
12 |
Meta-Llama-3.1-70B-Instruct,0.37244508311079816
|
|
|
6 |
Qwen2_5_7B_Instruct,0.3027564749728372
|
7 |
Qwen2_5_1_5B_Instruct,0.21935649300365245
|
8 |
Qwen2-72B-Instruct,0.33005323227052946
|
|
|
9 |
Meta-Llama-3-8B-Instruct,0.2647448190950291
|
10 |
merged_llama3_8b_sg_inst_avg_diff,0.30900856944791294
|
11 |
Meta-Llama-3.1-70B-Instruct,0.37244508311079816
|
results/flores_translation/zero_shot/zho2eng.csv
CHANGED
@@ -6,7 +6,6 @@ Qwen2_5_32B_Instruct,0.26924811164378015
|
|
6 |
Qwen2_5_7B_Instruct,0.2437311220019033
|
7 |
Qwen2_5_1_5B_Instruct,0.18420680441018222
|
8 |
Qwen2-72B-Instruct,0.23893268538329387
|
9 |
-
Sailor2-8B-Chat,0.16539980828035464
|
10 |
Meta-Llama-3-8B-Instruct,0.199495011482748
|
11 |
merged_llama3_8b_sg_inst_avg_diff,0.24133164017585856
|
12 |
Meta-Llama-3.1-70B-Instruct,0.2832594176173152
|
|
|
6 |
Qwen2_5_7B_Instruct,0.2437311220019033
|
7 |
Qwen2_5_1_5B_Instruct,0.18420680441018222
|
8 |
Qwen2-72B-Instruct,0.23893268538329387
|
|
|
9 |
Meta-Llama-3-8B-Instruct,0.199495011482748
|
10 |
merged_llama3_8b_sg_inst_avg_diff,0.24133164017585856
|
11 |
Meta-Llama-3.1-70B-Instruct,0.2832594176173152
|
results/flores_translation/zero_shot/zsm2eng.csv
CHANGED
@@ -6,7 +6,6 @@ Qwen2_5_32B_Instruct,0.40310877536446654
|
|
6 |
Qwen2_5_7B_Instruct,0.3466422765302921
|
7 |
Qwen2_5_1_5B_Instruct,0.22890805100949677
|
8 |
Qwen2-72B-Instruct,0.40796892621611885
|
9 |
-
Sailor2-8B-Chat,0.269986448536842
|
10 |
Meta-Llama-3-8B-Instruct,0.31625368345049
|
11 |
merged_llama3_8b_sg_inst_avg_diff,0.3729790018011108
|
12 |
Meta-Llama-3.1-70B-Instruct,0.4462132282683508
|
|
|
6 |
Qwen2_5_7B_Instruct,0.3466422765302921
|
7 |
Qwen2_5_1_5B_Instruct,0.22890805100949677
|
8 |
Qwen2-72B-Instruct,0.40796892621611885
|
|
|
9 |
Meta-Llama-3-8B-Instruct,0.31625368345049
|
10 |
merged_llama3_8b_sg_inst_avg_diff,0.3729790018011108
|
11 |
Meta-Llama-3.1-70B-Instruct,0.4462132282683508
|
results/general_reasoning/zero_shot/indommlu_no_prompt.csv
CHANGED
@@ -3,7 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.5483009546698712
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.559516656652647
|
4 |
Qwen2_5_7B_Instruct,0.581814540356499
|
5 |
Sailor2-8B-Chat,0.6342212430736365
|
6 |
-
Meta-Llama-3-8B-Instruct,0.
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.575806128580012
|
8 |
SeaLLMs-v3-7B-Chat,0.5406235396221376
|
9 |
gemma-2-9b-it,0.6210027371653648
|
|
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.559516656652647
|
4 |
Qwen2_5_7B_Instruct,0.581814540356499
|
5 |
Sailor2-8B-Chat,0.6342212430736365
|
6 |
+
Meta-Llama-3-8B-Instruct,0.537686093864744
|
7 |
merged_llama3_8b_sg_inst_avg_diff,0.575806128580012
|
8 |
SeaLLMs-v3-7B-Chat,0.5406235396221376
|
9 |
gemma-2-9b-it,0.6210027371653648
|
results/general_reasoning/zero_shot/mmlu_no_prompt.csv
CHANGED
@@ -2,7 +2,7 @@ Model,Accuracy
|
|
2 |
Meta-Llama-3.1-8B-Instruct,0.7056131569538792
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.6454057919199142
|
4 |
Qwen2_5_7B_Instruct,0.73936360386128
|
5 |
-
Meta-Llama-3-8B-Instruct,0.
|
6 |
merged_llama3_8b_sg_inst_avg_diff,0.6988916696460493
|
7 |
SeaLLMs-v3-7B-Chat,0.6913836253128351
|
8 |
gemma-2-9b-it,0.740293171254916
|
|
|
2 |
Meta-Llama-3.1-8B-Instruct,0.7056131569538792
|
3 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.6454057919199142
|
4 |
Qwen2_5_7B_Instruct,0.73936360386128
|
5 |
+
Meta-Llama-3-8B-Instruct,0.6735788344654987
|
6 |
merged_llama3_8b_sg_inst_avg_diff,0.6988916696460493
|
7 |
SeaLLMs-v3-7B-Chat,0.6913836253128351
|
8 |
gemma-2-9b-it,0.740293171254916
|