zhuohan-7 committed
Commit 427bf8e · verified · 1 Parent(s): c90636b

Upload folder using huggingface_hub
results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv CHANGED
@@ -3,6 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.512987012987013,0.4394480519480519,0.47337850486110
 llama3-8b-cpt-sea-lionv2.1-instruct,0.45779220779220786,0.3751623376623376,0.412378792469608,0.5284090909090909,0.5170454545454546,0.5340909090909091,0.4602272727272727,0.4034090909090909,0.4431818181818182,0.3181818181818182
 Qwen2_5_7B_Instruct,0.6047077922077922,0.47938311688311697,0.5348014705675028,0.6931818181818182,0.7102272727272727,0.6420454545454546,0.5795454545454546,0.6306818181818182,0.5340909090909091,0.4431818181818182
 Sailor2-8B-Chat,0.5503246753246753,0.5363636363636365,0.5432544747850031,0.6136363636363636,0.625,0.5056818181818182,0.5625,0.5113636363636364,0.5511363636363636,0.48295454545454547
+Meta-Llama-3-8B-Instruct,0.5,0.4426948051948052,0.46960564830561785,0.6022727272727273,0.5227272727272727,0.5454545454545454,0.5056818181818182,0.4375,0.48295454545454547,0.4034090909090909
 merged_llama3_8b_sg_inst_avg_diff,0.5105519480519481,0.4558441558441559,0.48164954476113636,0.5909090909090909,0.5284090909090909,0.5454545454545454,0.5,0.4943181818181818,0.48863636363636365,0.42613636363636365
 SeaLLMs-v3-7B-Chat,0.5324675324675324,0.41266233766233773,0.46497164802104307,0.5681818181818182,0.5852272727272727,0.5738636363636364,0.5568181818181818,0.4943181818181818,0.5170454545454546,0.4318181818181818
 gemma-2-9b-it,0.6006493506493508,0.5753246753246755,0.587714328691409,0.6590909090909091,0.6363636363636364,0.5511363636363636,0.6022727272727273,0.5852272727272727,0.6022727272727273,0.5681818181818182
results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv CHANGED
@@ -3,7 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.6876190476190477,0.5615238095238096,0.6182070607559
 llama3-8b-cpt-sea-lionv2.1-instruct,0.6676190476190476,0.5590476190476189,0.6085285418019147,0.7533333333333333,0.6666666666666666,0.68,0.6333333333333333,0.6933333333333334,0.64,0.6066666666666667
 Qwen2_5_7B_Instruct,0.7742857142857141,0.6222857142857142,0.6900140284752591,0.8466666666666667,0.84,0.8266666666666667,0.74,0.7533333333333333,0.7133333333333334,0.7
 Sailor2-8B-Chat,0.6923809523809524,0.6592380952380954,0.6754031781322388,0.7266666666666667,0.7066666666666667,0.7133333333333334,0.6733333333333333,0.6733333333333333,0.6466666666666666,0.7066666666666667
-Meta-Llama-3-8B-Instruct,0.6428571428571429,0.49542857142857133,0.5595955249078094,0.7666666666666667,0.6533333333333333,0.7,0.6466666666666666,0.5733333333333334,0.5733333333333334,0.5866666666666667
+Meta-Llama-3-8B-Instruct,0.6647619047619048,0.5036190476190476,0.5730780815259353,0.7733333333333333,0.66,0.6666666666666666,0.66,0.6266666666666667,0.64,0.6266666666666667
 merged_llama3_8b_sg_inst_avg_diff,0.6980952380952381,0.5891428571428572,0.6390081595918414,0.8466666666666667,0.6933333333333334,0.6933333333333334,0.6933333333333334,0.7133333333333334,0.6133333333333333,0.6333333333333333
 SeaLLMs-v3-7B-Chat,0.7342857142857142,0.5765714285714287,0.6459409639562039,0.8333333333333334,0.7266666666666667,0.7866666666666666,0.7133333333333334,0.74,0.6866666666666666,0.6533333333333333
 gemma-2-9b-it,0.781904761904762,0.747047619047619,0.7640788528690432,0.84,0.7933333333333333,0.7866666666666666,0.7466666666666667,0.78,0.7466666666666667,0.78
results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv CHANGED
@@ -3,7 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.9168067226890756,0.8292016806722688,0.8708064334608
 llama3-8b-cpt-sea-lionv2.1-instruct,0.928781512605042,0.8592436974789917,0.892660412722869,0.9470588235294117,0.9084033613445378,0.9352941176470588,0.9243697478991597,,,
 Qwen2_5_7B_Instruct,0.9069327731092437,0.8264705882352941,0.8648342089942876,0.9210084033613445,0.8991596638655462,0.9092436974789916,0.8983193277310925,,,
 Sailor2-8B-Chat,0.9086134453781513,0.8378151260504201,0.8717792421413649,0.9252100840336135,0.8949579831932774,0.9117647058823529,0.9025210084033614,,,
-Meta-Llama-3-8B-Instruct,0.9060924369747899,0.8224789915966386,0.8622634639161603,0.9319327731092437,0.8932773109243698,0.9134453781512605,0.8857142857142857,,,
+Meta-Llama-3-8B-Instruct,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9310924369747899,0.8848739495798319,0.9277310924369748,0.9033613445378151,,,
 merged_llama3_8b_sg_inst_avg_diff,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9302521008403362,0.8899159663865546,0.9210084033613445,0.9058823529411765,,,
 SeaLLMs-v3-7B-Chat,0.8943277310924369,0.7991596638655463,0.8440696412045011,0.9210084033613445,0.8773109243697479,0.9,0.8789915966386554,,,
 gemma-2-9b-it,0.8668067226890757,0.7012605042016806,0.7752949732453414,0.8773109243697479,0.8529411764705882,0.8714285714285714,0.865546218487395,,,
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv CHANGED
@@ -3,7 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.7418181818181818
 llama3-8b-cpt-sea-lionv2.1-instruct,0.7945454545454546
 Qwen2_5_7B_Instruct,0.7654545454545455
 Sailor2-8B-Chat,0.7145454545454546
-Meta-Llama-3-8B-Instruct,0.8054545454545454
+Meta-Llama-3-8B-Instruct,0.8290909090909091
 merged_llama3_8b_sg_inst_avg_diff,0.7854545454545454
 SeaLLMs-v3-7B-Chat,0.7581818181818182
 gemma-2-9b-it,0.7618181818181818
results/flores_translation/zero_shot/ind2eng.csv CHANGED
@@ -6,7 +6,6 @@ Qwen2_5_32B_Instruct,0.3923422946746861
 Qwen2_5_7B_Instruct,0.36472669481333536
 Qwen2_5_1_5B_Instruct,0.2624938515155373
 Qwen2-72B-Instruct,0.4043588265556185
-Sailor2-8B-Chat,0.2487972955646591
 Meta-Llama-3-8B-Instruct,0.33079891679041123
 merged_llama3_8b_sg_inst_avg_diff,0.38376586000725804
 Meta-Llama-3.1-70B-Instruct,0.43366494500251235
results/flores_translation/zero_shot/vie2eng.csv CHANGED
@@ -6,7 +6,6 @@ Qwen2_5_32B_Instruct,0.33791529833420336
 Qwen2_5_7B_Instruct,0.3027564749728372
 Qwen2_5_1_5B_Instruct,0.21935649300365245
 Qwen2-72B-Instruct,0.33005323227052946
-Sailor2-8B-Chat,0.1825857920682635
 Meta-Llama-3-8B-Instruct,0.2647448190950291
 merged_llama3_8b_sg_inst_avg_diff,0.30900856944791294
 Meta-Llama-3.1-70B-Instruct,0.37244508311079816
results/flores_translation/zero_shot/zho2eng.csv CHANGED
@@ -6,7 +6,6 @@ Qwen2_5_32B_Instruct,0.26924811164378015
 Qwen2_5_7B_Instruct,0.2437311220019033
 Qwen2_5_1_5B_Instruct,0.18420680441018222
 Qwen2-72B-Instruct,0.23893268538329387
-Sailor2-8B-Chat,0.16539980828035464
 Meta-Llama-3-8B-Instruct,0.199495011482748
 merged_llama3_8b_sg_inst_avg_diff,0.24133164017585856
 Meta-Llama-3.1-70B-Instruct,0.2832594176173152
results/flores_translation/zero_shot/zsm2eng.csv CHANGED
@@ -6,7 +6,6 @@ Qwen2_5_32B_Instruct,0.40310877536446654
 Qwen2_5_7B_Instruct,0.3466422765302921
 Qwen2_5_1_5B_Instruct,0.22890805100949677
 Qwen2-72B-Instruct,0.40796892621611885
-Sailor2-8B-Chat,0.269986448536842
 Meta-Llama-3-8B-Instruct,0.31625368345049
 merged_llama3_8b_sg_inst_avg_diff,0.3729790018011108
 Meta-Llama-3.1-70B-Instruct,0.4462132282683508
results/general_reasoning/zero_shot/indommlu_no_prompt.csv CHANGED
@@ -3,7 +3,7 @@ Meta-Llama-3.1-8B-Instruct,0.5483009546698712
 llama3-8b-cpt-sea-lionv2.1-instruct,0.559516656652647
 Qwen2_5_7B_Instruct,0.581814540356499
 Sailor2-8B-Chat,0.6342212430736365
-Meta-Llama-3-8B-Instruct,0.5207957807597303
+Meta-Llama-3-8B-Instruct,0.537686093864744
 merged_llama3_8b_sg_inst_avg_diff,0.575806128580012
 SeaLLMs-v3-7B-Chat,0.5406235396221376
 gemma-2-9b-it,0.6210027371653648
results/general_reasoning/zero_shot/mmlu_no_prompt.csv CHANGED
@@ -2,7 +2,7 @@ Model,Accuracy
 Meta-Llama-3.1-8B-Instruct,0.7056131569538792
 llama3-8b-cpt-sea-lionv2.1-instruct,0.6454057919199142
 Qwen2_5_7B_Instruct,0.73936360386128
-Meta-Llama-3-8B-Instruct,0.6618519842688595
+Meta-Llama-3-8B-Instruct,0.6735788344654987
 merged_llama3_8b_sg_inst_avg_diff,0.6988916696460493
 SeaLLMs-v3-7B-Chat,0.6913836253128351
 gemma-2-9b-it,0.740293171254916
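
For anyone consuming these result files downstream, a minimal sketch of loading one of the updated CSVs and ranking models, assuming a local checkout and pandas. The Model,Accuracy header is taken from the mmlu_no_prompt.csv hunk context above; the other files in results/ carry task-specific columns, so their column names are not assumed here.

# Minimal sketch: rank models by accuracy in one updated result file.
# Assumes pandas is installed and the repository is checked out locally;
# the "Model,Accuracy" header comes from the hunk context shown above.
import pandas as pd

df = pd.read_csv("results/general_reasoning/zero_shot/mmlu_no_prompt.csv")
print(df.sort_values("Accuracy", ascending=False).to_string(index=False))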