zhuohan-7 commited on
Commit
969c3ae
·
verified ·
1 Parent(s): 98c8a71

Upload folder using huggingface_hub

Browse files
results/cultural_reasoning/few_shot/sg_eval_v2_mcq.csv CHANGED
@@ -1 +1,2 @@
1
  Model,Accuracy
 
 
1
  Model,Accuracy
2
+ Meta-Llama-3-8B,0.7654545454545455
results/cultural_reasoning/few_shot/sg_eval_v2_open.csv CHANGED
@@ -1 +1,2 @@
1
  Model,Accuracy
 
 
1
  Model,Accuracy
2
+ Meta-Llama-3-8B,50.8
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv CHANGED
@@ -1,23 +1,32 @@
1
  Model,Accuracy
 
2
  Meta-Llama-3.1-8B-Instruct,0.7854545454545454
 
3
  Qwen2_5_32B_Instruct,0.8436363636363636
4
  Qwen2_5_7B_Instruct,0.78
5
  Qwen2_5_1_5B_Instruct,0.6636363636363637
 
6
  cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
7
  Meta-Llama-3-8B-Instruct,0.7909090909090909
 
8
  Qwen2_5_3B_Instruct,0.72
9
  cross_openhermes_llama3_8b_16384_inst,0.7963636363636364
10
  cross_openhermes_llama3_70b_4096_inst_2,0.8618181818181818
 
11
  Qwen2_5_72B_Instruct,0.8618181818181818
 
12
  Meta-Llama-3-70B-Instruct,0.8381818181818181
13
  Qwen2_5_14B_Instruct,0.8345454545454546
14
  sg_llama3_8192_8b,0.76
 
15
  sg_llama3_70b_inst,0.8436363636363636
16
  cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
17
  gemma-2-2b-it,0.7163636363636363
 
18
  cross_openhermes_llama3_8b_12288_inst,0.7890909090909091
19
  Qwen2_5_0_5B_Instruct,0.5727272727272728
20
  GPT4o_0513,0.8709090909090909
21
  cross_openhermes_llama3_8b_8192_inst,0.78
22
  cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
23
  cross_openhermes_llama3_8b_4096_2_inst,0.7654545454545455
 
 
1
  Model,Accuracy
2
+ Qwen2-7B-Instruct,0.7872727272727272
3
  Meta-Llama-3.1-8B-Instruct,0.7854545454545454
4
+ llama3-8b-cpt-sea-lionv2.1-instruct,0.7836363636363637
5
  Qwen2_5_32B_Instruct,0.8436363636363636
6
  Qwen2_5_7B_Instruct,0.78
7
  Qwen2_5_1_5B_Instruct,0.6636363636363637
8
+ Qwen2-72B-Instruct,0.8581818181818182
9
  cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
10
  Meta-Llama-3-8B-Instruct,0.7909090909090909
11
+ Meta-Llama-3.1-70B-Instruct,0.8763636363636363
12
  Qwen2_5_3B_Instruct,0.72
13
  cross_openhermes_llama3_8b_16384_inst,0.7963636363636364
14
  cross_openhermes_llama3_70b_4096_inst_2,0.8618181818181818
15
+ SeaLLMs-v3-7B-Chat,0.7836363636363637
16
  Qwen2_5_72B_Instruct,0.8618181818181818
17
+ gemma-2-9b-it,0.8036363636363636
18
  Meta-Llama-3-70B-Instruct,0.8381818181818181
19
  Qwen2_5_14B_Instruct,0.8345454545454546
20
  sg_llama3_8192_8b,0.76
21
+ Meta-Llama-3.1-70B,0.44181818181818183
22
  sg_llama3_70b_inst,0.8436363636363636
23
  cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
24
  gemma-2-2b-it,0.7163636363636363
25
+ llama3-8b-cpt-sea-lionv2-instruct,0.7763636363636364
26
  cross_openhermes_llama3_8b_12288_inst,0.7890909090909091
27
  Qwen2_5_0_5B_Instruct,0.5727272727272728
28
  GPT4o_0513,0.8709090909090909
29
  cross_openhermes_llama3_8b_8192_inst,0.78
30
  cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
31
  cross_openhermes_llama3_8b_4096_2_inst,0.7654545454545455
32
+ Meta-Llama-3.1-8B,0.4381818181818182
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv CHANGED
@@ -1,20 +1,27 @@
1
  Model,Accuracy
 
2
  Meta-Llama-3.1-8B-Instruct,49.2
 
3
  Qwen2_5_32B_Instruct,53.2
4
  Qwen2_5_7B_Instruct,50.279999999999994
5
  Qwen2_5_1_5B_Instruct,44.480000000000004
 
6
  cross_openhermes_llama3_8b_4096_inst,51.6
7
  Meta-Llama-3-8B-Instruct,51.120000000000005
 
8
  Qwen2_5_3B_Instruct,47.24
9
  cross_openhermes_llama3_8b_16384_inst,52.44
10
  cross_openhermes_llama3_70b_4096_inst_2,55.8
 
11
  Qwen2_5_72B_Instruct,53.32
 
12
  Meta-Llama-3-70B-Instruct,50.599999999999994
13
  Qwen2_5_14B_Instruct,53.2
14
  sg_llama3_8192_8b,53.4
15
  sg_llama3_70b_inst,51.959999999999994
16
  cross_openhermes_llama3_8b_2048_inst,52.24
17
  gemma-2-2b-it,52.08
 
18
  cross_openhermes_llama3_8b_12288_inst,52.480000000000004
19
  Qwen2_5_0_5B_Instruct,35.28
20
  GPT4o_0513,57.28
 
1
  Model,Accuracy
2
+ Qwen2-7B-Instruct,56.559999999999995
3
  Meta-Llama-3.1-8B-Instruct,49.2
4
+ llama3-8b-cpt-sea-lionv2.1-instruct,50.03999999999999
5
  Qwen2_5_32B_Instruct,53.2
6
  Qwen2_5_7B_Instruct,50.279999999999994
7
  Qwen2_5_1_5B_Instruct,44.480000000000004
8
+ Qwen2-72B-Instruct,54.080000000000005
9
  cross_openhermes_llama3_8b_4096_inst,51.6
10
  Meta-Llama-3-8B-Instruct,51.120000000000005
11
+ Meta-Llama-3.1-70B-Instruct,51.31999999999999
12
  Qwen2_5_3B_Instruct,47.24
13
  cross_openhermes_llama3_8b_16384_inst,52.44
14
  cross_openhermes_llama3_70b_4096_inst_2,55.8
15
+ SeaLLMs-v3-7B-Chat,55.0
16
  Qwen2_5_72B_Instruct,53.32
17
+ gemma-2-9b-it,53.96
18
  Meta-Llama-3-70B-Instruct,50.599999999999994
19
  Qwen2_5_14B_Instruct,53.2
20
  sg_llama3_8192_8b,53.4
21
  sg_llama3_70b_inst,51.959999999999994
22
  cross_openhermes_llama3_8b_2048_inst,52.24
23
  gemma-2-2b-it,52.08
24
+ llama3-8b-cpt-sea-lionv2-instruct,50.03999999999999
25
  cross_openhermes_llama3_8b_12288_inst,52.480000000000004
26
  Qwen2_5_0_5B_Instruct,35.28
27
  GPT4o_0513,57.28
results/flores_translation/zero_shot/ind2eng.csv CHANGED
@@ -1,7 +1,9 @@
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.29408553325533265
3
  Meta-Llama-3.1-8B-Instruct,0.3765752579792989
 
4
  Qwen2_5_7B_Instruct,0.36472669481333536
 
5
  Qwen2-72B-Instruct,0.4043588265556185
6
  cross_openhermes_llama3_8b_4096_inst,0.37782883404862155
7
  Meta-Llama-3-8B-Instruct,0.33079891679041123
@@ -20,6 +22,7 @@ cross_openhermes_llama3_8b_2048_inst,0.3904916300086918
20
  gemma-2-2b-it,0.3482500758113138
21
  llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
22
  cross_openhermes_llama3_8b_12288_inst,0.3900675406718024
 
23
  GPT4o_0513,0.42589589086974855
24
  cross_openhermes_llama3_8b_8192_inst,0.3929315974686861
25
  cross_openhermes_llama3_70b_4096_inst,0.4206616934730876
 
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.29408553325533265
3
  Meta-Llama-3.1-8B-Instruct,0.3765752579792989
4
+ Qwen2_5_32B_Instruct,0.3923422946746861
5
  Qwen2_5_7B_Instruct,0.36472669481333536
6
+ Qwen2_5_1_5B_Instruct,0.2624938515155373
7
  Qwen2-72B-Instruct,0.4043588265556185
8
  cross_openhermes_llama3_8b_4096_inst,0.37782883404862155
9
  Meta-Llama-3-8B-Instruct,0.33079891679041123
 
22
  gemma-2-2b-it,0.3482500758113138
23
  llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
24
  cross_openhermes_llama3_8b_12288_inst,0.3900675406718024
25
+ Qwen2_5_0_5B_Instruct,0.15776662800152338
26
  GPT4o_0513,0.42589589086974855
27
  cross_openhermes_llama3_8b_8192_inst,0.3929315974686861
28
  cross_openhermes_llama3_70b_4096_inst,0.4206616934730876
results/flores_translation/zero_shot/vie2eng.csv CHANGED
@@ -1,7 +1,9 @@
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.24106736560355876
3
  Meta-Llama-3.1-8B-Instruct,0.31019605539004524
 
4
  Qwen2_5_7B_Instruct,0.3027564749728372
 
5
  Qwen2-72B-Instruct,0.33005323227052946
6
  cross_openhermes_llama3_8b_4096_inst,0.28905588559612455
7
  Meta-Llama-3-8B-Instruct,0.2647448190950291
@@ -20,6 +22,7 @@ cross_openhermes_llama3_8b_2048_inst,0.2973194210388712
20
  gemma-2-2b-it,0.27518909199172303
21
  llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
22
  cross_openhermes_llama3_8b_12288_inst,0.29952664743728336
 
23
  GPT4o_0513,0.36219303373759176
24
  cross_openhermes_llama3_8b_8192_inst,0.29989110440173306
25
  cross_openhermes_llama3_70b_4096_inst,0.3538368711937718
 
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.24106736560355876
3
  Meta-Llama-3.1-8B-Instruct,0.31019605539004524
4
+ Qwen2_5_32B_Instruct,0.33791529833420336
5
  Qwen2_5_7B_Instruct,0.3027564749728372
6
+ Qwen2_5_1_5B_Instruct,0.21935649300365245
7
  Qwen2-72B-Instruct,0.33005323227052946
8
  cross_openhermes_llama3_8b_4096_inst,0.28905588559612455
9
  Meta-Llama-3-8B-Instruct,0.2647448190950291
 
22
  gemma-2-2b-it,0.27518909199172303
23
  llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
24
  cross_openhermes_llama3_8b_12288_inst,0.29952664743728336
25
+ Qwen2_5_0_5B_Instruct,0.14677375445859656
26
  GPT4o_0513,0.36219303373759176
27
  cross_openhermes_llama3_8b_8192_inst,0.29989110440173306
28
  cross_openhermes_llama3_70b_4096_inst,0.3538368711937718
results/flores_translation/zero_shot/zho2eng.csv CHANGED
@@ -1,7 +1,9 @@
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.2113761361724575
3
  Meta-Llama-3.1-8B-Instruct,0.23889886925287113
 
4
  Qwen2_5_7B_Instruct,0.2437311220019033
 
5
  Qwen2-72B-Instruct,0.23893268538329387
6
  cross_openhermes_llama3_8b_4096_inst,0.2258901846942186
7
  Meta-Llama-3-8B-Instruct,0.199495011482748
@@ -20,6 +22,7 @@ cross_openhermes_llama3_8b_2048_inst,0.23916426190948417
20
  gemma-2-2b-it,0.21164036008441425
21
  llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
22
  cross_openhermes_llama3_8b_12288_inst,0.2437964546132799
 
23
  GPT4o_0513,0.27722306559544163
24
  cross_openhermes_llama3_8b_8192_inst,0.24473214674903845
25
  cross_openhermes_llama3_70b_4096_inst,0.27230844604661014
 
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.2113761361724575
3
  Meta-Llama-3.1-8B-Instruct,0.23889886925287113
4
+ Qwen2_5_32B_Instruct,0.26924811164378015
5
  Qwen2_5_7B_Instruct,0.2437311220019033
6
+ Qwen2_5_1_5B_Instruct,0.18420680441018222
7
  Qwen2-72B-Instruct,0.23893268538329387
8
  cross_openhermes_llama3_8b_4096_inst,0.2258901846942186
9
  Meta-Llama-3-8B-Instruct,0.199495011482748
 
22
  gemma-2-2b-it,0.21164036008441425
23
  llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
24
  cross_openhermes_llama3_8b_12288_inst,0.2437964546132799
25
+ Qwen2_5_0_5B_Instruct,0.13846648470535672
26
  GPT4o_0513,0.27722306559544163
27
  cross_openhermes_llama3_8b_8192_inst,0.24473214674903845
28
  cross_openhermes_llama3_70b_4096_inst,0.27230844604661014
results/flores_translation/zero_shot/zsm2eng.csv CHANGED
@@ -1,7 +1,9 @@
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.28031997065822994
3
  Meta-Llama-3.1-8B-Instruct,0.3700921225177551
 
4
  Qwen2_5_7B_Instruct,0.3466422765302921
 
5
  Qwen2-72B-Instruct,0.40796892621611885
6
  cross_openhermes_llama3_8b_4096_inst,0.37996622288549425
7
  Meta-Llama-3-8B-Instruct,0.31625368345049
@@ -20,6 +22,7 @@ cross_openhermes_llama3_8b_2048_inst,0.3904643635616676
20
  gemma-2-2b-it,0.33737270487369614
21
  llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
22
  cross_openhermes_llama3_8b_12288_inst,0.39589080400186966
 
23
  GPT4o_0513,0.451496635720668
24
  cross_openhermes_llama3_8b_8192_inst,0.39476822018254265
25
  cross_openhermes_llama3_70b_4096_inst,0.43447247409976697
 
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.28031997065822994
3
  Meta-Llama-3.1-8B-Instruct,0.3700921225177551
4
+ Qwen2_5_32B_Instruct,0.40310877536446654
5
  Qwen2_5_7B_Instruct,0.3466422765302921
6
+ Qwen2_5_1_5B_Instruct,0.22890805100949677
7
  Qwen2-72B-Instruct,0.40796892621611885
8
  cross_openhermes_llama3_8b_4096_inst,0.37996622288549425
9
  Meta-Llama-3-8B-Instruct,0.31625368345049
 
22
  gemma-2-2b-it,0.33737270487369614
23
  llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
24
  cross_openhermes_llama3_8b_12288_inst,0.39589080400186966
25
+ Qwen2_5_0_5B_Instruct,0.1194369315142997
26
  GPT4o_0513,0.451496635720668
27
  cross_openhermes_llama3_8b_8192_inst,0.39476822018254265
28
  cross_openhermes_llama3_70b_4096_inst,0.43447247409976697
results/fundamental_nlp_tasks/zero_shot/c3.csv CHANGED
@@ -17,6 +17,7 @@ gemma-2-9b-it,0.9222139117427075
17
  Meta-Llama-3-70B-Instruct,0.9521316379955124
18
  Qwen2_5_14B_Instruct,0.9502617801047121
19
  sg_llama3_8192_8b,0.8051608077786089
 
20
  sg_llama3_70b_inst,0.9289454001495886
21
  cross_openhermes_llama3_8b_2048_inst,0.8167539267015707
22
  gemma-2-2b-it,0.7700074794315632
 
17
  Meta-Llama-3-70B-Instruct,0.9521316379955124
18
  Qwen2_5_14B_Instruct,0.9502617801047121
19
  sg_llama3_8192_8b,0.8051608077786089
20
+ Meta-Llama-3.1-70B,0.7786088257292446
21
  sg_llama3_70b_inst,0.9289454001495886
22
  cross_openhermes_llama3_8b_2048_inst,0.8167539267015707
23
  gemma-2-2b-it,0.7700074794315632