Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results/cultural_reasoning/few_shot/sg_eval_v2_mcq.csv +1 -0
- results/cultural_reasoning/few_shot/sg_eval_v2_open.csv +1 -0
- results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv +9 -0
- results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +7 -0
- results/flores_translation/zero_shot/ind2eng.csv +3 -0
- results/flores_translation/zero_shot/vie2eng.csv +3 -0
- results/flores_translation/zero_shot/zho2eng.csv +3 -0
- results/flores_translation/zero_shot/zsm2eng.csv +3 -0
- results/fundamental_nlp_tasks/zero_shot/c3.csv +1 -0
results/cultural_reasoning/few_shot/sg_eval_v2_mcq.csv
CHANGED
@@ -1 +1,2 @@
|
|
1 |
Model,Accuracy
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-8B,0.7654545454545455
|
results/cultural_reasoning/few_shot/sg_eval_v2_open.csv
CHANGED
@@ -1 +1,2 @@
|
|
1 |
Model,Accuracy
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-8B,50.8
|
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv
CHANGED
@@ -1,23 +1,32 @@
|
|
1 |
Model,Accuracy
|
|
|
2 |
Meta-Llama-3.1-8B-Instruct,0.7854545454545454
|
|
|
3 |
Qwen2_5_32B_Instruct,0.8436363636363636
|
4 |
Qwen2_5_7B_Instruct,0.78
|
5 |
Qwen2_5_1_5B_Instruct,0.6636363636363637
|
|
|
6 |
cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
|
7 |
Meta-Llama-3-8B-Instruct,0.7909090909090909
|
|
|
8 |
Qwen2_5_3B_Instruct,0.72
|
9 |
cross_openhermes_llama3_8b_16384_inst,0.7963636363636364
|
10 |
cross_openhermes_llama3_70b_4096_inst_2,0.8618181818181818
|
|
|
11 |
Qwen2_5_72B_Instruct,0.8618181818181818
|
|
|
12 |
Meta-Llama-3-70B-Instruct,0.8381818181818181
|
13 |
Qwen2_5_14B_Instruct,0.8345454545454546
|
14 |
sg_llama3_8192_8b,0.76
|
|
|
15 |
sg_llama3_70b_inst,0.8436363636363636
|
16 |
cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
|
17 |
gemma-2-2b-it,0.7163636363636363
|
|
|
18 |
cross_openhermes_llama3_8b_12288_inst,0.7890909090909091
|
19 |
Qwen2_5_0_5B_Instruct,0.5727272727272728
|
20 |
GPT4o_0513,0.8709090909090909
|
21 |
cross_openhermes_llama3_8b_8192_inst,0.78
|
22 |
cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
|
23 |
cross_openhermes_llama3_8b_4096_2_inst,0.7654545454545455
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.7872727272727272
|
3 |
Meta-Llama-3.1-8B-Instruct,0.7854545454545454
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.7836363636363637
|
5 |
Qwen2_5_32B_Instruct,0.8436363636363636
|
6 |
Qwen2_5_7B_Instruct,0.78
|
7 |
Qwen2_5_1_5B_Instruct,0.6636363636363637
|
8 |
+
Qwen2-72B-Instruct,0.8581818181818182
|
9 |
cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
|
10 |
Meta-Llama-3-8B-Instruct,0.7909090909090909
|
11 |
+
Meta-Llama-3.1-70B-Instruct,0.8763636363636363
|
12 |
Qwen2_5_3B_Instruct,0.72
|
13 |
cross_openhermes_llama3_8b_16384_inst,0.7963636363636364
|
14 |
cross_openhermes_llama3_70b_4096_inst_2,0.8618181818181818
|
15 |
+
SeaLLMs-v3-7B-Chat,0.7836363636363637
|
16 |
Qwen2_5_72B_Instruct,0.8618181818181818
|
17 |
+
gemma-2-9b-it,0.8036363636363636
|
18 |
Meta-Llama-3-70B-Instruct,0.8381818181818181
|
19 |
Qwen2_5_14B_Instruct,0.8345454545454546
|
20 |
sg_llama3_8192_8b,0.76
|
21 |
+
Meta-Llama-3.1-70B,0.44181818181818183
|
22 |
sg_llama3_70b_inst,0.8436363636363636
|
23 |
cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
|
24 |
gemma-2-2b-it,0.7163636363636363
|
25 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.7763636363636364
|
26 |
cross_openhermes_llama3_8b_12288_inst,0.7890909090909091
|
27 |
Qwen2_5_0_5B_Instruct,0.5727272727272728
|
28 |
GPT4o_0513,0.8709090909090909
|
29 |
cross_openhermes_llama3_8b_8192_inst,0.78
|
30 |
cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
|
31 |
cross_openhermes_llama3_8b_4096_2_inst,0.7654545454545455
|
32 |
+
Meta-Llama-3.1-8B,0.4381818181818182
|
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv
CHANGED
@@ -1,20 +1,27 @@
|
|
1 |
Model,Accuracy
|
|
|
2 |
Meta-Llama-3.1-8B-Instruct,49.2
|
|
|
3 |
Qwen2_5_32B_Instruct,53.2
|
4 |
Qwen2_5_7B_Instruct,50.279999999999994
|
5 |
Qwen2_5_1_5B_Instruct,44.480000000000004
|
|
|
6 |
cross_openhermes_llama3_8b_4096_inst,51.6
|
7 |
Meta-Llama-3-8B-Instruct,51.120000000000005
|
|
|
8 |
Qwen2_5_3B_Instruct,47.24
|
9 |
cross_openhermes_llama3_8b_16384_inst,52.44
|
10 |
cross_openhermes_llama3_70b_4096_inst_2,55.8
|
|
|
11 |
Qwen2_5_72B_Instruct,53.32
|
|
|
12 |
Meta-Llama-3-70B-Instruct,50.599999999999994
|
13 |
Qwen2_5_14B_Instruct,53.2
|
14 |
sg_llama3_8192_8b,53.4
|
15 |
sg_llama3_70b_inst,51.959999999999994
|
16 |
cross_openhermes_llama3_8b_2048_inst,52.24
|
17 |
gemma-2-2b-it,52.08
|
|
|
18 |
cross_openhermes_llama3_8b_12288_inst,52.480000000000004
|
19 |
Qwen2_5_0_5B_Instruct,35.28
|
20 |
GPT4o_0513,57.28
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,56.559999999999995
|
3 |
Meta-Llama-3.1-8B-Instruct,49.2
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,50.03999999999999
|
5 |
Qwen2_5_32B_Instruct,53.2
|
6 |
Qwen2_5_7B_Instruct,50.279999999999994
|
7 |
Qwen2_5_1_5B_Instruct,44.480000000000004
|
8 |
+
Qwen2-72B-Instruct,54.080000000000005
|
9 |
cross_openhermes_llama3_8b_4096_inst,51.6
|
10 |
Meta-Llama-3-8B-Instruct,51.120000000000005
|
11 |
+
Meta-Llama-3.1-70B-Instruct,51.31999999999999
|
12 |
Qwen2_5_3B_Instruct,47.24
|
13 |
cross_openhermes_llama3_8b_16384_inst,52.44
|
14 |
cross_openhermes_llama3_70b_4096_inst_2,55.8
|
15 |
+
SeaLLMs-v3-7B-Chat,55.0
|
16 |
Qwen2_5_72B_Instruct,53.32
|
17 |
+
gemma-2-9b-it,53.96
|
18 |
Meta-Llama-3-70B-Instruct,50.599999999999994
|
19 |
Qwen2_5_14B_Instruct,53.2
|
20 |
sg_llama3_8192_8b,53.4
|
21 |
sg_llama3_70b_inst,51.959999999999994
|
22 |
cross_openhermes_llama3_8b_2048_inst,52.24
|
23 |
gemma-2-2b-it,52.08
|
24 |
+
llama3-8b-cpt-sea-lionv2-instruct,50.03999999999999
|
25 |
cross_openhermes_llama3_8b_12288_inst,52.480000000000004
|
26 |
Qwen2_5_0_5B_Instruct,35.28
|
27 |
GPT4o_0513,57.28
|
results/flores_translation/zero_shot/ind2eng.csv
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.29408553325533265
|
3 |
Meta-Llama-3.1-8B-Instruct,0.3765752579792989
|
|
|
4 |
Qwen2_5_7B_Instruct,0.36472669481333536
|
|
|
5 |
Qwen2-72B-Instruct,0.4043588265556185
|
6 |
cross_openhermes_llama3_8b_4096_inst,0.37782883404862155
|
7 |
Meta-Llama-3-8B-Instruct,0.33079891679041123
|
@@ -20,6 +22,7 @@ cross_openhermes_llama3_8b_2048_inst,0.3904916300086918
|
|
20 |
gemma-2-2b-it,0.3482500758113138
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
|
22 |
cross_openhermes_llama3_8b_12288_inst,0.3900675406718024
|
|
|
23 |
GPT4o_0513,0.42589589086974855
|
24 |
cross_openhermes_llama3_8b_8192_inst,0.3929315974686861
|
25 |
cross_openhermes_llama3_70b_4096_inst,0.4206616934730876
|
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.29408553325533265
|
3 |
Meta-Llama-3.1-8B-Instruct,0.3765752579792989
|
4 |
+
Qwen2_5_32B_Instruct,0.3923422946746861
|
5 |
Qwen2_5_7B_Instruct,0.36472669481333536
|
6 |
+
Qwen2_5_1_5B_Instruct,0.2624938515155373
|
7 |
Qwen2-72B-Instruct,0.4043588265556185
|
8 |
cross_openhermes_llama3_8b_4096_inst,0.37782883404862155
|
9 |
Meta-Llama-3-8B-Instruct,0.33079891679041123
|
|
|
22 |
gemma-2-2b-it,0.3482500758113138
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
|
24 |
cross_openhermes_llama3_8b_12288_inst,0.3900675406718024
|
25 |
+
Qwen2_5_0_5B_Instruct,0.15776662800152338
|
26 |
GPT4o_0513,0.42589589086974855
|
27 |
cross_openhermes_llama3_8b_8192_inst,0.3929315974686861
|
28 |
cross_openhermes_llama3_70b_4096_inst,0.4206616934730876
|
results/flores_translation/zero_shot/vie2eng.csv
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.24106736560355876
|
3 |
Meta-Llama-3.1-8B-Instruct,0.31019605539004524
|
|
|
4 |
Qwen2_5_7B_Instruct,0.3027564749728372
|
|
|
5 |
Qwen2-72B-Instruct,0.33005323227052946
|
6 |
cross_openhermes_llama3_8b_4096_inst,0.28905588559612455
|
7 |
Meta-Llama-3-8B-Instruct,0.2647448190950291
|
@@ -20,6 +22,7 @@ cross_openhermes_llama3_8b_2048_inst,0.2973194210388712
|
|
20 |
gemma-2-2b-it,0.27518909199172303
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
|
22 |
cross_openhermes_llama3_8b_12288_inst,0.29952664743728336
|
|
|
23 |
GPT4o_0513,0.36219303373759176
|
24 |
cross_openhermes_llama3_8b_8192_inst,0.29989110440173306
|
25 |
cross_openhermes_llama3_70b_4096_inst,0.3538368711937718
|
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.24106736560355876
|
3 |
Meta-Llama-3.1-8B-Instruct,0.31019605539004524
|
4 |
+
Qwen2_5_32B_Instruct,0.33791529833420336
|
5 |
Qwen2_5_7B_Instruct,0.3027564749728372
|
6 |
+
Qwen2_5_1_5B_Instruct,0.21935649300365245
|
7 |
Qwen2-72B-Instruct,0.33005323227052946
|
8 |
cross_openhermes_llama3_8b_4096_inst,0.28905588559612455
|
9 |
Meta-Llama-3-8B-Instruct,0.2647448190950291
|
|
|
22 |
gemma-2-2b-it,0.27518909199172303
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
|
24 |
cross_openhermes_llama3_8b_12288_inst,0.29952664743728336
|
25 |
+
Qwen2_5_0_5B_Instruct,0.14677375445859656
|
26 |
GPT4o_0513,0.36219303373759176
|
27 |
cross_openhermes_llama3_8b_8192_inst,0.29989110440173306
|
28 |
cross_openhermes_llama3_70b_4096_inst,0.3538368711937718
|
results/flores_translation/zero_shot/zho2eng.csv
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.2113761361724575
|
3 |
Meta-Llama-3.1-8B-Instruct,0.23889886925287113
|
|
|
4 |
Qwen2_5_7B_Instruct,0.2437311220019033
|
|
|
5 |
Qwen2-72B-Instruct,0.23893268538329387
|
6 |
cross_openhermes_llama3_8b_4096_inst,0.2258901846942186
|
7 |
Meta-Llama-3-8B-Instruct,0.199495011482748
|
@@ -20,6 +22,7 @@ cross_openhermes_llama3_8b_2048_inst,0.23916426190948417
|
|
20 |
gemma-2-2b-it,0.21164036008441425
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
|
22 |
cross_openhermes_llama3_8b_12288_inst,0.2437964546132799
|
|
|
23 |
GPT4o_0513,0.27722306559544163
|
24 |
cross_openhermes_llama3_8b_8192_inst,0.24473214674903845
|
25 |
cross_openhermes_llama3_70b_4096_inst,0.27230844604661014
|
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.2113761361724575
|
3 |
Meta-Llama-3.1-8B-Instruct,0.23889886925287113
|
4 |
+
Qwen2_5_32B_Instruct,0.26924811164378015
|
5 |
Qwen2_5_7B_Instruct,0.2437311220019033
|
6 |
+
Qwen2_5_1_5B_Instruct,0.18420680441018222
|
7 |
Qwen2-72B-Instruct,0.23893268538329387
|
8 |
cross_openhermes_llama3_8b_4096_inst,0.2258901846942186
|
9 |
Meta-Llama-3-8B-Instruct,0.199495011482748
|
|
|
22 |
gemma-2-2b-it,0.21164036008441425
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
|
24 |
cross_openhermes_llama3_8b_12288_inst,0.2437964546132799
|
25 |
+
Qwen2_5_0_5B_Instruct,0.13846648470535672
|
26 |
GPT4o_0513,0.27722306559544163
|
27 |
cross_openhermes_llama3_8b_8192_inst,0.24473214674903845
|
28 |
cross_openhermes_llama3_70b_4096_inst,0.27230844604661014
|
results/flores_translation/zero_shot/zsm2eng.csv
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.28031997065822994
|
3 |
Meta-Llama-3.1-8B-Instruct,0.3700921225177551
|
|
|
4 |
Qwen2_5_7B_Instruct,0.3466422765302921
|
|
|
5 |
Qwen2-72B-Instruct,0.40796892621611885
|
6 |
cross_openhermes_llama3_8b_4096_inst,0.37996622288549425
|
7 |
Meta-Llama-3-8B-Instruct,0.31625368345049
|
@@ -20,6 +22,7 @@ cross_openhermes_llama3_8b_2048_inst,0.3904643635616676
|
|
20 |
gemma-2-2b-it,0.33737270487369614
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
|
22 |
cross_openhermes_llama3_8b_12288_inst,0.39589080400186966
|
|
|
23 |
GPT4o_0513,0.451496635720668
|
24 |
cross_openhermes_llama3_8b_8192_inst,0.39476822018254265
|
25 |
cross_openhermes_llama3_70b_4096_inst,0.43447247409976697
|
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.28031997065822994
|
3 |
Meta-Llama-3.1-8B-Instruct,0.3700921225177551
|
4 |
+
Qwen2_5_32B_Instruct,0.40310877536446654
|
5 |
Qwen2_5_7B_Instruct,0.3466422765302921
|
6 |
+
Qwen2_5_1_5B_Instruct,0.22890805100949677
|
7 |
Qwen2-72B-Instruct,0.40796892621611885
|
8 |
cross_openhermes_llama3_8b_4096_inst,0.37996622288549425
|
9 |
Meta-Llama-3-8B-Instruct,0.31625368345049
|
|
|
22 |
gemma-2-2b-it,0.33737270487369614
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
|
24 |
cross_openhermes_llama3_8b_12288_inst,0.39589080400186966
|
25 |
+
Qwen2_5_0_5B_Instruct,0.1194369315142997
|
26 |
GPT4o_0513,0.451496635720668
|
27 |
cross_openhermes_llama3_8b_8192_inst,0.39476822018254265
|
28 |
cross_openhermes_llama3_70b_4096_inst,0.43447247409976697
|
results/fundamental_nlp_tasks/zero_shot/c3.csv
CHANGED
@@ -17,6 +17,7 @@ gemma-2-9b-it,0.9222139117427075
|
|
17 |
Meta-Llama-3-70B-Instruct,0.9521316379955124
|
18 |
Qwen2_5_14B_Instruct,0.9502617801047121
|
19 |
sg_llama3_8192_8b,0.8051608077786089
|
|
|
20 |
sg_llama3_70b_inst,0.9289454001495886
|
21 |
cross_openhermes_llama3_8b_2048_inst,0.8167539267015707
|
22 |
gemma-2-2b-it,0.7700074794315632
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.9521316379955124
|
18 |
Qwen2_5_14B_Instruct,0.9502617801047121
|
19 |
sg_llama3_8192_8b,0.8051608077786089
|
20 |
+
Meta-Llama-3.1-70B,0.7786088257292446
|
21 |
sg_llama3_70b_inst,0.9289454001495886
|
22 |
cross_openhermes_llama3_8b_2048_inst,0.8167539267015707
|
23 |
gemma-2-2b-it,0.7700074794315632
|