Spaces:
Running
Running
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- results/cross_lingual/few_shot/cross_logiqa.csv +4 -0
- results/cross_lingual/few_shot/cross_mmlu.csv +4 -0
- results/cross_lingual/few_shot/cross_xquad.csv +4 -0
- results/cross_lingual/zero_shot/cross_logiqa.csv +9 -0
- results/cross_lingual/zero_shot/cross_mmlu.csv +9 -0
- results/cross_lingual/zero_shot/cross_xquad.csv +9 -0
- results/cultural_reasoning/few_shot/cn_eval.csv +4 -0
- results/cultural_reasoning/few_shot/ph_eval.csv +4 -0
- results/cultural_reasoning/few_shot/sg_eval.csv +4 -0
- results/cultural_reasoning/few_shot/us_eval.csv +4 -0
- results/cultural_reasoning/zero_shot/cn_eval.csv +9 -0
- results/cultural_reasoning/zero_shot/ph_eval.csv +9 -0
- results/cultural_reasoning/zero_shot/sg_eval.csv +9 -0
- results/cultural_reasoning/zero_shot/us_eval.csv +9 -0
- results/dialogue/few_shot/dream.csv +4 -0
- results/dialogue/zero_shot/dialogsum.csv +9 -0
- results/dialogue/zero_shot/dream.csv +9 -0
- results/dialogue/zero_shot/samsum.csv +9 -0
- results/emotion/few_shot/ind_emotion.csv +4 -0
- results/emotion/few_shot/sst2.csv +4 -0
- results/emotion/zero_shot/ind_emotion.csv +9 -0
- results/emotion/zero_shot/sst2.csv +9 -0
- results/flores_translation/few_shot/ind2eng.csv +4 -0
- results/flores_translation/few_shot/vie2eng.csv +4 -0
- results/flores_translation/few_shot/zho2eng.csv +4 -0
- results/flores_translation/few_shot/zsm2eng.csv +4 -0
- results/flores_translation/zero_shot/ind2eng.csv +9 -0
- results/flores_translation/zero_shot/vie2eng.csv +9 -0
- results/flores_translation/zero_shot/zho2eng.csv +9 -0
- results/flores_translation/zero_shot/zsm2eng.csv +9 -0
- results/fundamental_nlp_tasks/few_shot/c3.csv +4 -0
- results/fundamental_nlp_tasks/few_shot/cola.csv +4 -0
- results/fundamental_nlp_tasks/few_shot/mnli.csv +4 -0
- results/fundamental_nlp_tasks/few_shot/mrpc.csv +4 -0
- results/fundamental_nlp_tasks/few_shot/ocnli.csv +4 -0
- results/fundamental_nlp_tasks/few_shot/qnli.csv +4 -0
- results/fundamental_nlp_tasks/few_shot/qqp.csv +4 -0
- results/fundamental_nlp_tasks/few_shot/rte.csv +4 -0
- results/fundamental_nlp_tasks/few_shot/wnli.csv +4 -0
- results/fundamental_nlp_tasks/zero_shot/c3.csv +9 -0
- results/fundamental_nlp_tasks/zero_shot/cola.csv +9 -0
- results/fundamental_nlp_tasks/zero_shot/mnli.csv +9 -0
- results/fundamental_nlp_tasks/zero_shot/mrpc.csv +9 -0
- results/fundamental_nlp_tasks/zero_shot/ocnli.csv +9 -0
- results/fundamental_nlp_tasks/zero_shot/qnli.csv +9 -0
- results/fundamental_nlp_tasks/zero_shot/qqp.csv +9 -0
- results/fundamental_nlp_tasks/zero_shot/rte.csv +9 -0
- results/fundamental_nlp_tasks/zero_shot/wnli.csv +9 -0
- results/general_reasoning/few_shot/c_eval.csv +4 -0
- results/general_reasoning/few_shot/cmmlu.csv +4 -0
results/cross_lingual/few_shot/cross_logiqa.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
+
Meta-Llama-3-70B,0.6152597402597404,0.49480519480519464,0.5484971301967684,0.7272727272727273,0.6534090909090909,0.625,0.5681818181818182,0.6136363636363636,0.5795454545454546,0.5397727272727273
|
3 |
+
Meta-Llama-3-8B,0.44967532467532456,0.2623376623376623,0.33136129711503204,0.5227272727272727,0.4431818181818182,0.44886363636363635,0.44886363636363635,0.3693181818181818,0.4602272727272727,0.45454545454545453
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.43993506493506496,0.27012987012987016,0.3347288285088485,0.5170454545454546,0.4375,0.4431818181818182,0.4772727272727273,0.4090909090909091,0.4659090909090909,0.32954545454545453
|
5 |
+
Meta-Llama-3.1-8B,0.46266233766233766,0.277435064935065,0.34686989908229837,0.5284090909090909,0.5,0.4375,0.4772727272727273,0.4318181818181818,0.4431818181818182,0.42045454545454547
|
results/cross_lingual/few_shot/cross_mmlu.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
+
Meta-Llama-3-70B,0.7552380952380952,0.6674285714285715,0.708623453080271,0.8066666666666666,0.7266666666666667,0.7866666666666666,0.7533333333333333,0.7733333333333333,0.72,0.72
|
3 |
+
Meta-Llama-3-8B,0.5295238095238096,0.31923809523809527,0.3983311959862401,0.6266666666666667,0.5466666666666666,0.56,0.4866666666666667,0.5266666666666666,0.5,0.46
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.5228571428571429,0.32704761904761903,0.402396106759339,0.6533333333333333,0.44,0.5066666666666667,0.47333333333333333,0.58,0.5466666666666666,0.46
|
5 |
+
Meta-Llama-3.1-8B,0.5342857142857141,0.2960000000000001,0.3809497590731823,0.6733333333333333,0.5533333333333333,0.5133333333333333,0.47333333333333333,0.5133333333333333,0.5,0.5133333333333333
|
results/cross_lingual/few_shot/cross_xquad.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
+
Meta-Llama-3-70B,0.9596638655462185,0.9359243697478992,0.9476454662047799,0.9697478991596639,0.9504201680672268,0.957983193277311,0.9605042016806723,,,
|
3 |
+
Meta-Llama-3-8B,0.8928571428571429,0.8163865546218487,0.8529112234365448,0.926890756302521,0.8823529411764706,0.888235294117647,0.8739495798319328,,,
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.9029411764705881,0.842016806722689,0.8714154189951169,0.9218487394957983,0.8815126050420168,0.9058823529411765,0.9025210084033614,,,
|
5 |
+
Meta-Llama-3.1-8B,0.9052521008403361,0.8355042016806722,0.8689808363106925,0.9352941176470588,0.8932773109243698,0.9,0.892436974789916,,,
|
results/cross_lingual/zero_shot/cross_logiqa.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
+
Qwen2-7B-Instruct,0.5673701298701299,0.477922077922078,0.5188189663543613,0.6590909090909091,0.6704545454545454,0.5340909090909091,0.5625,0.5340909090909091,0.5397727272727273,0.4715909090909091
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.43993506493506496,0.33425324675324675,0.37988102268160845,0.5113636363636364,0.45454545454545453,0.4772727272727273,0.48295454545454547,0.3977272727272727,0.39204545454545453,0.36363636363636365
|
4 |
+
Qwen2-72B-Instruct,0.6753246753246753,0.6814935064935067,0.6783950674333673,0.75,0.8125,0.6647727272727273,0.6136363636363636,0.6420454545454546,0.6590909090909091,0.5852272727272727
|
5 |
+
Meta-Llama-3-8B-Instruct,0.4115259740259741,0.34042207792207796,0.3726122484532397,0.48863636363636365,0.4659090909090909,0.42613636363636365,0.4034090909090909,0.4034090909090909,0.36363636363636365,0.32954545454545453
|
6 |
+
SeaLLMs-v3-7B-Chat,0.5633116883116883,0.5176948051948052,0.5395407640365807,0.6079545454545454,0.7045454545454546,0.5681818181818182,0.5511363636363636,0.5340909090909091,0.5170454545454546,0.4602272727272727
|
7 |
+
gemma-2-9b-it,0.6193181818181818,0.5688311688311687,0.5930020245684557,0.6818181818181818,0.6590909090909091,0.5625,0.6193181818181818,0.5909090909090909,0.6306818181818182,0.5909090909090909
|
8 |
+
Meta-Llama-3-70B-Instruct,0.6290584415584416,0.6181818181818182,0.6235727047409828,0.6988636363636364,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273
|
9 |
+
gemma-2-2b-it,0.48214285714285715,0.44772727272727286,0.4642981843076105,0.5625,0.5113636363636364,0.48863636363636365,0.5,0.4431818181818182,0.4659090909090909,0.4034090909090909
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.43831168831168826,0.38831168831168833,0.41179951229957745,0.4943181818181818,0.48295454545454547,0.48295454545454547,0.4318181818181818,0.4147727272727273,0.38636363636363635,0.375
|
results/cross_lingual/zero_shot/cross_mmlu.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
+
Qwen2-7B-Instruct,0.6495238095238095,0.529714285714286,0.5835327779462245,0.74,0.6733333333333333,0.7,0.6,0.6533333333333333,0.6333333333333333,0.5466666666666666
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.5771428571428572,0.47047619047619055,0.5183792207297393,0.6933333333333334,0.5333333333333333,0.6266666666666667,0.54,0.54,0.54,0.5666666666666667
|
4 |
+
Qwen2-72B-Instruct,0.7714285714285715,0.7765714285714286,0.773991456997936,0.8,0.78,0.7866666666666666,0.7333333333333333,0.76,0.78,0.76
|
5 |
+
Meta-Llama-3-8B-Instruct,0.5276190476190475,0.3792380952380953,0.4412894449458876,0.62,0.5066666666666667,0.5066666666666667,0.5466666666666666,0.49333333333333335,0.52,0.5
|
6 |
+
SeaLLMs-v3-7B-Chat,0.6580952380952381,0.6253333333333335,0.641296131344116,0.7466666666666667,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.66,0.58,0.5866666666666667
|
7 |
+
gemma-2-9b-it,0.7114285714285715,0.7201904761904762,0.7157827111185566,0.76,0.7333333333333333,0.7,0.66,0.7066666666666667,0.6933333333333334,0.7266666666666667
|
8 |
+
Meta-Llama-3-70B-Instruct,0.7542857142857143,0.7228571428571428,0.7382370820168919,0.7933333333333333,0.74,0.7666666666666667,0.7466666666666667,0.7666666666666667,0.72,0.7466666666666667
|
9 |
+
gemma-2-2b-it,0.5752380952380953,0.5333333333333332,0.5534936998355239,0.6866666666666666,0.5866666666666667,0.6066666666666667,0.5466666666666666,0.5466666666666666,0.5133333333333333,0.54
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.5466666666666667,0.4720000000000001,0.5065968585890122,0.66,0.49333333333333335,0.5466666666666666,0.5866666666666667,0.5666666666666667,0.5066666666666667,0.4666666666666667
|
results/cross_lingual/zero_shot/cross_xquad.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
+
Qwen2-7B-Instruct,0.940546218487395,0.9016806722689076,0.9207034712119446,0.9521008403361344,0.9352941176470588,0.9445378151260504,0.9302521008403362,,,
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.9340336134453782,0.8831932773109243,0.9079022683718587,0.9369747899159664,0.9302521008403362,0.946218487394958,0.9226890756302522,,,
|
4 |
+
Qwen2-72B-Instruct,0.9611344537815126,0.9506302521008403,0.9558534951942531,0.9638655462184874,0.9554621848739496,0.9613445378151261,0.9638655462184874,,,
|
5 |
+
Meta-Llama-3-8B-Instruct,0.8756302521008403,0.7699579831932772,0.8194012188828194,0.8815126050420168,0.8420168067226891,0.9092436974789916,0.8697478991596639,,,
|
6 |
+
SeaLLMs-v3-7B-Chat,0.9394957983193277,0.9172268907563025,0.9282278015934072,0.9512605042016806,0.938655462184874,0.938655462184874,0.9294117647058824,,,
|
7 |
+
gemma-2-9b-it,0.9571428571428572,0.9352941176470588,0.9460923622945893,0.9663865546218487,0.9411764705882353,0.9613445378151261,0.9596638655462185,,,
|
8 |
+
Meta-Llama-3-70B-Instruct,0.9586134453781513,0.9434873949579832,0.9509902767764395,0.9705882352941176,0.9394957983193277,0.9596638655462185,0.9647058823529412,,,
|
9 |
+
gemma-2-2b-it,0.9149159663865546,0.8632352941176471,0.888324599638689,0.9302521008403362,0.9016806722689076,0.9184873949579831,0.9092436974789916,,,
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.8930672268907562,0.8262605042016806,0.8583659343003551,0.9142857142857143,0.8798319327731092,0.8890756302521008,0.8890756302521008,,,
|
results/cultural_reasoning/few_shot/cn_eval.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.6
|
3 |
+
Meta-Llama-3-8B,0.41904761904761906
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.4095238095238095
|
5 |
+
Meta-Llama-3.1-8B,0.4857142857142857
|
results/cultural_reasoning/few_shot/ph_eval.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.68
|
3 |
+
Meta-Llama-3-8B,0.54
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.52
|
5 |
+
Meta-Llama-3.1-8B,0.51
|
results/cultural_reasoning/few_shot/sg_eval.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.7572815533980582
|
3 |
+
Meta-Llama-3-8B,0.6407766990291263
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.6310679611650486
|
5 |
+
Meta-Llama-3.1-8B,0.6116504854368932
|
results/cultural_reasoning/few_shot/us_eval.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.8785046728971962
|
3 |
+
Meta-Llama-3-8B,0.6915887850467289
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.719626168224299
|
5 |
+
Meta-Llama-3.1-8B,0.6728971962616822
|
results/cultural_reasoning/zero_shot/cn_eval.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.8095238095238095
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.42857142857142855
|
4 |
+
Qwen2-72B-Instruct,0.8571428571428571
|
5 |
+
Meta-Llama-3-8B-Instruct,0.37142857142857144
|
6 |
+
SeaLLMs-v3-7B-Chat,0.8095238095238095
|
7 |
+
gemma-2-9b-it,0.6190476190476191
|
8 |
+
Meta-Llama-3-70B-Instruct,0.5142857142857142
|
9 |
+
gemma-2-2b-it,0.4095238095238095
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.47619047619047616
|
results/cultural_reasoning/zero_shot/ph_eval.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.51
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.56
|
4 |
+
Qwen2-72B-Instruct,0.63
|
5 |
+
Meta-Llama-3-8B-Instruct,0.54
|
6 |
+
SeaLLMs-v3-7B-Chat,0.5
|
7 |
+
gemma-2-9b-it,0.61
|
8 |
+
Meta-Llama-3-70B-Instruct,0.63
|
9 |
+
gemma-2-2b-it,0.39
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.53
|
results/cultural_reasoning/zero_shot/sg_eval.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.6699029126213593
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.6019417475728155
|
4 |
+
Qwen2-72B-Instruct,0.7378640776699029
|
5 |
+
Meta-Llama-3-8B-Instruct,0.5922330097087378
|
6 |
+
SeaLLMs-v3-7B-Chat,0.6310679611650486
|
7 |
+
gemma-2-9b-it,0.6893203883495146
|
8 |
+
Meta-Llama-3-70B-Instruct,0.7184466019417476
|
9 |
+
gemma-2-2b-it,0.5533980582524272
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.6019417475728155
|
results/cultural_reasoning/zero_shot/us_eval.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.719626168224299
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.6448598130841121
|
4 |
+
Qwen2-72B-Instruct,0.8504672897196262
|
5 |
+
Meta-Llama-3-8B-Instruct,0.6448598130841121
|
6 |
+
SeaLLMs-v3-7B-Chat,0.7009345794392523
|
7 |
+
gemma-2-9b-it,0.8317757009345794
|
8 |
+
Meta-Llama-3-70B-Instruct,0.8691588785046729
|
9 |
+
gemma-2-2b-it,0.7102803738317757
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.6542056074766355
|
results/dialogue/few_shot/dream.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.9510044096031357
|
3 |
+
Meta-Llama-3-8B,0.8250857422831945
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.8515433610975012
|
5 |
+
Meta-Llama-3.1-8B,0.8530132288094071
|
results/dialogue/zero_shot/dialogsum.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
|
2 |
+
Qwen2-7B-Instruct,0.20907406151501814,0.3054588156947843,0.09317750879187732,0.22858586005839285
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.25775524210830225,0.361264483769506,0.1319601664036931,0.28004107615170776
|
4 |
+
Qwen2-72B-Instruct,0.21903635116217549,0.31670807543803475,0.10250931612356096,0.23789166192493072
|
5 |
+
Meta-Llama-3-8B-Instruct,0.23748034560689027,0.33656243928704743,0.11826169056076426,0.2576169069728591
|
6 |
+
SeaLLMs-v3-7B-Chat,0.24723061042117522,0.3515679169380843,0.12081049484108507,0.2693134194843562
|
7 |
+
gemma-2-9b-it,0.2587338648607764,0.3658237880022337,0.12722373001686862,0.2831540765632268
|
8 |
+
Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438
|
9 |
+
gemma-2-2b-it,0.26123184071161726,0.3683777522574926,0.12793735218483035,0.28738041769252887
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.2531827068435159,0.35516222681696785,0.12864609875605545,0.2757397949575244
|
results/dialogue/zero_shot/dream.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.9338559529642332
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.8858402743753062
|
4 |
+
Qwen2-72B-Instruct,0.9603135717785399
|
5 |
+
Meta-Llama-3-8B-Instruct,0.5433610975012249
|
6 |
+
SeaLLMs-v3-7B-Chat,0.9211170994610485
|
7 |
+
gemma-2-9b-it,0.9397354238118569
|
8 |
+
Meta-Llama-3-70B-Instruct,0.9480646741793238
|
9 |
+
gemma-2-2b-it,0.8486036256736894
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.7555120039196472
|
results/dialogue/zero_shot/samsum.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
|
2 |
+
Qwen2-7B-Instruct,0.2609036529701212,0.36802926348230236,0.1319027531874975,0.28277894224056366
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.3002534894623792,0.41234119292969856,0.16596515741670248,0.3224541180407366
|
4 |
+
Qwen2-72B-Instruct,0.27953180135225114,0.3883786925058577,0.15246657328712612,0.2977501382637696
|
5 |
+
Meta-Llama-3-8B-Instruct,0.2850232460296334,0.3945214081577773,0.15619034353394273,0.3043579863971803
|
6 |
+
SeaLLMs-v3-7B-Chat,0.2947730352305254,0.40661343212311085,0.16241730068430632,0.31528837288415906
|
7 |
+
gemma-2-9b-it,0.30920311453647803,0.4269492679851157,0.16650133263007386,0.33415874299424464
|
8 |
+
Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
|
9 |
+
gemma-2-2b-it,0.3067902178200617,0.4277497131478937,0.1609158209467132,0.3317051193655783
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.29924948830821335,0.40828658585731714,0.16733998585334992,0.32212189321397305
|
results/emotion/few_shot/ind_emotion.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.7159090909090909
|
3 |
+
Meta-Llama-3-8B,0.4636363636363636
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.525
|
5 |
+
Meta-Llama-3.1-8B,0.5136363636363637
|
results/emotion/few_shot/sst2.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.9002293577981652
|
3 |
+
Meta-Llama-3-8B,0.6697247706422018
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.75
|
5 |
+
Meta-Llama-3.1-8B,0.8405963302752294
|
results/emotion/zero_shot/ind_emotion.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.6386363636363637
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.6295454545454545
|
4 |
+
Qwen2-72B-Instruct,0.675
|
5 |
+
Meta-Llama-3-8B-Instruct,0.6522727272727272
|
6 |
+
SeaLLMs-v3-7B-Chat,0.34545454545454546
|
7 |
+
gemma-2-9b-it,0.7431818181818182
|
8 |
+
Meta-Llama-3-70B-Instruct,0.6909090909090909
|
9 |
+
gemma-2-2b-it,0.625
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.6272727272727273
|
results/emotion/zero_shot/sst2.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.9231651376146789
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.8784403669724771
|
4 |
+
Qwen2-72B-Instruct,0.9369266055045872
|
5 |
+
Meta-Llama-3-8B-Instruct,0.8669724770642202
|
6 |
+
SeaLLMs-v3-7B-Chat,0.9346330275229358
|
7 |
+
gemma-2-9b-it,0.9311926605504587
|
8 |
+
Meta-Llama-3-70B-Instruct,0.9495412844036697
|
9 |
+
gemma-2-2b-it,0.9208715596330275
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.9162844036697247
|
results/flores_translation/few_shot/ind2eng.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,BLEU
|
|
|
|
|
|
|
|
|
|
1 |
Model,BLEU
|
2 |
+
Meta-Llama-3-70B,0.4224655367668861
|
3 |
+
Meta-Llama-3-8B,0.37760317005449096
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.37662180389435995
|
5 |
+
Meta-Llama-3.1-8B,0.384092499597103
|
results/flores_translation/few_shot/vie2eng.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,BLEU
|
|
|
|
|
|
|
|
|
|
1 |
Model,BLEU
|
2 |
+
Meta-Llama-3-70B,0.3564689224836266
|
3 |
+
Meta-Llama-3-8B,0.31157996445764863
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.30608365217733097
|
5 |
+
Meta-Llama-3.1-8B,0.320367356810332
|
results/flores_translation/few_shot/zho2eng.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,BLEU
|
|
|
|
|
|
|
|
|
|
1 |
Model,BLEU
|
2 |
+
Meta-Llama-3-70B,0.27798501796196434
|
3 |
+
Meta-Llama-3-8B,0.23710858530408072
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.22831898923969038
|
5 |
+
Meta-Llama-3.1-8B,0.23777256698409086
|
results/flores_translation/few_shot/zsm2eng.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,BLEU
|
|
|
|
|
|
|
|
|
|
1 |
Model,BLEU
|
2 |
+
Meta-Llama-3-70B,0.44357168236218214
|
3 |
+
Meta-Llama-3-8B,0.3908770132718593
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.37668373435658764
|
5 |
+
Meta-Llama-3.1-8B,0.3893813156403672
|
results/flores_translation/zero_shot/ind2eng.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,BLEU
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,BLEU
|
2 |
+
Qwen2-7B-Instruct,0.2968667083646938
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.3851478947359834
|
4 |
+
Qwen2-72B-Instruct,0.40378146176265345
|
5 |
+
Meta-Llama-3-8B-Instruct,0.33011728860318257
|
6 |
+
SeaLLMs-v3-7B-Chat,0.3642282499148727
|
7 |
+
gemma-2-9b-it,0.4115273387213549
|
8 |
+
Meta-Llama-3-70B-Instruct,0.3830092775167675
|
9 |
+
gemma-2-2b-it,0.3496340692126605
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.39322992478935465
|
results/flores_translation/zero_shot/vie2eng.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,BLEU
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,BLEU
|
2 |
+
Qwen2-7B-Instruct,0.23571859325121644
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.3229889780558947
|
4 |
+
Qwen2-72B-Instruct,0.3326034551014482
|
5 |
+
Meta-Llama-3-8B-Instruct,0.2637063711923046
|
6 |
+
SeaLLMs-v3-7B-Chat,0.3073965938987496
|
7 |
+
gemma-2-9b-it,0.33638205957057027
|
8 |
+
Meta-Llama-3-70B-Instruct,0.3230140263371192
|
9 |
+
gemma-2-2b-it,0.2717960864611513
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.33210048239854756
|
results/flores_translation/zero_shot/zho2eng.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,BLEU
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,BLEU
|
2 |
+
Qwen2-7B-Instruct,0.21747115262398484
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.24469097639356438
|
4 |
+
Qwen2-72B-Instruct,0.24317967002278634
|
5 |
+
Meta-Llama-3-8B-Instruct,0.19960072119079214
|
6 |
+
SeaLLMs-v3-7B-Chat,0.25023469014968713
|
7 |
+
gemma-2-9b-it,0.26747029920541504
|
8 |
+
Meta-Llama-3-70B-Instruct,0.24397819518058994
|
9 |
+
gemma-2-2b-it,0.21203164253450932
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.24572934810342245
|
results/flores_translation/zero_shot/zsm2eng.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,BLEU
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,BLEU
|
2 |
+
Qwen2-7B-Instruct,0.27198336767927184
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.3833985449157327
|
4 |
+
Qwen2-72B-Instruct,0.40613262295280417
|
5 |
+
Meta-Llama-3-8B-Instruct,0.31536374302282033
|
6 |
+
SeaLLMs-v3-7B-Chat,0.3535493169696862
|
7 |
+
gemma-2-9b-it,0.4248122066845582
|
8 |
+
Meta-Llama-3-70B-Instruct,0.3957287030176054
|
9 |
+
gemma-2-2b-it,0.33384917509056944
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.391912232406389
|
results/fundamental_nlp_tasks/few_shot/c3.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.9390426327599103
|
3 |
+
Meta-Llama-3-8B,0.7703814510097232
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.7913238593866866
|
5 |
+
Meta-Llama-3.1-8B,0.8208676140613314
|
results/fundamental_nlp_tasks/few_shot/cola.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.7171620325982742
|
3 |
+
Meta-Llama-3-8B,0.6596356663470757
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.6021093000958773
|
5 |
+
Meta-Llama-3.1-8B,0.6222435282837967
|
results/fundamental_nlp_tasks/few_shot/mnli.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.7505
|
3 |
+
Meta-Llama-3-8B,0.46174988547869905
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.472
|
5 |
+
Meta-Llama-3.1-8B,0.48506133251895966
|
results/fundamental_nlp_tasks/few_shot/mrpc.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.6764705882352942
|
3 |
+
Meta-Llama-3-8B,0.5906862745098039
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.6078431372549019
|
5 |
+
Meta-Llama-3.1-8B,0.5661764705882353
|
results/fundamental_nlp_tasks/few_shot/ocnli.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.6840677966101695
|
3 |
+
Meta-Llama-3-8B,0.3935593220338983
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.3840677966101695
|
5 |
+
Meta-Llama-3.1-8B,0.411864406779661
|
results/fundamental_nlp_tasks/few_shot/qnli.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.572
|
3 |
+
Meta-Llama-3-8B,0.5059491122094087
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.49716273110012815
|
5 |
+
Meta-Llama-3.1-8B,0.5081457074867289
|
results/fundamental_nlp_tasks/few_shot/qqp.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.7215
|
3 |
+
Meta-Llama-3-8B,0.551
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.519
|
5 |
+
Meta-Llama-3.1-8B,0.5565
|
results/fundamental_nlp_tasks/few_shot/rte.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.776173285198556
|
3 |
+
Meta-Llama-3-8B,0.5487364620938628
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.6462093862815884
|
5 |
+
Meta-Llama-3.1-8B,0.6137184115523465
|
results/fundamental_nlp_tasks/few_shot/wnli.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.8169014084507042
|
3 |
+
Meta-Llama-3-8B,0.4647887323943662
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.5915492957746479
|
5 |
+
Meta-Llama-3.1-8B,0.5211267605633803
|
results/fundamental_nlp_tasks/zero_shot/c3.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.9233358264771877
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.7984293193717278
|
4 |
+
Qwen2-72B-Instruct,0.9599850411368736
|
5 |
+
Meta-Llama-3-8B-Instruct,0.8515332834704562
|
6 |
+
SeaLLMs-v3-7B-Chat,0.912490650710546
|
7 |
+
gemma-2-9b-it,0.9210919970082274
|
8 |
+
Meta-Llama-3-70B-Instruct,0.9521316379955124
|
9 |
+
gemma-2-2b-it,0.7703814510097232
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.675392670157068
|
results/fundamental_nlp_tasks/zero_shot/cola.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.7861936720997124
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.7046979865771812
|
4 |
+
Qwen2-72B-Instruct,0.8360498561840843
|
5 |
+
Meta-Llama-3-8B-Instruct,0.6481303930968361
|
6 |
+
SeaLLMs-v3-7B-Chat,0.7890699904122723
|
7 |
+
gemma-2-9b-it,0.7967401725790988
|
8 |
+
Meta-Llama-3-70B-Instruct,0.835091083413231
|
9 |
+
gemma-2-2b-it,0.6711409395973155
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.5915627996164909
|
results/fundamental_nlp_tasks/zero_shot/mnli.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.7341578867002596
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.4603756298671553
|
4 |
+
Qwen2-72B-Instruct,0.7979844251030692
|
5 |
+
Meta-Llama-3-8B-Instruct,0.5296991907161399
|
6 |
+
SeaLLMs-v3-7B-Chat,0.638
|
7 |
+
gemma-2-9b-it,0.707
|
8 |
+
Meta-Llama-3-70B-Instruct,0.6709421285692472
|
9 |
+
gemma-2-2b-it,0.612
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.5276123581208327
|
results/fundamental_nlp_tasks/zero_shot/mrpc.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.7745098039215687
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.6740196078431373
|
4 |
+
Qwen2-72B-Instruct,0.7941176470588235
|
5 |
+
Meta-Llama-3-8B-Instruct,0.6764705882352942
|
6 |
+
SeaLLMs-v3-7B-Chat,0.7475490196078431
|
7 |
+
gemma-2-9b-it,0.7450980392156863
|
8 |
+
Meta-Llama-3-70B-Instruct,0.7598039215686274
|
9 |
+
gemma-2-2b-it,0.7132352941176471
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.49264705882352944
|
results/fundamental_nlp_tasks/zero_shot/ocnli.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.6474576271186441
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.42135593220338985
|
4 |
+
Qwen2-72B-Instruct,0.7874576271186441
|
5 |
+
Meta-Llama-3-8B-Instruct,0.4322033898305085
|
6 |
+
SeaLLMs-v3-7B-Chat,0.5613559322033899
|
7 |
+
gemma-2-9b-it,0.6183050847457627
|
8 |
+
Meta-Llama-3-70B-Instruct,0.5928813559322034
|
9 |
+
gemma-2-2b-it,0.4335593220338983
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.4135593220338983
|
results/fundamental_nlp_tasks/zero_shot/qnli.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.8169503935566539
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.6027823540179389
|
4 |
+
Qwen2-72B-Instruct,0.8894380377082189
|
5 |
+
Meta-Llama-3-8B-Instruct,0.5689181768259198
|
6 |
+
SeaLLMs-v3-7B-Chat,0.7181036060772469
|
7 |
+
gemma-2-9b-it,0.90481420464946
|
8 |
+
Meta-Llama-3-70B-Instruct,0.876807614863628
|
9 |
+
gemma-2-2b-it,0.779974373055098
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.5652571846970529
|
results/fundamental_nlp_tasks/zero_shot/qqp.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.7771209497897601
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.5058125154588177
|
4 |
+
Qwen2-72B-Instruct,0.7992332426416028
|
5 |
+
Meta-Llama-3-8B-Instruct,0.5512490724709375
|
6 |
+
SeaLLMs-v3-7B-Chat,0.757
|
7 |
+
gemma-2-9b-it,0.761
|
8 |
+
Meta-Llama-3-70B-Instruct,0.7876082117239673
|
9 |
+
gemma-2-2b-it,0.771
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.585
|
results/fundamental_nlp_tasks/zero_shot/rte.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.8411552346570397
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.6895306859205776
|
4 |
+
Qwen2-72B-Instruct,0.8592057761732852
|
5 |
+
Meta-Llama-3-8B-Instruct,0.6028880866425993
|
6 |
+
SeaLLMs-v3-7B-Chat,0.7870036101083032
|
7 |
+
gemma-2-9b-it,0.7472924187725631
|
8 |
+
Meta-Llama-3-70B-Instruct,0.8086642599277978
|
9 |
+
gemma-2-2b-it,0.7003610108303249
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.6209386281588448
|
results/fundamental_nlp_tasks/zero_shot/wnli.csv
CHANGED
@@ -1 +1,10 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Qwen2-7B-Instruct,0.647887323943662
|
3 |
+
Meta-Llama-3.1-8B-Instruct,0.4507042253521127
|
4 |
+
Qwen2-72B-Instruct,0.9014084507042254
|
5 |
+
Meta-Llama-3-8B-Instruct,0.4507042253521127
|
6 |
+
SeaLLMs-v3-7B-Chat,0.6619718309859155
|
7 |
+
gemma-2-9b-it,0.7464788732394366
|
8 |
+
Meta-Llama-3-70B-Instruct,0.7887323943661971
|
9 |
+
gemma-2-2b-it,0.43661971830985913
|
10 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.4788732394366197
|
results/general_reasoning/few_shot/c_eval.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.6183063511830635
|
3 |
+
Meta-Llama-3-8B,0.43773349937733497
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.42092154420921546
|
5 |
+
Meta-Llama-3.1-8B,0.44458281444582815
|
results/general_reasoning/few_shot/cmmlu.csv
CHANGED
@@ -1 +1,5 @@
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B,0.652650664824728
|
3 |
+
Meta-Llama-3-8B,0.4308409601105163
|
4 |
+
llama3-8b-cpt-sea-lionv2-base,0.4389570022448627
|
5 |
+
Meta-Llama-3.1-8B,0.4556207908824037
|