Spaces:
Running
Running
new
Browse files- all_results.json +8 -531
all_results.json
CHANGED
@@ -32277,7 +32277,7 @@
|
|
32277 |
},
|
32278 |
"flores_ind2eng": {
|
32279 |
"prompt_1": {
|
32280 |
-
"bleu_score": 0.
|
32281 |
},
|
32282 |
"prompt_2": {
|
32283 |
"bleu_score": 0.08178489772334997
|
@@ -32294,24 +32294,24 @@
|
|
32294 |
},
|
32295 |
"flores_vie2eng": {
|
32296 |
"prompt_1": {
|
32297 |
-
"bleu_score": 0.
|
32298 |
},
|
32299 |
"prompt_2": {
|
32300 |
-
"bleu_score": 0.
|
32301 |
},
|
32302 |
"prompt_3": {
|
32303 |
-
"bleu_score": 0.
|
32304 |
},
|
32305 |
"prompt_4": {
|
32306 |
-
"bleu_score": 0.
|
32307 |
},
|
32308 |
"prompt_5": {
|
32309 |
-
"bleu_score": 0.
|
32310 |
}
|
32311 |
},
|
32312 |
"flores_zho2eng": {
|
32313 |
"prompt_1": {
|
32314 |
-
"bleu_score": 0.
|
32315 |
},
|
32316 |
"prompt_2": {
|
32317 |
"bleu_score": 0.05134705353375384
|
@@ -32328,7 +32328,7 @@
|
|
32328 |
},
|
32329 |
"flores_zsm2eng": {
|
32330 |
"prompt_1": {
|
32331 |
-
"bleu_score": 0.
|
32332 |
},
|
32333 |
"prompt_2": {
|
32334 |
"bleu_score": 0.08240154342677156
|
@@ -93866,528 +93866,5 @@
|
|
93866 |
"prompt_1": -1
|
93867 |
}
|
93868 |
}
|
93869 |
-
},
|
93870 |
-
"LLaMA_3_Merlion_8B": {
|
93871 |
-
"model_size": "8B",
|
93872 |
-
"model_link": "https://seaeval.github.io/",
|
93873 |
-
"zero_shot": {
|
93874 |
-
"cross_xquad": {
|
93875 |
-
"prompt_1": {
|
93876 |
-
"overall_acc": 0.8876050420168068,
|
93877 |
-
"language_acc": {
|
93878 |
-
"Spanish": 0.8890756302521008,
|
93879 |
-
"English": 0.9277310924369748,
|
93880 |
-
"Chinese": 0.8663865546218488,
|
93881 |
-
"Vietnamese": 0.8672268907563025
|
93882 |
-
},
|
93883 |
-
"consistency_score_2": 0.8428571428571429,
|
93884 |
-
"consistency_score_3": 0.7733193277310924,
|
93885 |
-
"consistency_score_4": 0.7235294117647059,
|
93886 |
-
"detailed_consistency_score": {
|
93887 |
-
"2_combine": {
|
93888 |
-
"Spanish,English": 0.880672268907563,
|
93889 |
-
"Spanish,Chinese": 0.8151260504201681,
|
93890 |
-
"Spanish,Vietnamese": 0.8369747899159664,
|
93891 |
-
"English,Chinese": 0.8571428571428571,
|
93892 |
-
"English,Vietnamese": 0.8512605042016806,
|
93893 |
-
"Chinese,Vietnamese": 0.8159663865546218
|
93894 |
-
},
|
93895 |
-
"3_combine": {
|
93896 |
-
"Spanish,English,Chinese": 0.7840336134453781,
|
93897 |
-
"Spanish,English,Vietnamese": 0.7907563025210084,
|
93898 |
-
"Spanish,Chinese,Vietnamese": 0.7453781512605042,
|
93899 |
-
"English,Chinese,Vietnamese": 0.773109243697479
|
93900 |
-
},
|
93901 |
-
"4_combine": {
|
93902 |
-
"Spanish,English,Chinese,Vietnamese": 0.7235294117647059
|
93903 |
-
}
|
93904 |
-
},
|
93905 |
-
"AC3_2": 0.8646525260084111,
|
93906 |
-
"AC3_3": 0.8265302705456998,
|
93907 |
-
"AC3_4": 0.7972126129915623
|
93908 |
-
},
|
93909 |
-
"prompt_2": {
|
93910 |
-
"overall_acc": 0.8831932773109243,
|
93911 |
-
"language_acc": {
|
93912 |
-
"Spanish": 0.8705882352941177,
|
93913 |
-
"English": 0.9218487394957983,
|
93914 |
-
"Chinese": 0.8705882352941177,
|
93915 |
-
"Vietnamese": 0.8697478991596639
|
93916 |
-
},
|
93917 |
-
"consistency_score_2": 0.838375350140056,
|
93918 |
-
"consistency_score_3": 0.7674369747899159,
|
93919 |
-
"consistency_score_4": 0.7151260504201681,
|
93920 |
-
"detailed_consistency_score": {
|
93921 |
-
"2_combine": {
|
93922 |
-
"Spanish,English": 0.865546218487395,
|
93923 |
-
"Spanish,Chinese": 0.8126050420168067,
|
93924 |
-
"Spanish,Vietnamese": 0.8168067226890756,
|
93925 |
-
"English,Chinese": 0.8613445378151261,
|
93926 |
-
"English,Vietnamese": 0.8621848739495799,
|
93927 |
-
"Chinese,Vietnamese": 0.8117647058823529
|
93928 |
-
},
|
93929 |
-
"3_combine": {
|
93930 |
-
"Spanish,English,Chinese": 0.7773109243697479,
|
93931 |
-
"Spanish,English,Vietnamese": 0.7823529411764706,
|
93932 |
-
"Spanish,Chinese,Vietnamese": 0.7336134453781512,
|
93933 |
-
"English,Chinese,Vietnamese": 0.7764705882352941
|
93934 |
-
},
|
93935 |
-
"4_combine": {
|
93936 |
-
"Spanish,English,Chinese,Vietnamese": 0.7151260504201681
|
93937 |
-
}
|
93938 |
-
},
|
93939 |
-
"AC3_2": 0.8602009368168079,
|
93940 |
-
"AC3_3": 0.8212562153038939,
|
93941 |
-
"AC3_4": 0.790323321708278
|
93942 |
-
},
|
93943 |
-
"prompt_3": {
|
93944 |
-
"overall_acc": 0.8493697478991598,
|
93945 |
-
"language_acc": {
|
93946 |
-
"Spanish": 0.8176470588235294,
|
93947 |
-
"English": 0.9008403361344538,
|
93948 |
-
"Chinese": 0.8403361344537815,
|
93949 |
-
"Vietnamese": 0.838655462184874
|
93950 |
-
},
|
93951 |
-
"consistency_score_2": 0.7768907563025209,
|
93952 |
-
"consistency_score_3": 0.6796218487394958,
|
93953 |
-
"consistency_score_4": 0.6067226890756302,
|
93954 |
-
"detailed_consistency_score": {
|
93955 |
-
"2_combine": {
|
93956 |
-
"Spanish,English": 0.7848739495798319,
|
93957 |
-
"Spanish,Chinese": 0.7411764705882353,
|
93958 |
-
"Spanish,Vietnamese": 0.7394957983193278,
|
93959 |
-
"English,Chinese": 0.8084033613445378,
|
93960 |
-
"English,Vietnamese": 0.8151260504201681,
|
93961 |
-
"Chinese,Vietnamese": 0.7722689075630252
|
93962 |
-
},
|
93963 |
-
"3_combine": {
|
93964 |
-
"Spanish,English,Chinese": 0.680672268907563,
|
93965 |
-
"Spanish,English,Vietnamese": 0.6840336134453782,
|
93966 |
-
"Spanish,Chinese,Vietnamese": 0.6453781512605042,
|
93967 |
-
"English,Chinese,Vietnamese": 0.7084033613445379
|
93968 |
-
},
|
93969 |
-
"4_combine": {
|
93970 |
-
"Spanish,English,Chinese,Vietnamese": 0.6067226890756302
|
93971 |
-
}
|
93972 |
-
},
|
93973 |
-
"AC3_2": 0.8115151343593726,
|
93974 |
-
"AC3_3": 0.7550731339032906,
|
93975 |
-
"AC3_4": 0.7078285476163063
|
93976 |
-
},
|
93977 |
-
"prompt_4": {
|
93978 |
-
"overall_acc": 0.8831932773109245,
|
93979 |
-
"language_acc": {
|
93980 |
-
"Spanish": 0.892436974789916,
|
93981 |
-
"English": 0.9277310924369748,
|
93982 |
-
"Chinese": 0.8638655462184874,
|
93983 |
-
"Vietnamese": 0.8487394957983193
|
93984 |
-
},
|
93985 |
-
"consistency_score_2": 0.8341736694677871,
|
93986 |
-
"consistency_score_3": 0.7602941176470588,
|
93987 |
-
"consistency_score_4": 0.7050420168067227,
|
93988 |
-
"detailed_consistency_score": {
|
93989 |
-
"2_combine": {
|
93990 |
-
"Spanish,English": 0.8798319327731092,
|
93991 |
-
"Spanish,Chinese": 0.8201680672268907,
|
93992 |
-
"Spanish,Vietnamese": 0.8142857142857143,
|
93993 |
-
"English,Chinese": 0.8495798319327731,
|
93994 |
-
"English,Vietnamese": 0.8428571428571429,
|
93995 |
-
"Chinese,Vietnamese": 0.7983193277310925
|
93996 |
-
},
|
93997 |
-
"3_combine": {
|
93998 |
-
"Spanish,English,Chinese": 0.780672268907563,
|
93999 |
-
"Spanish,English,Vietnamese": 0.7756302521008404,
|
94000 |
-
"Spanish,Chinese,Vietnamese": 0.7294117647058823,
|
94001 |
-
"English,Chinese,Vietnamese": 0.7554621848739496
|
94002 |
-
},
|
94003 |
-
"4_combine": {
|
94004 |
-
"Spanish,English,Chinese,Vietnamese": 0.7050420168067227
|
94005 |
-
}
|
94006 |
-
},
|
94007 |
-
"AC3_2": 0.8579838785447016,
|
94008 |
-
"AC3_3": 0.8171485287980964,
|
94009 |
-
"AC3_4": 0.7841260948330266
|
94010 |
-
},
|
94011 |
-
"prompt_5": {
|
94012 |
-
"overall_acc": 0.8684873949579832,
|
94013 |
-
"language_acc": {
|
94014 |
-
"Spanish": 0.8680672268907563,
|
94015 |
-
"English": 0.9168067226890756,
|
94016 |
-
"Chinese": 0.846218487394958,
|
94017 |
-
"Vietnamese": 0.8428571428571429
|
94018 |
-
},
|
94019 |
-
"consistency_score_2": 0.8084033613445377,
|
94020 |
-
"consistency_score_3": 0.7252100840336135,
|
94021 |
-
"consistency_score_4": 0.6638655462184874,
|
94022 |
-
"detailed_consistency_score": {
|
94023 |
-
"2_combine": {
|
94024 |
-
"Spanish,English": 0.8453781512605042,
|
94025 |
-
"Spanish,Chinese": 0.788235294117647,
|
94026 |
-
"Spanish,Vietnamese": 0.7907563025210084,
|
94027 |
-
"English,Chinese": 0.826890756302521,
|
94028 |
-
"English,Vietnamese": 0.8226890756302521,
|
94029 |
-
"Chinese,Vietnamese": 0.7764705882352941
|
94030 |
-
},
|
94031 |
-
"3_combine": {
|
94032 |
-
"Spanish,English,Chinese": 0.7420168067226891,
|
94033 |
-
"Spanish,English,Vietnamese": 0.7403361344537815,
|
94034 |
-
"Spanish,Chinese,Vietnamese": 0.6932773109243697,
|
94035 |
-
"English,Chinese,Vietnamese": 0.7252100840336134
|
94036 |
-
},
|
94037 |
-
"4_combine": {
|
94038 |
-
"Spanish,English,Chinese,Vietnamese": 0.6638655462184874
|
94039 |
-
}
|
94040 |
-
},
|
94041 |
-
"AC3_2": 0.8373689540463608,
|
94042 |
-
"AC3_3": 0.7904082486704036,
|
94043 |
-
"AC3_4": 0.7525144414280582
|
94044 |
-
}
|
94045 |
-
},
|
94046 |
-
"cross_mmlu": {
|
94047 |
-
"prompt_1": -1,
|
94048 |
-
"prompt_2": -1,
|
94049 |
-
"prompt_3": -1,
|
94050 |
-
"prompt_4": -1,
|
94051 |
-
"prompt_5": -1
|
94052 |
-
},
|
94053 |
-
"cross_logiqa": {
|
94054 |
-
"prompt_1": -1,
|
94055 |
-
"prompt_2": -1,
|
94056 |
-
"prompt_3": -1,
|
94057 |
-
"prompt_4": -1,
|
94058 |
-
"prompt_5": -1
|
94059 |
-
},
|
94060 |
-
"sg_eval": {
|
94061 |
-
"prompt_1": {
|
94062 |
-
"accuracy": 0.5533980582524272
|
94063 |
-
},
|
94064 |
-
"prompt_2": {
|
94065 |
-
"accuracy": 0.6213592233009708
|
94066 |
-
},
|
94067 |
-
"prompt_3": {
|
94068 |
-
"accuracy": 0.6310679611650486
|
94069 |
-
},
|
94070 |
-
"prompt_4": {
|
94071 |
-
"accuracy": 0.5436893203883495
|
94072 |
-
},
|
94073 |
-
"prompt_5": {
|
94074 |
-
"accuracy": 0.5631067961165048
|
94075 |
-
}
|
94076 |
-
},
|
94077 |
-
"cn_eval": {
|
94078 |
-
"prompt_1": -1,
|
94079 |
-
"prompt_2": -1,
|
94080 |
-
"prompt_3": -1,
|
94081 |
-
"prompt_4": -1,
|
94082 |
-
"prompt_5": -1
|
94083 |
-
},
|
94084 |
-
"us_eval": {
|
94085 |
-
"prompt_1": -1,
|
94086 |
-
"prompt_2": -1,
|
94087 |
-
"prompt_3": -1,
|
94088 |
-
"prompt_4": -1,
|
94089 |
-
"prompt_5": -1
|
94090 |
-
},
|
94091 |
-
"ph_eval": {
|
94092 |
-
"prompt_1": -1,
|
94093 |
-
"prompt_2": -1,
|
94094 |
-
"prompt_3": -1,
|
94095 |
-
"prompt_4": -1,
|
94096 |
-
"prompt_5": -1
|
94097 |
-
},
|
94098 |
-
"sing2eng": {
|
94099 |
-
"prompt_1": -1,
|
94100 |
-
"prompt_2": -1,
|
94101 |
-
"prompt_3": -1,
|
94102 |
-
"prompt_4": -1,
|
94103 |
-
"prompt_5": -1
|
94104 |
-
},
|
94105 |
-
"indommlu": {
|
94106 |
-
"prompt_1": -1,
|
94107 |
-
"prompt_2": -1,
|
94108 |
-
"prompt_3": -1,
|
94109 |
-
"prompt_4": -1,
|
94110 |
-
"prompt_5": -1
|
94111 |
-
},
|
94112 |
-
"flores_ind2eng": {
|
94113 |
-
"prompt_1": -1,
|
94114 |
-
"prompt_2": -1,
|
94115 |
-
"prompt_3": -1,
|
94116 |
-
"prompt_4": -1,
|
94117 |
-
"prompt_5": -1
|
94118 |
-
},
|
94119 |
-
"flores_vie2eng": {
|
94120 |
-
"prompt_1": -1,
|
94121 |
-
"prompt_2": -1,
|
94122 |
-
"prompt_3": -1,
|
94123 |
-
"prompt_4": -1,
|
94124 |
-
"prompt_5": -1
|
94125 |
-
},
|
94126 |
-
"flores_zho2eng": {
|
94127 |
-
"prompt_1": -1,
|
94128 |
-
"prompt_2": -1,
|
94129 |
-
"prompt_3": -1,
|
94130 |
-
"prompt_4": -1,
|
94131 |
-
"prompt_5": -1
|
94132 |
-
},
|
94133 |
-
"flores_zsm2eng": {
|
94134 |
-
"prompt_1": -1,
|
94135 |
-
"prompt_2": -1,
|
94136 |
-
"prompt_3": -1,
|
94137 |
-
"prompt_4": -1,
|
94138 |
-
"prompt_5": -1
|
94139 |
-
},
|
94140 |
-
"mmlu": {
|
94141 |
-
"prompt_1": -1,
|
94142 |
-
"prompt_2": -1,
|
94143 |
-
"prompt_3": -1,
|
94144 |
-
"prompt_4": -1,
|
94145 |
-
"prompt_5": -1
|
94146 |
-
},
|
94147 |
-
"mmlu_full": {
|
94148 |
-
"prompt_1": -1,
|
94149 |
-
"prompt_2": -1,
|
94150 |
-
"prompt_3": -1,
|
94151 |
-
"prompt_4": -1,
|
94152 |
-
"prompt_5": -1
|
94153 |
-
},
|
94154 |
-
"c_eval": {
|
94155 |
-
"prompt_1": -1,
|
94156 |
-
"prompt_2": -1,
|
94157 |
-
"prompt_3": -1,
|
94158 |
-
"prompt_4": -1,
|
94159 |
-
"prompt_5": -1
|
94160 |
-
},
|
94161 |
-
"c_eval_full": {
|
94162 |
-
"prompt_1": -1,
|
94163 |
-
"prompt_2": -1,
|
94164 |
-
"prompt_3": -1,
|
94165 |
-
"prompt_4": -1,
|
94166 |
-
"prompt_5": -1
|
94167 |
-
},
|
94168 |
-
"cmmlu": {
|
94169 |
-
"prompt_1": -1,
|
94170 |
-
"prompt_2": -1,
|
94171 |
-
"prompt_3": -1,
|
94172 |
-
"prompt_4": -1,
|
94173 |
-
"prompt_5": -1
|
94174 |
-
},
|
94175 |
-
"cmmlu_full": {
|
94176 |
-
"prompt_1": -1,
|
94177 |
-
"prompt_2": -1,
|
94178 |
-
"prompt_3": -1,
|
94179 |
-
"prompt_4": -1,
|
94180 |
-
"prompt_5": -1
|
94181 |
-
},
|
94182 |
-
"zbench": {
|
94183 |
-
"prompt_1": -1,
|
94184 |
-
"prompt_2": -1,
|
94185 |
-
"prompt_3": -1,
|
94186 |
-
"prompt_4": -1,
|
94187 |
-
"prompt_5": -1
|
94188 |
-
},
|
94189 |
-
"ind_emotion": {
|
94190 |
-
"prompt_1": -1,
|
94191 |
-
"prompt_2": -1,
|
94192 |
-
"prompt_3": -1,
|
94193 |
-
"prompt_4": -1,
|
94194 |
-
"prompt_5": -1
|
94195 |
-
},
|
94196 |
-
"ocnli": {
|
94197 |
-
"prompt_1": -1,
|
94198 |
-
"prompt_2": -1,
|
94199 |
-
"prompt_3": -1,
|
94200 |
-
"prompt_4": -1,
|
94201 |
-
"prompt_5": -1
|
94202 |
-
},
|
94203 |
-
"c3": {
|
94204 |
-
"prompt_1": -1,
|
94205 |
-
"prompt_2": -1,
|
94206 |
-
"prompt_3": -1,
|
94207 |
-
"prompt_4": -1,
|
94208 |
-
"prompt_5": -1
|
94209 |
-
},
|
94210 |
-
"dream": {
|
94211 |
-
"prompt_1": -1,
|
94212 |
-
"prompt_2": -1,
|
94213 |
-
"prompt_3": -1,
|
94214 |
-
"prompt_4": -1,
|
94215 |
-
"prompt_5": -1
|
94216 |
-
},
|
94217 |
-
"samsum": {
|
94218 |
-
"prompt_1": -1,
|
94219 |
-
"prompt_2": -1,
|
94220 |
-
"prompt_3": -1,
|
94221 |
-
"prompt_4": -1,
|
94222 |
-
"prompt_5": -1
|
94223 |
-
},
|
94224 |
-
"dialogsum": {
|
94225 |
-
"prompt_1": -1,
|
94226 |
-
"prompt_2": -1,
|
94227 |
-
"prompt_3": -1,
|
94228 |
-
"prompt_4": -1,
|
94229 |
-
"prompt_5": -1
|
94230 |
-
},
|
94231 |
-
"sst2": {
|
94232 |
-
"prompt_1": -1,
|
94233 |
-
"prompt_2": -1,
|
94234 |
-
"prompt_3": -1,
|
94235 |
-
"prompt_4": -1,
|
94236 |
-
"prompt_5": -1
|
94237 |
-
},
|
94238 |
-
"cola": {
|
94239 |
-
"prompt_1": -1,
|
94240 |
-
"prompt_2": -1,
|
94241 |
-
"prompt_3": -1,
|
94242 |
-
"prompt_4": -1,
|
94243 |
-
"prompt_5": -1
|
94244 |
-
},
|
94245 |
-
"qqp": {
|
94246 |
-
"prompt_1": -1,
|
94247 |
-
"prompt_2": -1,
|
94248 |
-
"prompt_3": -1,
|
94249 |
-
"prompt_4": -1,
|
94250 |
-
"prompt_5": -1
|
94251 |
-
},
|
94252 |
-
"mnli": {
|
94253 |
-
"prompt_1": -1,
|
94254 |
-
"prompt_2": -1,
|
94255 |
-
"prompt_3": -1,
|
94256 |
-
"prompt_4": -1,
|
94257 |
-
"prompt_5": -1
|
94258 |
-
},
|
94259 |
-
"qnli": {
|
94260 |
-
"prompt_1": -1,
|
94261 |
-
"prompt_2": -1,
|
94262 |
-
"prompt_3": -1,
|
94263 |
-
"prompt_4": -1,
|
94264 |
-
"prompt_5": -1
|
94265 |
-
},
|
94266 |
-
"wnli": {
|
94267 |
-
"prompt_1": -1,
|
94268 |
-
"prompt_2": -1,
|
94269 |
-
"prompt_3": -1,
|
94270 |
-
"prompt_4": -1,
|
94271 |
-
"prompt_5": -1
|
94272 |
-
},
|
94273 |
-
"rte": {
|
94274 |
-
"prompt_1": -1,
|
94275 |
-
"prompt_2": -1,
|
94276 |
-
"prompt_3": -1,
|
94277 |
-
"prompt_4": -1,
|
94278 |
-
"prompt_5": -1
|
94279 |
-
},
|
94280 |
-
"mrpc": {
|
94281 |
-
"prompt_1": -1,
|
94282 |
-
"prompt_2": -1,
|
94283 |
-
"prompt_3": -1,
|
94284 |
-
"prompt_4": -1,
|
94285 |
-
"prompt_5": -1
|
94286 |
-
}
|
94287 |
-
},
|
94288 |
-
"five_shot": {
|
94289 |
-
"cross_xquad": {
|
94290 |
-
"prompt_1": -1
|
94291 |
-
},
|
94292 |
-
"cross_mmlu": {
|
94293 |
-
"prompt_1": -1
|
94294 |
-
},
|
94295 |
-
"cross_logiqa": {
|
94296 |
-
"prompt_1": -1
|
94297 |
-
},
|
94298 |
-
"sg_eval": {
|
94299 |
-
"prompt_1": -1
|
94300 |
-
},
|
94301 |
-
"cn_eval": {
|
94302 |
-
"prompt_1": -1
|
94303 |
-
},
|
94304 |
-
"us_eval": {
|
94305 |
-
"prompt_1": -1
|
94306 |
-
},
|
94307 |
-
"ph_eval": {
|
94308 |
-
"prompt_1": -1
|
94309 |
-
},
|
94310 |
-
"sing2eng": {
|
94311 |
-
"prompt_1": -1
|
94312 |
-
},
|
94313 |
-
"indommlu": {
|
94314 |
-
"prompt_1": -1
|
94315 |
-
},
|
94316 |
-
"flores_ind2eng": {
|
94317 |
-
"prompt_1": -1
|
94318 |
-
},
|
94319 |
-
"flores_vie2eng": {
|
94320 |
-
"prompt_1": -1
|
94321 |
-
},
|
94322 |
-
"flores_zho2eng": {
|
94323 |
-
"prompt_1": -1
|
94324 |
-
},
|
94325 |
-
"flores_zsm2eng": {
|
94326 |
-
"prompt_1": -1
|
94327 |
-
},
|
94328 |
-
"mmlu": {
|
94329 |
-
"prompt_1": -1
|
94330 |
-
},
|
94331 |
-
"mmlu_full": {
|
94332 |
-
"prompt_1": -1
|
94333 |
-
},
|
94334 |
-
"c_eval": {
|
94335 |
-
"prompt_1": -1
|
94336 |
-
},
|
94337 |
-
"c_eval_full": {
|
94338 |
-
"prompt_1": -1
|
94339 |
-
},
|
94340 |
-
"cmmlu": {
|
94341 |
-
"prompt_1": -1
|
94342 |
-
},
|
94343 |
-
"cmmlu_full": {
|
94344 |
-
"prompt_1": -1
|
94345 |
-
},
|
94346 |
-
"zbench": {
|
94347 |
-
"prompt_1": -1
|
94348 |
-
},
|
94349 |
-
"ind_emotion": {
|
94350 |
-
"prompt_1": -1
|
94351 |
-
},
|
94352 |
-
"ocnli": {
|
94353 |
-
"prompt_1": -1
|
94354 |
-
},
|
94355 |
-
"c3": {
|
94356 |
-
"prompt_1": -1
|
94357 |
-
},
|
94358 |
-
"dream": {
|
94359 |
-
"prompt_1": -1
|
94360 |
-
},
|
94361 |
-
"samsum": {
|
94362 |
-
"prompt_1": -1
|
94363 |
-
},
|
94364 |
-
"dialogsum": {
|
94365 |
-
"prompt_1": -1
|
94366 |
-
},
|
94367 |
-
"sst2": {
|
94368 |
-
"prompt_1": -1
|
94369 |
-
},
|
94370 |
-
"cola": {
|
94371 |
-
"prompt_1": -1
|
94372 |
-
},
|
94373 |
-
"qqp": {
|
94374 |
-
"prompt_1": -1
|
94375 |
-
},
|
94376 |
-
"mnli": {
|
94377 |
-
"prompt_1": -1
|
94378 |
-
},
|
94379 |
-
"qnli": {
|
94380 |
-
"prompt_1": -1
|
94381 |
-
},
|
94382 |
-
"wnli": {
|
94383 |
-
"prompt_1": -1
|
94384 |
-
},
|
94385 |
-
"rte": {
|
94386 |
-
"prompt_1": -1
|
94387 |
-
},
|
94388 |
-
"mrpc": {
|
94389 |
-
"prompt_1": -1
|
94390 |
-
}
|
94391 |
-
}
|
94392 |
}
|
94393 |
}
|
|
|
32277 |
},
|
32278 |
"flores_ind2eng": {
|
32279 |
"prompt_1": {
|
32280 |
+
"bleu_score": 0.07998569558291352
|
32281 |
},
|
32282 |
"prompt_2": {
|
32283 |
"bleu_score": 0.08178489772334997
|
|
|
32294 |
},
|
32295 |
"flores_vie2eng": {
|
32296 |
"prompt_1": {
|
32297 |
+
"bleu_score": 0.06595796158495684
|
32298 |
},
|
32299 |
"prompt_2": {
|
32300 |
+
"bleu_score": 0.0659317494666598
|
32301 |
},
|
32302 |
"prompt_3": {
|
32303 |
+
"bleu_score": 0.06631225923987717
|
32304 |
},
|
32305 |
"prompt_4": {
|
32306 |
+
"bleu_score": 0.06562445607881094
|
32307 |
},
|
32308 |
"prompt_5": {
|
32309 |
+
"bleu_score": 0.07261211813311091
|
32310 |
}
|
32311 |
},
|
32312 |
"flores_zho2eng": {
|
32313 |
"prompt_1": {
|
32314 |
+
"bleu_score": 0.0502826099860938
|
32315 |
},
|
32316 |
"prompt_2": {
|
32317 |
"bleu_score": 0.05134705353375384
|
|
|
32328 |
},
|
32329 |
"flores_zsm2eng": {
|
32330 |
"prompt_1": {
|
32331 |
+
"bleu_score": 0.08113017898863324
|
32332 |
},
|
32333 |
"prompt_2": {
|
32334 |
"bleu_score": 0.08240154342677156
|
|
|
93866 |
"prompt_1": -1
|
93867 |
}
|
93868 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93869 |
}
|
93870 |
}
|