diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/arc_easy-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/arc_easy-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..d82be433abe592079dc9ce67ec7e97fe668c8590 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/arc_easy-v0-loglikelihood @@ -0,0 +1 @@ +ffa6e39a35a16299dcb015f17f986aaa598ad8b4840c4cebe0339a7042232741 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/arc_easy-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/arc_easy-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..f217448594199a54d671be7302857509eb6d691f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/arc_easy-v0-res.json @@ -0,0 +1 @@ +{"results": {"arc_easy": {"acc": 0.2474747474747475, "acc_norm": 0.24074074074074073, "acc_norm_stderr": 0.008772796145221907, "acc_stderr": 0.008855114414834707}}, "versions": {"arc_easy": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..1bbb3eb0c26b177cb739f58d8098b339278fcd84 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-res.json @@ -0,0 +1 @@ +{"results": {"arithmetic_3da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_3da": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..b52790c74b649b455fd90ca93cc70ad23c3d129b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-loglikelihood @@ -0,0 +1 @@ +d3557beb8b9e5704122c2fc6362b11fbe2c3f2f3cb72aed4462b208767c40e01 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..07106a905853aad9876257f308e3af5900066253 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood @@ -0,0 +1 @@ +2a84231e7b79f517427e57e2099c88fed3d60a7efab4ef9506e263b4091d5cfa \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_complex_NP_island-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_complex_NP_island-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..5bfbffb6e4c931490930f37e256e5f2ed3892cec --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_complex_NP_island-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_complex_NP_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_complex_NP_island": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..cf08b036b9eccc0d0151cb41a6ec0c4eeede2f91 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_distractor_agreement_relative_clause": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_distractor_agreement_relative_clause": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..076319f01e4309fae1bebb80834d35ebdebec6ec --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_existential_there_quantifiers_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_quantifiers_1": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..31772c9a1cc093da4efd09f298d98c26c7fe8383 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood @@ -0,0 +1 @@ +ceede5b38248a62125a74a8332602b8eac5ef40864f071ad8d86e7971e07219d \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..a5c4bc6ca2b4f3624dd5781c58efee26c100c3af --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood @@ -0,0 +1 @@ +a3a702a3335c79b02b36caf37c68069050c2a8a3a03c3610c09afc39d2b83fb1 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..4fba717b88b566130bd8dbd52dd0da2d5a65ee17 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_matrix_question_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_matrix_question_npi_licensor_present": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..efe40ced37f6a7890d247b0292e80d55dde1849c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_npi_present_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_npi_present_2": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_only_npi_licensor_present-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_only_npi_licensor_present-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..321702a66e7f2a1e762a4f9b9ae4b99a6f813c3b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_only_npi_licensor_present-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_only_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_only_npi_licensor_present": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..d667f4694632d514448e58d30d7e2f051b5b707b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-loglikelihood @@ -0,0 +1 @@ +755bdfe2c89737c43001ff1dc83d68ad33e444aaf0669af66aaf82dcd09f2eca \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..87b49c5de9f79253e3cfa34ad3e6fb5c8d8a7b06 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood @@ -0,0 +1 @@ +7c2ed82612af9175052cd44d8e178b6dd084c04eb462a3d88fcacfad2df8be8e \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_principle_A_domain_2-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_principle_A_domain_2-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..1bda1a2aa9c1eeee68b3ca88f2de38cbb8e5d67b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_principle_A_domain_2-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_principle_A_domain_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_2": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..0a32ca7f971e537ab6fc6d338db3ad1c3d506f64 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood @@ -0,0 +1 @@ +5bc0441f31e32443cf761bca6e961d504e1e84b15aa4e1d79e5c8ed5b4c2aa3a \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..a26cb174a06e1941ae79e137161d85c4f5814838 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood @@ -0,0 +1 @@ +973fe56534fdef1207f0fc08dd09a210304c55f33c6cbb17552754bf54f11c86 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..d2c99ab803288212934142c2507a8c316695a34b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-res.json @@ -0,0 +1 @@ +{"results": {"blimp_transitive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_transitive": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..c3e6af12f2da0a1857c0f0456bf4052d5558329e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood @@ -0,0 +1 @@ +4d4aaa0274ccd485ff8430ed61b8f83806febe18c16616c7d050f637a0463eba \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..4c15f2283eb93c5ab4b9cdbddf3e91117211918d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood @@ -0,0 +1 @@ +d41a9b85e4c31e445bf9b46b8642df02203ccc02b4a9b254bf76066d5c54b4b7 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_autre-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_autre-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..ab0d9a4db42a5e4da196834b40457a95bf9a9129 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_autre-v0-loglikelihood @@ -0,0 +1 @@ +a197ccc8538231404a8e43f5ed0fbbfb2c317b4da337f6e7aa9642131aeb426a \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_race_color-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_race_color-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..75d356522edf08f93f03d3ba37ed323d39f5b35e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_race_color-v0-res.json @@ -0,0 +1 @@ +{"results": {"crows_pairs_english_race_color": {"likelihood_difference": 0.3322827903840805, "likelihood_difference_stderr": 0.01019838186372816, "pct_stereotype": 0.4822834645669291, "pct_stereotype_stderr": 0.022191835500120254}}, "versions": {"crows_pairs_english_race_color": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_religion-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_religion-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..b56bc901ca48380f5a188f9c18ef12ba0abe49ca --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_religion-v0-loglikelihood @@ -0,0 +1 @@ +2ed57377174adaf0fb30037eb055eafdd02cd46e57bc32066d5fecd90a14b6e1 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_religion-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_religion-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..670f2d2cffeac37f0510e17d7195a0a68700d4fe --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_religion-v0-res.json @@ -0,0 +1 @@ +{"results": {"crows_pairs_english_religion": {"likelihood_difference": 0.32170622542430666, "likelihood_difference_stderr": 0.022101541392310232, "pct_stereotype": 0.43243243243243246, "pct_stereotype_stderr": 0.04723583229758394}}, "versions": {"crows_pairs_english_religion": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_socioeconomic-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_socioeconomic-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..89bd7338ada6ff7ef485492c5656342881b70600 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_english_socioeconomic-v0-res.json @@ -0,0 +1 @@ +{"results": {"crows_pairs_english_socioeconomic": {"likelihood_difference": 0.3424577735757881, "likelihood_difference_stderr": 0.017459994170011896, "pct_stereotype": 0.46842105263157896, "pct_stereotype_stderr": 0.036297038088316094}}, "versions": {"crows_pairs_english_socioeconomic": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_french_autre-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_french_autre-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..44d8ff96e413cf6eb458a896d47321a0f3996b70 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_french_autre-v0-res.json @@ -0,0 +1 @@ +{"results": {"crows_pairs_french_autre": {"likelihood_difference": 0.3517045997290783, "likelihood_difference_stderr": 0.07647821858130377, "pct_stereotype": 0.23076923076923078, "pct_stereotype_stderr": 0.12162606385262997}}, "versions": {"crows_pairs_french_autre": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_french_physical_appearance-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_french_physical_appearance-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..167b5e3ba055d1d67ca70e4f9cd3879f6b40b179 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/crows_pairs_french_physical_appearance-v0-loglikelihood @@ -0,0 +1 @@ +ea61eaad64e9292790d4bbef955ffeebed7a595de098bc5ac726a6e51f27f9af \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_utilitarianism-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_utilitarianism-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..857af346b47d7ce11ee4192b928608a2111776f4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/ethics_utilitarianism-v0-res.json @@ -0,0 +1 @@ +{"results": {"ethics_utilitarianism": {"acc": 0.49771214642262895, "acc_stderr": 0.007211546310787838}}, "versions": {"ethics_utilitarianism": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl b/scripts/yans/lm-evaluation-harness/tests/testdata/gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl new file mode 100644 index 0000000000000000000000000000000000000000..657a1621f425215826e84cbc025ce12554480a6e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad18c6203e8b3eda1b88f8dfd7d197c4053c07640b0542fcdd8170e9b3bd2d30 +size 2479 diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hellaswag-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hellaswag-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..6be94a640950b2451775fddccbf80060c4a673b0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hellaswag-v0-res.json @@ -0,0 +1 @@ +{"results": {"hellaswag": {"acc": 0.24965146385182235, "acc_norm": 0.24756024696275641, "acc_norm_stderr": 0.004307128573285236, "acc_stderr": 0.004319267432460666}}, "versions": {"hellaswag": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-anatomy-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-anatomy-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..a7ae5fa705e58cf0e7c06ca0fe84a186d24b506f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-anatomy-v0-loglikelihood @@ -0,0 +1 @@ +bf05e04ed8cf61cf3aad294ed3f5a16137775ffdd20f1b129022ddffc1251768 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-college_biology-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-college_biology-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..6705b9cad27c7f1eb647b513861646faaccad584 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-college_biology-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-college_biology": {"acc": 0.24305555555555555, "acc_norm": 0.2361111111111111, "acc_norm_stderr": 0.03551446610810826, "acc_stderr": 0.03586879280080341}}, "versions": {"hendrycksTest-college_biology": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-elementary_mathematics-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-elementary_mathematics-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..e281f72feb428451f27dbaba80408c468ef51bce --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-elementary_mathematics-v0-loglikelihood @@ -0,0 +1 @@ +6b21f5cd5606268421a667152ec989424b66905c02adbab8d4ff6bb9d21b77d1 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_physics-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_physics-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..49a780bc97953db32716ccc580390c5d21cfc252 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_physics-v0-loglikelihood @@ -0,0 +1 @@ +dae59e82d3d4d8dec82239d9620b72cc47bb6efbe2f1c2f9b9d23e849c9c5e32 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..4c6a21d7dac4cd7b6fa217e8bebf34d959554a7a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-high_school_statistics": {"acc": 0.2962962962962963, "acc_norm": 0.3055555555555556, "acc_norm_stderr": 0.03141554629402544, "acc_stderr": 0.03114144782353604}}, "versions": {"hendrycksTest-high_school_statistics": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..228dfe072cd02f94bced495f271c5cc108850719 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood @@ -0,0 +1 @@ +1c8b994bd9a63ec874fc8d0e3a27077118b7adc472306b2fd6c55635a78b9d52 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-miscellaneous-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-miscellaneous-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..5c7859eb3a80a849deee7d67d37f71a84c8eeaf6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-miscellaneous-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-miscellaneous": {"acc": 0.23499361430395913, "acc_norm": 0.2515964240102171, "acc_norm_stderr": 0.015517322365529622, "acc_stderr": 0.015162024152278445}}, "versions": {"hendrycksTest-miscellaneous": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_disputes-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_disputes-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..26ea1c2a75ccfb96af880ee30eef11520e9ea39c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-moral_disputes-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-moral_disputes": {"acc": 0.24855491329479767, "acc_norm": 0.27167630057803466, "acc_norm_stderr": 0.023948512905468365, "acc_stderr": 0.023267528432100174}}, "versions": {"hendrycksTest-moral_disputes": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-sociology-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-sociology-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..8711cf195e4fa92606a47c1b7c701643f0ef483e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-sociology-v0-res.json @@ -0,0 +1 @@ +{"results": {"hendrycksTest-sociology": {"acc": 0.23383084577114427, "acc_norm": 0.24875621890547264, "acc_norm_stderr": 0.030567675938916707, "acc_stderr": 0.02992941540834838}}, "versions": {"hendrycksTest-sociology": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-virology-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-virology-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..3555c2c5351eb369bf0dc9cfedf93f0bbc3de7b4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/hendrycksTest-virology-v0-loglikelihood @@ -0,0 +1 @@ +0ffa491f7bad2abbb64ecd752a295729167599b3815238cab0ecf4cb08bba9b6 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..30fcb907b5dbbabb2af4cf3a156cf18c67d387df --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai-v0-res.json @@ -0,0 +1 @@ +{"results": {"lambada_openai": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_openai": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_de-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_de-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..ae19de0e6951bd90cd1e713d14816767496044e8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_de-v0-loglikelihood @@ -0,0 +1 @@ +5ad125e1708499832b2cee8c3388f89f9c0277010fd96fbd3359039ce8105984 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..3c444f66611959e4c13451d306fba403261ecfbb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-loglikelihood @@ -0,0 +1 @@ +5d16f4a0c51dc6d7b6df2ebeba2bbfa51e700b843779b559b3d90183d7b02a11 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_standard-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_standard-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..1f15d0be56b5edf18ad7cc2bec4977fae99f060b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/lambada_standard-v0-res.json @@ -0,0 +1 @@ +{"results": {"lambada_standard": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_standard": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..8ee1d031de8ec7d2af61c83567d433f9116ba24d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v0-res.json @@ -0,0 +1 @@ +{"results": {"math_counting_and_prob": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_counting_and_prob": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/math_geometry-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/math_geometry-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..eb6851fc63ff08c657743ef6abf5073ba73144e5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/math_geometry-v1-res.json @@ -0,0 +1 @@ +{"results": {"math_geometry": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_geometry": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..63ab45b9ff890a0ef7c2108133b23bf0043f13f8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v1-res.json @@ -0,0 +1 @@ +{"results": {"math_intermediate_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_intermediate_algebra": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/mc_taco-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/mc_taco-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..f0ce5c64580d1132710e596cc287126ba77394e6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/mc_taco-v0-loglikelihood @@ -0,0 +1 @@ +1811808ef05afd5f30ffc3471622a3dd7a1b681b17a2f7616695ad6b2a45943c \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..3fb242da3a2d274cbcc84bf86a6bb11f02df27ab --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-loglikelihood @@ -0,0 +1 @@ +3784acf322e79f31702a7a0612030e4ba5c4fc466ad976a34ee3f3d7278c01f0 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/mutual_plus-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/mutual_plus-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..f4ba9d37310a19cc7928fd0d599776d8a9da8dba --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/mutual_plus-v0-loglikelihood @@ -0,0 +1 @@ +b846bb9db109535f59a93d1ce340cf09f68bdf4fed5b8decd168784220fe07fa \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_books3-v0-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_books3-v0-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..b483d3b45b43abddd6cbd169a8afda8d3f803d9c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_books3-v0-loglikelihood_rolling @@ -0,0 +1 @@ +0f8f36f705b999b6d55fa72ff89a82793dd1cb568ab1f8727a6a2086a12b9410 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..860aa06c974e58d03f54ab1d9cb14c7e98019d4e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v0-res.json @@ -0,0 +1 @@ +{"results": {"pile_dm-mathematics": {"bits_per_byte": 6.176600873627999e-05, "byte_perplexity": 1.0000617679162955, "word_perplexity": 1.0002875035042451}}, "versions": {"pile_dm-mathematics": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..80272607557f6e0c97220efa30c8b9ad38f52aa8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v1-loglikelihood_rolling @@ -0,0 +1 @@ +e67d3dbccd47d308bfc5b0e66b76d0dfc5e386ebfa94e056562c2281c395543f \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..b948f0d3691443f50c9f9d5ae24804b0c7e79aaa --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_europarl-v1-res.json @@ -0,0 +1 @@ +{"results": {"pile_europarl": {"bits_per_byte": 1.2477664839621123e-05, "byte_perplexity": 1.000008648895605, "word_perplexity": 1.000063506523818}}, "versions": {"pile_europarl": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_github-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_github-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..cf8251e4f68e2e893624142031e80d4d5777f4f2 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_github-v1-loglikelihood_rolling @@ -0,0 +1 @@ +df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_hackernews-v1-loglikelihood_rolling b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_hackernews-v1-loglikelihood_rolling new file mode 100644 index 0000000000000000000000000000000000000000..48b767bfe706bb035e4553ea9c4119347303bab9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_hackernews-v1-loglikelihood_rolling @@ -0,0 +1 @@ +ec1082ee5a5326e0d57aa4e73b634937140c1de9af95f154e8ab57b05d9b422b \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pile_uspto-v1-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_uspto-v1-res.json new file mode 100644 index 0000000000000000000000000000000000000000..599ae44ef430af958ab53c57d0b7900928ad243a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pile_uspto-v1-res.json @@ -0,0 +1 @@ +{"results": {"pile_uspto": {"bits_per_byte": 0.000174024142670342, "byte_perplexity": 1.00012063161925, "word_perplexity": 1.0007716198916954}}, "versions": {"pile_uspto": 1}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/pubmedqa-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/pubmedqa-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..97db87ce2be9b3d2c08479ee73c7ba3923817795 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/pubmedqa-v0-loglikelihood @@ -0,0 +1 @@ +7a04a1fb1d2b19db84fd15c224015d6c0306a41195a4e71fe6abd48fb4d53b9f \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..0e67fac5f7d54c19e42cae4cfc850089c7c61187 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-loglikelihood @@ -0,0 +1 @@ +7e17261820acb365966cb9431d93aec983b14393eaeefbc96e30a11cf58bc6df \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..ecc86dc396332c1aaa8e638e5413633a504e7206 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood @@ -0,0 +1 @@ +97b551b0fc3d239aad4929a2e8e79c986891aefd9fcd19441fea0382d507889e \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/random_insertion-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/random_insertion-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..4844e5393b8358d225f516f1a948f1deccab7840 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/random_insertion-v0-greedy_until @@ -0,0 +1 @@ +6c48baa6924f3635120f33062251c4b571b3d4e9fe46b14d91f54ddd1c857997 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/random_insertion-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/random_insertion-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..9b5f507f6745120414ba5cfd39fc92eac4e48424 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/random_insertion-v0-res.json @@ -0,0 +1 @@ +{"results": {"random_insertion": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"random_insertion": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/squad2-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/squad2-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..41300bc19fd3142bfd547bf21f2b28b3ce5b21c9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/squad2-v0-loglikelihood @@ -0,0 +1 @@ +287e87cc6878debcc80d9b6df4e2d0a74ed29068e0e0a80906c8441843a17cee \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/squad2-v1-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/squad2-v1-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..70df2fd6ae1f59de5b6f3f6712bc2331197400c8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/squad2-v1-greedy_until @@ -0,0 +1 @@ +e17e3d85c1d5adaf2d6b4b752c4babc2e0b3a6e144e6de70cb3b2287e85109b8 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl new file mode 100644 index 0000000000000000000000000000000000000000..258d73cd68b190d87670edd3c11210c97e59ab91 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0a3c44334dc2b7c48aa448d0a2c2ffde3c9a28e6c29d4ed175cbb22334bef3 +size 1805 diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1c627edfd96299ad364c96a4eae2ac15f4acea88 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36023aa22487e0d2de08cb3ecabd0cdbd6c887c63c7006b3544b7809bfcb58bc +size 1806 diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6b3a1d3f57a5a19f012439a4eb611af6a7f22ea7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9278fa1ee2540397f38cc755be8cad1277c51dc92d91aeea8c4ba1a26eb8490 +size 1773 diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d212dc33727c51da8f2ea6fe29c2057b70d32c2f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9dd70bef30b58d7c45a64ce10e7eb8ed66df51cbddf24ae8ed37f6c9104b024 +size 1813 diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/webqs-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/webqs-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..9f0fdc76cab096c80a87295773054510803ba218 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/webqs-v0-res.json @@ -0,0 +1 @@ +{"results": {"webqs": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"webqs": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/winogrande-v0-loglikelihood b/scripts/yans/lm-evaluation-harness/tests/testdata/winogrande-v0-loglikelihood new file mode 100644 index 0000000000000000000000000000000000000000..97866f6ce45cb9a213d27310a78b7cdeab23bc9a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/winogrande-v0-loglikelihood @@ -0,0 +1 @@ +90a3eff49de9173964d46f5ed57bcf9a78a72dd1bfe0e5323b25cebb40b49ea9 \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt14-en-fr-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt14-en-fr-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa13f02854c8eec0591be980486afe48d7f97a9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt14-en-fr-v0-res.json @@ -0,0 +1 @@ +{"results": {"wmt14-en-fr": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.011284118461117099, "chrf_stderr": 7.340651275964445e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt14-en-fr": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt14-fr-en-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt14-fr-en-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..5261876f55a69dcaf33b3842690f81c12eb42f3a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt14-fr-en-v0-res.json @@ -0,0 +1 @@ +{"results": {"wmt14-fr-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01275083169440515, "chrf_stderr": 8.45474998563806e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt14-fr-en": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt16-de-en-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt16-de-en-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..75f1072b6e7f2bdd9ecd98987c86fefd3375fb6d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt16-de-en-v0-greedy_until @@ -0,0 +1 @@ +d30e23e38d9a45b9c31e1dfd14b58d0b7020df4b9c8a1c697aa6bc5fba8ce08a \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt16-en-de-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt16-en-de-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..88bee7ffa69b1bf7accdd56a3870f61d4c0453da --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt16-en-de-v0-res.json @@ -0,0 +1 @@ +{"results": {"wmt16-en-de": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.010909486120840577, "chrf_stderr": 0.000122611124711072, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-en-de": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-pl-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-pl-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..13bfd5b552b92b771266666dd5fe5b9496064051 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-pl-v0-res.json @@ -0,0 +1 @@ +{"results": {"wmt20-en-pl": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.009006977773147825, "chrf_stderr": 0.00023387733367766675, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-pl": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-ru-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-ru-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..af339eda5d3d76e00e3e0f3c800353bb2b7fb696 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-en-ru-v0-res.json @@ -0,0 +1 @@ +{"results": {"wmt20-en-ru": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.0007327811114614671, "chrf_stderr": 4.43155903515048e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ru": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-fr-de-v0-greedy_until b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-fr-de-v0-greedy_until new file mode 100644 index 0000000000000000000000000000000000000000..7353ad4475b3d292bfd64e6dcb41972d697c34da --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-fr-de-v0-greedy_until @@ -0,0 +1 @@ +8a4b65c59dcac6591d46261909ee92ebcf41c19ee7442b12842302b2d8aeb36f \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-ja-en-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-ja-en-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..4344b7cd8a1a9bfb8cd60e2aa0ece17f530f7d3d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wmt20-ja-en-v0-res.json @@ -0,0 +1 @@ +{"results": {"wmt20-ja-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.010703148854351403, "chrf_stderr": 0.00022242113108130186, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ja-en": 0}} \ No newline at end of file diff --git a/scripts/yans/lm-evaluation-harness/tests/testdata/wsc273-v0-res.json b/scripts/yans/lm-evaluation-harness/tests/testdata/wsc273-v0-res.json new file mode 100644 index 0000000000000000000000000000000000000000..8f023b422a7003d2984e35e58045d8866954a4c4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/tests/testdata/wsc273-v0-res.json @@ -0,0 +1 @@ +{"results": {"wsc273": {"acc": 0.5164835164835165, "acc_stderr": 0.0303004740355766}}, "versions": {"wsc273": 0}} \ No newline at end of file