gardarjuto committed on
Commit
6aa2b20
1 Parent(s): 1d38d16

add blacklisted examples for inflection

Browse files
Files changed (1) hide show
  1. quiz.py +20 -0
quiz.py CHANGED
@@ -22,6 +22,19 @@ BENCHMARKS = {
22
  "name": "Fallbeygingar",
23
  "path": "mideind/icelandic-inflection-all-flat",
24
  "type": "free_text",
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  },
26
  "icelandic-belebele": {
27
  "name": "Lesskilningur",
@@ -182,6 +195,13 @@ class BenchmarkQuiz:
182
  icelandic_sentence_gec_preprocessing(sample) for sample in samples
183
  ]
184
  elif benchmark_name == "icelandic-inflection-all":
 
 
 
 
 
 
 
185
  samples = [inflection_all_preprocessing(sample) for sample in samples]
186
  elif benchmark_name == "icelandic-belebele":
187
  samples = [belebele_preprocessing(sample) for sample in samples]
 
22
  "name": "Fallbeygingar",
23
  "path": "mideind/icelandic-inflection-all-flat",
24
  "type": "free_text",
25
+ "blacklisted_noun_phrases": [
26
+ "hágæða sprengjutilræði",
27
+ "óstöðvandi geðröskun",
28
+ "allsber meirihluti",
29
+ "geðsjúkt álagsstýrikerfi",
30
+ "kynþokkafullt starfsvið",
31
+ "lettneskur þræll",
32
+ "nígerískt meyjarhaft",
33
+ "kynæsandi málvísindamaður",
34
+ "kynþokkafullur menntaskólakennari",
35
+ "lóðrétt forhúð",
36
+ "vandþrædd hvatabuska",
37
+ ],
38
  },
39
  "icelandic-belebele": {
40
  "name": "Lesskilningur",
 
195
  icelandic_sentence_gec_preprocessing(sample) for sample in samples
196
  ]
197
  elif benchmark_name == "icelandic-inflection-all":
198
+ while any(
199
+ sample["noun_phrase"] in BENCHMARKS[benchmark_name]["blacklisted_noun_phrases"]
200
+ for sample in samples
201
+ ):
202
+ print(samples)
203
+ random_indices = random.sample(range(len(dataset)), 5)
204
+ samples = dataset.select(random_indices)
205
  samples = [inflection_all_preprocessing(sample) for sample in samples]
206
  elif benchmark_name == "icelandic-belebele":
207
  samples = [belebele_preprocessing(sample) for sample in samples]