djstrong committed on
Commit
23c87c8
·
1 Parent(s): 3c6913b
src/about.py CHANGED
@@ -43,7 +43,64 @@ class Tasks(Enum):
43
  # task27 = Task("polish_poquad_reranking", "acc,none", "poquad_reranking", "other", 0.0)
44
  # task28 = Task("polish_abstractive_poquad_rag", "levenshtein,none", "abstractive_poquad_rag", "other", 0.0)
45
  # task29 = Task("polish_abstractive_poquad_open_book", "levenshtein,none", "abstractive_poquad_open_book", "other", 0.0)
46
- task30 = Task("polish_pes_regex", "exact_match,score-first", "pes_g", "generate_until", 0.2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
 
49
  g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
 
43
  # task27 = Task("polish_poquad_reranking", "acc,none", "poquad_reranking", "other", 0.0)
44
  # task28 = Task("polish_abstractive_poquad_rag", "levenshtein,none", "abstractive_poquad_rag", "other", 0.0)
45
  # task29 = Task("polish_abstractive_poquad_open_book", "levenshtein,none", "abstractive_poquad_open_book", "other", 0.0)
46
+ task30a = Task("polish_pes_regex", "exact_match,score-first", "pes", "generate_until", 0.2)
47
+ task30 = Task("polish_pes_alergologia", "exact_match,score-first", "alergologia", "generate_until", 0.2)
48
+ task31 = Task("polish_pes_anestezjologia", "exact_match,score-first", "anestezjologia", "generate_until", 0.2)
49
+ task32 = Task("polish_pes_angiologia", "exact_match,score-first", "angiologia", "generate_until", 0.2)
50
+ task33 = Task("polish_pes_balneologia_i_medycyna_fizykalna", "exact_match,score-first", "balneologia_i_medycyna_fizykalna", "generate_until", 0.2)
51
+ task34 = Task("polish_pes_chirurgia_dziecieca", "exact_match,score-first", "chirurgia_dziecieca", "generate_until", 0.2)
52
+ task35 = Task("polish_pes_chirurgia_naczyniowa", "exact_match,score-first", "chirurgia_naczyniowa", "generate_until", 0.2)
53
+ task36 = Task("polish_pes_chirurgia_ogolna", "exact_match,score-first", "chirurgia_ogolna", "generate_until", 0.2)
54
+ task37 = Task("polish_pes_chirurgia_onkologiczna", "exact_match,score-first", "chirurgia_onkologiczna", "generate_until", 0.2)
55
+ task38 = Task("polish_pes_chirurgia_stomatologiczna", "exact_match,score-first", "chirurgia_stomatologiczna", "generate_until", 0.2)
56
+ task39 = Task("polish_pes_chirurgia_szczekowo-twarzowa", "exact_match,score-first", "chirurgia_szczekowo-twarzowa", "generate_until", 0.2)
57
+ task40 = Task("polish_pes_choroby_pluc", "exact_match,score-first", "choroby_pluc", "generate_until", 0.2)
58
+ task41 = Task("polish_pes_choroby_pluc_dzieci", "exact_match,score-first", "choroby_pluc_dzieci", "generate_until", 0.2)
59
+ task42 = Task("polish_pes_choroby_wewnetrzne", "exact_match,score-first", "choroby_wewnetrzne", "generate_until", 0.2)
60
+ task43 = Task("polish_pes_choroby_zakazne", "exact_match,score-first", "choroby_zakazne", "generate_until", 0.2)
61
+ task44 = Task("polish_pes_dermatologia_i_wenerologia", "exact_match,score-first", "dermatologia_i_wenerologia", "generate_until", 0.2)
62
+ task45 = Task("polish_pes_diabetologia", "exact_match,score-first", "diabetologia", "generate_until", 0.2)
63
+ task46 = Task("polish_pes_endokrynologia", "exact_match,score-first", "endokrynologia", "generate_until", 0.2)
64
+ task47 = Task("polish_pes_endokrynologia_ginekologiczna_i_rozrodczosc", "exact_match,score-first", "endokrynologia_ginekologiczna_i_rozrodczosc", "generate_until", 0.2)
65
+ task48 = Task("polish_pes_endokrynologia_i_diabetologia_dziecieca", "exact_match,score-first", "endokrynologia_i_diabetologia_dziecieca", "generate_until", 0.2)
66
+ task49 = Task("polish_pes_gastroenterologia", "exact_match,score-first", "gastroenterologia", "generate_until", 0.2)
67
+ task50 = Task("polish_pes_gastroenterologia_dziecieca", "exact_match,score-first", "gastroenterologia_dziecieca", "generate_until", 0.2)
68
+ task51 = Task("polish_pes_geriatria", "exact_match,score-first", "geriatria", "generate_until", 0.2)
69
+ task52 = Task("polish_pes_ginekologia_onkologiczna", "exact_match,score-first", "ginekologia_onkologiczna", "generate_until", 0.2)
70
+ task53 = Task("polish_pes_hematologia", "exact_match,score-first", "hematologia", "generate_until", 0.2)
71
+ task54 = Task("polish_pes_hipertensjologia", "exact_match,score-first", "hipertensjologia", "generate_until", 0.2)
72
+ task55 = Task("polish_pes_kardiochirurgia", "exact_match,score-first", "kardiochirurgia", "generate_until", 0.2)
73
+ task56 = Task("polish_pes_kardiologia", "exact_match,score-first", "kardiologia", "generate_until", 0.2)
74
+ task57 = Task("polish_pes_medycyna_pracy", "exact_match,score-first", "medycyna_pracy", "generate_until", 0.2)
75
+ task58 = Task("polish_pes_medycyna_paliatywna", "exact_match,score-first", "medycyna_paliatywna", "generate_until", 0.2)
76
+ task59 = Task("polish_pes_medycyna_ratunkowa", "exact_match,score-first", "medycyna_ratunkowa", "generate_until", 0.2)
77
+ task60 = Task("polish_pes_medycyna_rodzinna", "exact_match,score-first", "medycyna_rodzinna", "generate_until", 0.2)
78
+ task61 = Task("polish_pes_medycyna_sportowa", "exact_match,score-first", "medycyna_sportowa", "generate_until", 0.2)
79
+ task62 = Task("polish_pes_nefrologia", "exact_match,score-first", "nefrologia", "generate_until", 0.2)
80
+ task63 = Task("polish_pes_neonatologia", "exact_match,score-first", "neonatologia", "generate_until", 0.2)
81
+ task64 = Task("polish_pes_neurochirurgia", "exact_match,score-first", "neurochirurgia", "generate_until", 0.2)
82
+ task65 = Task("polish_pes_neurologia", "exact_match,score-first", "neurologia", "generate_until", 0.2)
83
+ task66 = Task("polish_pes_neurologia_dziecieca", "exact_match,score-first", "neurologia_dziecieca", "generate_until", 0.2)
84
+ task67 = Task("polish_pes_okulistyka", "exact_match,score-first", "okulistyka", "generate_until", 0.2)
85
+ task68 = Task("polish_pes_onkologia_kliniczna", "exact_match,score-first", "onkologia_kliniczna", "generate_until", 0.2)
86
+ task69 = Task("polish_pes_ortodoncja", "exact_match,score-first", "ortodoncja", "generate_until", 0.2)
87
+ task70 = Task("polish_pes_ortopedia", "exact_match,score-first", "ortopedia", "generate_until", 0.2)
88
+ task71 = Task("polish_pes_otolaryngologia", "exact_match,score-first", "otolaryngologia", "generate_until", 0.2)
89
+ task72 = Task("polish_pes_patomorfologia", "exact_match,score-first", "patomorfologia", "generate_until", 0.2)
90
+ task73 = Task("polish_pes_pediatria", "exact_match,score-first", "pediatria", "generate_until", 0.2)
91
+ task74 = Task("polish_pes_perinatologia", "exact_match,score-first", "perinatologia", "generate_until", 0.2)
92
+ task75 = Task("polish_pes_periodontologia", "exact_match,score-first", "periodontologia", "generate_until", 0.2)
93
+ task76 = Task("polish_pes_poloznictwo_i_ginekologia", "exact_match,score-first", "poloznictwo_i_ginekologia", "generate_until", 0.2)
94
+ task77 = Task("polish_pes_protetyka_stomatologiczna", "exact_match,score-first", "protetyka_stomatologiczna", "generate_until", 0.2)
95
+ task78 = Task("polish_pes_psychiatria", "exact_match,score-first", "psychiatria", "generate_until", 0.2)
96
+ task79 = Task("polish_pes_psychiatria_dzieci_i_mlodziezy", "exact_match,score-first", "psychiatria_dzieci_i_mlodziezy", "generate_until", 0.2)
97
+ task80 = Task("polish_pes_radiologia_i_diagnostyka_obrazowa", "exact_match,score-first", "radiologia_i_diagnostyka_obrazowa", "generate_until", 0.2)
98
+ task81 = Task("polish_pes_radioterapia_onkologiczna", "exact_match,score-first", "radioterapia_onkologiczna", "generate_until", 0.2)
99
+ task82 = Task("polish_pes_rehabilitacja_medyczna", "exact_match,score-first", "rehabilitacja_medyczna", "generate_until", 0.2)
100
+ task83 = Task("polish_pes_reumatologia", "exact_match,score-first", "reumatologia", "generate_until", 0.2)
101
+ task84 = Task("polish_pes_stomatologia_dziecieca", "exact_match,score-first", "stomatologia_dziecieca", "generate_until", 0.2)
102
+ task85 = Task("polish_pes_stomatologia_zachowawcza", "exact_match,score-first", "stomatologia_zachowawcza", "generate_until", 0.2)
103
+ task86 = Task("polish_pes_transplantologia_kliniczna", "exact_match,score-first", "transplantologia_kliniczna", "generate_until", 0.2)
104
 
105
 
106
  g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
src/display/utils.py CHANGED
@@ -32,14 +32,14 @@ auto_eval_column_dict.append(["n_shot", ColumnContent, ColumnContent("n_shot", "
32
  auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", True)])
33
  #Scores
34
  auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
35
- auto_eval_column_dict.append(["average_old", ColumnContent, ColumnContent("Average old", "number", False)])
36
- auto_eval_column_dict.append(["average_g", ColumnContent, ColumnContent("Avg g", "number", True)])
37
- auto_eval_column_dict.append(["average_mc", ColumnContent, ColumnContent("Avg mc", "number", True)])
38
- auto_eval_column_dict.append(["average_rag", ColumnContent, ColumnContent("Avg RAG", "number", True)])
39
 
40
  for task in Tasks:
41
- show = task.value.col_name not in ['poquad_reranking','abstractive_poquad_rag','abstractive_poquad_open_book', 'pes_g']
42
- auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", show)])
43
  # Model information
44
 
45
  auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 
32
  auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", True)])
33
  #Scores
34
  auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
35
+ # auto_eval_column_dict.append(["average_old", ColumnContent, ColumnContent("Average old", "number", False)])
36
+ # auto_eval_column_dict.append(["average_g", ColumnContent, ColumnContent("Avg g", "number", True)])
37
+ # auto_eval_column_dict.append(["average_mc", ColumnContent, ColumnContent("Avg mc", "number", True)])
38
+ # auto_eval_column_dict.append(["average_rag", ColumnContent, ColumnContent("Avg RAG", "number", True)])
39
 
40
  for task in Tasks:
41
+ # show = task.value.col_name not in ['poquad_reranking','abstractive_poquad_rag','abstractive_poquad_open_book', 'pes_g']
42
+ auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
43
  # Model information
44
 
45
  auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
src/leaderboard/read_evals.py CHANGED
@@ -283,30 +283,30 @@ class EvalResult:
283
  except AttributeError:
284
  print(f"AttributeError revision")
285
 
286
- try:
287
- data_dict[AutoEvalColumn.average_old.name] = average_old
288
- except KeyError:
289
- print(f"Could not find average_old")
290
 
291
  try:
292
  data_dict[AutoEvalColumn.average.name] = average
293
  except KeyError:
294
  print(f"Could not find average")
295
 
296
- try:
297
- data_dict[AutoEvalColumn.average_g.name] = average_g
298
- except KeyError:
299
- print(f"Could not find average_g")
300
 
301
- try:
302
- data_dict[AutoEvalColumn.average_mc.name] = average_mc
303
- except KeyError:
304
- print(f"Could not find average_mc")
305
 
306
- try:
307
- data_dict[AutoEvalColumn.average_rag.name] = average_rag
308
- except KeyError:
309
- print(f"Could not find average_rag")
310
 
311
  try:
312
  data_dict[AutoEvalColumn.license.name] = self.license
 
283
  except AttributeError:
284
  print(f"AttributeError revision")
285
 
286
+ # try:
287
+ # data_dict[AutoEvalColumn.average_old.name] = average_old
288
+ # except KeyError:
289
+ # print(f"Could not find average_old")
290
 
291
  try:
292
  data_dict[AutoEvalColumn.average.name] = average
293
  except KeyError:
294
  print(f"Could not find average")
295
 
296
+ # try:
297
+ # data_dict[AutoEvalColumn.average_g.name] = average_g
298
+ # except KeyError:
299
+ # print(f"Could not find average_g")
300
 
301
+ # try:
302
+ # data_dict[AutoEvalColumn.average_mc.name] = average_mc
303
+ # except KeyError:
304
+ # print(f"Could not find average_mc")
305
 
306
+ # try:
307
+ # data_dict[AutoEvalColumn.average_rag.name] = average_rag
308
+ # except KeyError:
309
+ # print(f"Could not find average_rag")
310
 
311
  try:
312
  data_dict[AutoEvalColumn.license.name] = self.license