djstrong committed on
Commit
3c6913b
·
1 Parent(s): d7e1a99
Files changed (1) hide show
  1. src/about.py +30 -30
src/about.py CHANGED
@@ -15,35 +15,35 @@ class Task:
15
  class Tasks(Enum):
16
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
17
  # task2 = Task("belebele_pol_Latn", "acc,none", "belebele_pol_Latn", "multiple_choice", 0.279)
18
- task3 = Task("polemo2_in", "exact_match,score-first", "polemo2-in_g", "generate_until", 0.416)
19
- task4 = Task("polemo2_in_multiple_choice", "acc,none", "polemo2-in_mc", "multiple_choice", 0.416)
20
- task5 = Task("polemo2_out", "exact_match,score-first", "polemo2-out_g", "generate_until", 0.368)
21
- task6 = Task("polemo2_out_multiple_choice", "acc,none", "polemo2-out_mc", "multiple_choice", 0.368)
22
- task7 = Task("polish_8tags_multiple_choice", "acc,none", "8tags_mc", "multiple_choice", 0.143)
23
- task8 = Task("polish_8tags_regex", "exact_match,score-first", "8tags_g", "generate_until", 0.143)
24
- task9a = Task("polish_belebele_mc", "acc,none", "belebele_mc", "multiple_choice", 0.279)
25
- task9 = Task("polish_belebele_regex", "exact_match,score-first", "belebele_g", "generate_until", 0.279)
26
- task10 = Task("polish_dyk_multiple_choice", "f1,none", "dyk_mc", "multiple_choice", 0.289)
27
- task11 = Task("polish_dyk_regex", "f1,score-first", "dyk_g", "generate_until", 0.289)
28
- task12 = Task("polish_ppc_multiple_choice", "acc,none", "ppc_mc", "multiple_choice", 0.419)
29
- task13 = Task("polish_ppc_regex", "exact_match,score-first", "ppc_g", "generate_until", 0.419)
30
- task14 = Task("polish_psc_multiple_choice", "f1,none", "psc_mc", "multiple_choice", 0.466)
31
- task15 = Task("polish_psc_regex", "f1,score-first", "psc_g", "generate_until", 0.466)
32
- task16 = Task("polish_cbd_multiple_choice", "f1,none", "cbd_mc", "multiple_choice", 0.149)
33
- task17 = Task("polish_cbd_regex", "f1,score-first", "cbd_g", "generate_until", 0.149)
34
- task18 = Task("polish_klej_ner_multiple_choice", "acc,none", "klej_ner_mc", "multiple_choice", 0.343)
35
- task19 = Task("polish_klej_ner_regex", "exact_match,score-first", "klej_ner_g", "generate_until", 0.343)
36
- task21 = Task("polish_polqa_reranking_multiple_choice", "acc,none", "polqa_reranking_mc", "multiple_choice", 0.5335588952710677) # multiple_choice
37
- task22 = Task("polish_polqa_open_book", "levenshtein,none", "polqa_open_book_g", "generate_until", 0.0) # generate_until
38
- task23 = Task("polish_polqa_closed_book", "levenshtein,none", "polqa_closed_book_g", "generate_until", 0.0) # generate_until
39
- task24 = Task("polish_poquad_open_book", "levenshtein,none", "poquad_open_book", "generate_until", 0.0)
40
- task25 = Task("polish_eq_bench_first_turn", "first_eqbench,none", "eq_bench_first_turn", "generate_until", 0.0)
41
- task26 = Task("polish_eq_bench", "average_eqbench,none", "eq_bench", "generate_until", 0.0)
42
- task20 = Task("polish_poleval2018_task3_test_10k", "word_perplexity,none", "poleval2018_task3_test_10k", "other")
43
- task27 = Task("polish_poquad_reranking", "acc,none", "poquad_reranking", "other", 0.0)
44
- task28 = Task("polish_abstractive_poquad_rag", "levenshtein,none", "abstractive_poquad_rag", "other", 0.0)
45
- task29 = Task("polish_abstractive_poquad_open_book", "levenshtein,none", "abstractive_poquad_open_book", "other", 0.0)
46
- task30 = Task("polish_pes_regex", "exact_match,score-first", "pes_g", "other", 0.2)
47
 
48
 
49
  g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
@@ -61,7 +61,7 @@ TITLE = """
61
  <div style="display: flex; flex-wrap: wrap; justify-content: space-around;">
62
  <img src="https://speakleash.org/wp-content/uploads/2023/09/SpeakLeash_logo.svg">
63
  <div>
64
- <h1 align="center" id="space-title">Open PL LLM Leaderboard (0-shot and 5-shot)</h1>
65
  <h2 align="center" id="space-subtitle">Leaderboard was created as part of an open-science project SpeakLeash.org</h2>
66
  </div>
67
  </div>
 
15
  class Tasks(Enum):
16
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
17
  # task2 = Task("belebele_pol_Latn", "acc,none", "belebele_pol_Latn", "multiple_choice", 0.279)
18
+ # task3 = Task("polemo2_in", "exact_match,score-first", "polemo2-in_g", "generate_until", 0.416)
19
+ # task4 = Task("polemo2_in_multiple_choice", "acc,none", "polemo2-in_mc", "multiple_choice", 0.416)
20
+ # task5 = Task("polemo2_out", "exact_match,score-first", "polemo2-out_g", "generate_until", 0.368)
21
+ # task6 = Task("polemo2_out_multiple_choice", "acc,none", "polemo2-out_mc", "multiple_choice", 0.368)
22
+ # task7 = Task("polish_8tags_multiple_choice", "acc,none", "8tags_mc", "multiple_choice", 0.143)
23
+ # task8 = Task("polish_8tags_regex", "exact_match,score-first", "8tags_g", "generate_until", 0.143)
24
+ # task9a = Task("polish_belebele_mc", "acc,none", "belebele_mc", "multiple_choice", 0.279)
25
+ # task9 = Task("polish_belebele_regex", "exact_match,score-first", "belebele_g", "generate_until", 0.279)
26
+ # task10 = Task("polish_dyk_multiple_choice", "f1,none", "dyk_mc", "multiple_choice", 0.289)
27
+ # task11 = Task("polish_dyk_regex", "f1,score-first", "dyk_g", "generate_until", 0.289)
28
+ # task12 = Task("polish_ppc_multiple_choice", "acc,none", "ppc_mc", "multiple_choice", 0.419)
29
+ # task13 = Task("polish_ppc_regex", "exact_match,score-first", "ppc_g", "generate_until", 0.419)
30
+ # task14 = Task("polish_psc_multiple_choice", "f1,none", "psc_mc", "multiple_choice", 0.466)
31
+ # task15 = Task("polish_psc_regex", "f1,score-first", "psc_g", "generate_until", 0.466)
32
+ # task16 = Task("polish_cbd_multiple_choice", "f1,none", "cbd_mc", "multiple_choice", 0.149)
33
+ # task17 = Task("polish_cbd_regex", "f1,score-first", "cbd_g", "generate_until", 0.149)
34
+ # task18 = Task("polish_klej_ner_multiple_choice", "acc,none", "klej_ner_mc", "multiple_choice", 0.343)
35
+ # task19 = Task("polish_klej_ner_regex", "exact_match,score-first", "klej_ner_g", "generate_until", 0.343)
36
+ # task21 = Task("polish_polqa_reranking_multiple_choice", "acc,none", "polqa_reranking_mc", "multiple_choice", 0.5335588952710677) # multiple_choice
37
+ # task22 = Task("polish_polqa_open_book", "levenshtein,none", "polqa_open_book_g", "generate_until", 0.0) # generate_until
38
+ # task23 = Task("polish_polqa_closed_book", "levenshtein,none", "polqa_closed_book_g", "generate_until", 0.0) # generate_until
39
+ # task24 = Task("polish_poquad_open_book", "levenshtein,none", "poquad_open_book", "generate_until", 0.0)
40
+ # task25 = Task("polish_eq_bench_first_turn", "first_eqbench,none", "eq_bench_first_turn", "generate_until", 0.0)
41
+ # task26 = Task("polish_eq_bench", "average_eqbench,none", "eq_bench", "generate_until", 0.0)
42
+ # task20 = Task("polish_poleval2018_task3_test_10k", "word_perplexity,none", "poleval2018_task3_test_10k", "other")
43
+ # task27 = Task("polish_poquad_reranking", "acc,none", "poquad_reranking", "other", 0.0)
44
+ # task28 = Task("polish_abstractive_poquad_rag", "levenshtein,none", "abstractive_poquad_rag", "other", 0.0)
45
+ # task29 = Task("polish_abstractive_poquad_open_book", "levenshtein,none", "abstractive_poquad_open_book", "other", 0.0)
46
+ task30 = Task("polish_pes_regex", "exact_match,score-first", "pes_g", "generate_until", 0.2)
47
 
48
 
49
  g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
 
61
  <div style="display: flex; flex-wrap: wrap; justify-content: space-around;">
62
  <img src="https://speakleash.org/wp-content/uploads/2023/09/SpeakLeash_logo.svg">
63
  <div>
64
+ <h1 align="center" id="space-title">Polish Medical Leaderboard</h1>
65
  <h2 align="center" id="space-subtitle">Leaderboard was created as part of an open-science project SpeakLeash.org</h2>
66
  </div>
67
  </div>