chivier committed on
Commit
271706f
·
1 Parent(s): e6c97c0

sync from github

Browse files
src/backend/envs.py CHANGED
@@ -58,7 +58,7 @@ class Tasks(Enum):
58
  # task20 = Task("race", "acc", "RACE", 0)
59
  task21 = Task("mmlu", "acc", "MMLU", 5)
60
  task22 = Task("gsm8k_custom", "em", "GSM8K", 5)
61
- task23 = Task("gsm8k_cot", "em", "GSM8K", 8)
62
 
63
 
64
  EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
 
58
  # task20 = Task("race", "acc", "RACE", 0)
59
  task21 = Task("mmlu", "acc", "MMLU", 5)
60
  task22 = Task("gsm8k_custom", "em", "GSM8K", 5)
61
+ # task23 = Task("gsm8k_cot", "em", "GSM8K", 8)
62
 
63
 
64
  EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
src/display/about.py CHANGED
@@ -12,12 +12,15 @@ The OPEN-MOE-LLM-LEADERBOARD includes generation and multiple choice tasks to me
12
  Tasks:
13
  - **Generation Self-consistency** -- [SelfCheckGPT](https://github.com/potsawee/selfcheckgpt)
14
  - **Multiple Choice Performance** -- [MMLU](https://arxiv.org/abs/2009.03300)
 
15
 
16
  Columns and Metrics:
17
  - Method: The MOE LLMs inference framework.
18
  - E2E(s): Average End to End generation time in seconds.
19
  - PRE(s): Prefilling Time of input prompt in seconds.
20
  - T/s: Token throughput per second.
 
 
21
  - Precision: The precision of the model used.
22
 
23
  """
 
12
  Tasks:
13
  - **Generation Self-consistency** -- [SelfCheckGPT](https://github.com/potsawee/selfcheckgpt)
14
  - **Multiple Choice Performance** -- [MMLU](https://arxiv.org/abs/2009.03300)
15
+ - **Mathematics Problem-Solving Performance** -- [GSM8K](https://arxiv.org/abs/2110.14168)
16
 
17
  Columns and Metrics:
18
  - Method: The MOE LLMs inference framework.
19
  - E2E(s): Average End to End generation time in seconds.
20
  - PRE(s): Prefilling Time of input prompt in seconds.
21
  - T/s: Token throughput per second.
22
+ - MBU(%): Model Bandwidth Utilization.
23
+ - MFU(%): Model FLOPs Utilization.
24
  - Precision: The precision of the model used.
25
 
26
  """
src/display/utils.py CHANGED
@@ -82,7 +82,7 @@ class Tasks(Enum):
82
  selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
83
  mmlu = Task("mmlu", "acc", "MMLU") #MMLU/Acc (5-shot)
84
  gsm8k = Task("gsm8k_custom", "em", "GSM8K") #GSM8K/EM (5-shot)
85
- gsm8k_cot = Task("gsm8k_cot", "em", "GSM8K COT") #GSM8K COT/EM (5-shot)
86
 
87
 
88
  # These classes are for user facing column names,
 
82
  selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
83
  mmlu = Task("mmlu", "acc", "MMLU") #MMLU/Acc (5-shot)
84
  gsm8k = Task("gsm8k_custom", "em", "GSM8K") #GSM8K/EM (5-shot)
85
+ # gsm8k_cot = Task("gsm8k_cot", "em", "GSM8K COT") #GSM8K COT/EM (5-shot)
86
 
87
 
88
  # These classes are for user facing column names,