Spaces:

allenai
/

reward-bench

Running

App Files Files Community

natolambert committed on Jun 24

Commit

91c5b22

•

1 Parent(s): 93916f2

add tag

Browse files

Files changed (2) hide show

src/md.py +3 -1
src/utils.py +20 -7

src/md.py CHANGED Viewed

@@ -2,6 +2,8 @@ ABOUT_TEXT = """
 We compute the win percentage for a reward model on hand curated chosen-rejected pairs for each prompt.
 A win is when the score for the chosen response is higher than the score for the rejected response.
 ## Overview
 We average over 4 core sections (per prompt weighting):
@@ -93,5 +95,5 @@ For more details, see the [dataset](https://huggingface.co/datasets/allenai/rewa
 TOP_TEXT = """
 # RewardBench: Evaluating Reward Models
 ### Evaluating the capabilities, safety, and pitfalls of reward models
-[Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {}
 """

 We compute the win percentage for a reward model on hand curated chosen-rejected pairs for each prompt.
 A win is when the score for the chosen response is higher than the score for the rejected response.
+Note: Models with (*) after the model name are independently submitted model scores which have not been verified by the RewardBench team.
 ## Overview
 We average over 4 core sections (per prompt weighting):
 TOP_TEXT = """
 # RewardBench: Evaluating Reward Models
 ### Evaluating the capabilities, safety, and pitfalls of reward models
+[Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {} | * Unverified models
 """

src/utils.py CHANGED Viewed

@@ -5,24 +5,37 @@ import numpy as np
 import os
 import re
 # From Open LLM Leaderboard
 def model_hyperlink(link, model_name):
     # if model_name is above 50 characters, return first 47 characters and "..."
     if len(model_name) > 50:
         model_name = model_name[:47] + "..."
     if model_name == "random":
-        return "random"
     elif model_name == "Cohere March 2024":
-        return f'<a target="_blank" href="https://huggingface.co/Cohere" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
     elif "openai" == model_name.split("/")[0]:
-        return f'<a target="_blank" href="https://huggingface.co/openai" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
     elif "Anthropic" == model_name.split("/")[0]:
-        return f'<a target="_blank" href="https://huggingface.co/Anthropic" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
     elif "google" == model_name.split("/")[0]:
-        return f'<a target="_blank" href="https://huggingface.co/google" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
     elif "PoLL" == model_name.split("/")[0]:
-        return model_name
-    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 def undo_hyperlink(html_string):
     # Regex pattern to match content inside > and <

 import os
 import re
# Model names whose scores were independently submitted and have not been
# verified by the RewardBench team; they are rendered with a trailing " *".
UNVERIFIED_MODELS = [
    "nvidia/Nemotron-4-340B-Reward",
    "nvidia/Llama3-70B-SteerLM-RM",
    "Cohere May 2024",
    "google/gemini-1.5-pro-0514",
    "Cohere March 2024",
]


# From Open LLM Leaderboard
def model_hyperlink(link, model_name):
    """Return the leaderboard cell for a model: an HTML link or a plain name.

    Args:
        link: URL used for the anchor when no special case applies.
        model_name: Display name of the model, usually ``"org/name"``.

    Returns:
        The plain name for ``"random"`` and ``PoLL/...`` entries; otherwise an
        ``<a>`` tag pointing at the Cohere / openai / Anthropic / google
        organization page for those special cases, or at ``link`` for every
        other model. Names longer than 50 characters are shortened to their
        first 47 characters plus ``"..."``. Names listed in
        ``UNVERIFIED_MODELS`` get ``" *"`` appended.
    """
    # Look the name up before truncation so that a long listed name would
    # still be recognised once its display text has been shortened.
    is_unverified = model_name in UNVERIFIED_MODELS

    # if model_name is above 50 characters, return first 47 characters and "..."
    if len(model_name) > 50:
        model_name = model_name[:47] + "..."

    org = model_name.split("/")[0]
    if model_name == "random" or org == "PoLL":
        # These entries are shown as plain text, without a hyperlink.
        output = model_name
    else:
        if model_name == "Cohere March 2024":
            href = "https://huggingface.co/Cohere"
        elif org in ("openai", "Anthropic", "google"):
            href = f"https://huggingface.co/{org}"
        else:
            # Generic case only: assigning this unconditionally would
            # overwrite every special case chosen above.
            href = link
        output = f'<a target="_blank" href="{href}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'

    if is_unverified:
        return output + " *"
    return output
 def undo_hyperlink(html_string):
     # Regex pattern to match content inside > and <