Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
danielz02
committed on
Shorten perspective display name
Browse files- src/display/about.py +13 -13
- src/leaderboard/read_evals.py +1 -1
src/display/about.py
CHANGED
|
@@ -13,12 +13,12 @@ class Task:
|
|
| 13 |
class Tasks(Enum):
|
| 14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 15 |
task0 = Task("toxicity", "aggregated-results", "Toxicity")
|
| 16 |
-
task1 = Task("stereotype", "aggregated-results", "Stereotype Bias")
|
| 17 |
-
task2 = Task("adv", "aggregated-results", "Adversarial Robustness")
|
| 18 |
-
task3 = Task("ood", "aggregated-results", "OoD Robustness")
|
| 19 |
-
task4 = Task("adv_demo", "aggregated-results", "Robustness to Adversarial Demonstrations")
|
| 20 |
task5 = Task("privacy", "aggregated-results", "Privacy")
|
| 21 |
-
task6 = Task("ethics", "aggregated-results", "Machine Ethics")
|
| 22 |
task7 = Task("fairness", "aggregated-results", "Fairness")
|
| 23 |
|
| 24 |
|
|
@@ -41,14 +41,14 @@ limitations, and potential risks involved in deploying these state-of-the-art La
|
|
| 41 |
|
| 42 |
This project is organized around the following eight primary perspectives of trustworthiness, including:
|
| 43 |
|
| 44 |
-
Toxicity
|
| 45 |
-
Stereotype and bias
|
| 46 |
-
Adversarial robustness
|
| 47 |
-
Out-of-Distribution Robustness
|
| 48 |
-
Privacy
|
| 49 |
-
Robustness to Adversarial Demonstrations
|
| 50 |
-
Machine Ethics
|
| 51 |
-
Fairness
|
| 52 |
|
| 53 |
## Reproducibility
|
| 54 |
To reproduce our results, checkout https://github.com/AI-secure/DecodingTrust
|
|
|
|
| 13 |
class Tasks(Enum):
|
| 14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 15 |
task0 = Task("toxicity", "aggregated-results", "Toxicity")
|
| 16 |
+
task1 = Task("stereotype", "aggregated-results", "Stereotype")
|
| 17 |
+
task2 = Task("adv", "aggregated-results", "Adv Robustness")
|
| 18 |
+
task3 = Task("ood", "aggregated-results", "OoD")
|
| 19 |
+
task4 = Task("adv_demo", "aggregated-results", "Adv Demo")
|
| 20 |
task5 = Task("privacy", "aggregated-results", "Privacy")
|
| 21 |
+
task6 = Task("ethics", "aggregated-results", "Ethics")
|
| 22 |
task7 = Task("fairness", "aggregated-results", "Fairness")
|
| 23 |
|
| 24 |
|
|
|
|
| 41 |
|
| 42 |
This project is organized around the following eight primary perspectives of trustworthiness, including:
|
| 43 |
|
| 44 |
+
+ Toxicity
|
| 45 |
+
+ Stereotype and bias
|
| 46 |
+
+ Adversarial robustness
|
| 47 |
+
+ Out-of-Distribution Robustness
|
| 48 |
+
+ Privacy
|
| 49 |
+
+ Robustness to Adversarial Demonstrations
|
| 50 |
+
+ Machine Ethics
|
| 51 |
+
+ Fairness
|
| 52 |
|
| 53 |
## Reproducibility
|
| 54 |
To reproduce our results, checkout https://github.com/AI-secure/DecodingTrust
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -37,7 +37,6 @@ class EvalResult:
|
|
| 37 |
data = json.load(fp)
|
| 38 |
|
| 39 |
config = data.get("config")
|
| 40 |
-
print(config)
|
| 41 |
|
| 42 |
# Precision
|
| 43 |
precision = Precision.from_str(config.get("model_dtype"))
|
|
@@ -97,6 +96,7 @@ class EvalResult:
|
|
| 97 |
try:
|
| 98 |
with open(request_file, "r") as f:
|
| 99 |
request = json.load(f)
|
|
|
|
| 100 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
| 101 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
| 102 |
self.license = request.get("license", "?")
|
|
|
|
| 37 |
data = json.load(fp)
|
| 38 |
|
| 39 |
config = data.get("config")
|
|
|
|
| 40 |
|
| 41 |
# Precision
|
| 42 |
precision = Precision.from_str(config.get("model_dtype"))
|
|
|
|
| 96 |
try:
|
| 97 |
with open(request_file, "r") as f:
|
| 98 |
request = json.load(f)
|
| 99 |
+
print("Read Request", request)
|
| 100 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
| 101 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
| 102 |
self.license = request.get("license", "?")
|