Commit
•
1c4c1ca
1
Parent(s):
5946fbf
Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator
Browse files
Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋! We've added a new `verifyToken` field to your evaluation results to verify that they are produced by the model evaluator. Accept this PR to ensure that your results remain listed as **verified** on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
README.md
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
---
|
2 |
language: en
|
3 |
-
|
4 |
tags:
|
5 |
- text-generation
|
6 |
-
|
7 |
commercial: false
|
8 |
model-index:
|
9 |
- name: inverse-scaling/opt-350m_eval
|
@@ -17,14 +17,16 @@ model-index:
|
|
17 |
config: inverse-scaling--NeQA
|
18 |
split: train
|
19 |
metrics:
|
20 |
-
-
|
21 |
-
type: accuracy
|
22 |
value: 0.4666666666666667
|
|
|
23 |
verified: true
|
24 |
-
|
25 |
-
|
26 |
value: 0.9192380222864449
|
|
|
27 |
verified: true
|
|
|
28 |
- task:
|
29 |
type: zero-shot-classification
|
30 |
name: Zero-Shot Text Classification
|
@@ -34,14 +36,16 @@ model-index:
|
|
34 |
config: inverse-scaling--quote-repetition
|
35 |
split: train
|
36 |
metrics:
|
37 |
-
-
|
38 |
-
type: accuracy
|
39 |
value: 0.9633333333333334
|
|
|
40 |
verified: true
|
41 |
-
|
42 |
-
|
43 |
value: 0.03444786100047819
|
|
|
44 |
verified: true
|
|
|
45 |
- task:
|
46 |
type: zero-shot-classification
|
47 |
name: Zero-Shot Text Classification
|
@@ -51,14 +55,16 @@ model-index:
|
|
51 |
config: inverse-scaling--redefine-math
|
52 |
split: train
|
53 |
metrics:
|
54 |
-
-
|
55 |
-
type: accuracy
|
56 |
value: 0.6877777777777778
|
|
|
57 |
verified: true
|
58 |
-
|
59 |
-
|
60 |
value: 0.6016371671193176
|
|
|
61 |
verified: true
|
|
|
62 |
- task:
|
63 |
type: zero-shot-classification
|
64 |
name: Zero-Shot Text Classification
|
@@ -68,14 +74,16 @@ model-index:
|
|
68 |
config: inverse-scaling--hindsight-neglect-10shot
|
69 |
split: train
|
70 |
metrics:
|
71 |
-
-
|
72 |
-
type: accuracy
|
73 |
value: 0.4380952380952381
|
|
|
74 |
verified: true
|
75 |
-
|
76 |
-
|
77 |
value: 0.8774787804555325
|
|
|
78 |
verified: true
|
|
|
79 |
- task:
|
80 |
type: zero-shot-classification
|
81 |
name: Zero-Shot Text Classification
|
@@ -85,14 +93,16 @@ model-index:
|
|
85 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
86 |
split: test
|
87 |
metrics:
|
88 |
-
-
|
89 |
-
type: accuracy
|
90 |
value: 0.44660194174757284
|
|
|
91 |
verified: true
|
92 |
-
|
93 |
-
|
94 |
value: 0.9301078982717057
|
|
|
95 |
verified: true
|
|
|
96 |
- task:
|
97 |
type: zero-shot-classification
|
98 |
name: Zero-Shot Text Classification
|
@@ -102,14 +112,16 @@ model-index:
|
|
102 |
config: mathemakitten--winobias_antistereotype_test_v5
|
103 |
split: test
|
104 |
metrics:
|
105 |
-
-
|
106 |
-
type: accuracy
|
107 |
value: 0.4368932038834951
|
|
|
108 |
verified: true
|
109 |
-
|
110 |
-
|
111 |
value: 0.9175132444057151
|
|
|
112 |
verified: true
|
|
|
113 |
---
|
114 |
|
115 |
|
|
|
1 |
---
|
2 |
language: en
|
3 |
+
license: other
|
4 |
tags:
|
5 |
- text-generation
|
6 |
+
inference: false
|
7 |
commercial: false
|
8 |
model-index:
|
9 |
- name: inverse-scaling/opt-350m_eval
|
|
|
17 |
config: inverse-scaling--NeQA
|
18 |
split: train
|
19 |
metrics:
|
20 |
+
- type: accuracy
|
|
|
21 |
value: 0.4666666666666667
|
22 |
+
name: Accuracy
|
23 |
verified: true
|
24 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzczZDk5ODcxOWEyYTg2NDc5MzUxYTUzY2IxMzEzYTQ4Mjc2NjY1YzZkNDNmODg1Y2JhMzEzNzNiNDE0MzVlMCIsInZlcnNpb24iOjF9._V6n5pjfCnCFhUIN5rOfSj4enIrb3uo7hDBgnwUsnVxJ2vUWdZiSXR29_ZtGBlJ8b78gfEVQPr9JkZ2vWH-kDw
|
25 |
+
- type: loss
|
26 |
value: 0.9192380222864449
|
27 |
+
name: Loss
|
28 |
verified: true
|
29 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWMyMzFmNjM4MDhiMGRiM2FlOTU2NjJmY2FiNmMyYTFjZGM5MjMyNzU1MTcyYjhhOGI3MjQ1N2ZkZWNhNTNjOCIsInZlcnNpb24iOjF9.zCtWmvufSPCQpO28PXuyd4tuA_m1WjoNKivlxMW9Z8BgFmhvTObC9FtbS0kkJ6hS9wS2NHLi8-gHyQqjCuCJAA
|
30 |
- task:
|
31 |
type: zero-shot-classification
|
32 |
name: Zero-Shot Text Classification
|
|
|
36 |
config: inverse-scaling--quote-repetition
|
37 |
split: train
|
38 |
metrics:
|
39 |
+
- type: accuracy
|
|
|
40 |
value: 0.9633333333333334
|
41 |
+
name: Accuracy
|
42 |
verified: true
|
43 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYWQ4NGY0N2UzZTI0MDRkYjQwODhjNWJhMjg3OTExYzI2NmVkMGVmMWJjYjMxNDZiZTYwZDdmMzhhMTJiNTM5ZCIsInZlcnNpb24iOjF9.aLrG02yUjaEbIoarFb13RKohrd2v9EhjefJ8Hp8RbK7cFtgZSbbybZ4q3_tmZEjZW96CCeHTldVjiuCfKM36CQ
|
44 |
+
- type: loss
|
45 |
value: 0.03444786100047819
|
46 |
+
name: Loss
|
47 |
verified: true
|
48 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNzgwODhkYzcyNTZhN2MxOTVhYmQ3YjdhZTM3ZTZhNzRhNDRlMzBjNzJiNTllNWU4MWM4M2E0NzljMjViOTUwNiIsInZlcnNpb24iOjF9.raav8NSrkoH1d7veZGaQxapvVB9J7s9E5dPqyMkZ2dWxWHoqWCbT1Rwt_FpTbkd8g2qSlnQBGF94W1Mo_tzPAw
|
49 |
- task:
|
50 |
type: zero-shot-classification
|
51 |
name: Zero-Shot Text Classification
|
|
|
55 |
config: inverse-scaling--redefine-math
|
56 |
split: train
|
57 |
metrics:
|
58 |
+
- type: accuracy
|
|
|
59 |
value: 0.6877777777777778
|
60 |
+
name: Accuracy
|
61 |
verified: true
|
62 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTVkNTE3NTZjYTUzYzViMGUzYjhlMGNjMjZlNDE4MGE5N2NmZDQyYWQzOTg1N2JmMTY1ODg0Y2UyYzYxNzQ3NCIsInZlcnNpb24iOjF9.Z8xYedPo4bCpRjO7soiqpoQX_JusfqLtDlUFl5rug7n-9BDPy8EQyCm37bKBAge0SosQQxMaPv04Q_doUhVlAw
|
63 |
+
- type: loss
|
64 |
value: 0.6016371671193176
|
65 |
+
name: Loss
|
66 |
verified: true
|
67 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzQ1YmY4ODlmZjJlNTY1NWQ3ZTQ0MjM5MjBkYjg2MjZiNWEzODYzNDcwOWQ1ZWU3MmY2MjRjYmQ2ZDQ1NDk0NSIsInZlcnNpb24iOjF9.Rr-dawBCi_eof82m2928wQXvEWiyeuFGf2Zpk259vkmDI6Fkn3Pz_3bNzoGNNOUVoOKhDc9cUYjBE11tIv9tBA
|
68 |
- task:
|
69 |
type: zero-shot-classification
|
70 |
name: Zero-Shot Text Classification
|
|
|
74 |
config: inverse-scaling--hindsight-neglect-10shot
|
75 |
split: train
|
76 |
metrics:
|
77 |
+
- type: accuracy
|
|
|
78 |
value: 0.4380952380952381
|
79 |
+
name: Accuracy
|
80 |
verified: true
|
81 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDFmMWRmNjQyMWJkNzgxOTNkNDBiYjA0MjQ1NjE2NzM0ZjQ0Mzg0ZjkyYzlhMTdjMDZhNWY0NjU2NTAzYmFmYSIsInZlcnNpb24iOjF9.5VZ9gq1ldypfzpPYP3_Wv64rDlVO3jlJrnxK28qXDTcaHCcvF4YtYNry5ud8y9T9L1YrTVMaaPqLafavOHHlDQ
|
82 |
+
- type: loss
|
83 |
value: 0.8774787804555325
|
84 |
+
name: Loss
|
85 |
verified: true
|
86 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTU0YTY5ZWIxMzRhNzRlZDliMDNlZjcyM2FjOGRkZjVlNWNiM2I0MWNlZTFiNzdhMTUxMDU4YTY4YmJhZWU2NSIsInZlcnNpb24iOjF9.MletkNEfPZm3q3WPT3T7D_tO-zGxQj_opy9IPgPJmTxVpGRnZePdXl47U4LiWHPw4BrCrExIsPeBpeeJR9ZNBQ
|
87 |
- task:
|
88 |
type: zero-shot-classification
|
89 |
name: Zero-Shot Text Classification
|
|
|
93 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
94 |
split: test
|
95 |
metrics:
|
96 |
+
- type: accuracy
|
|
|
97 |
value: 0.44660194174757284
|
98 |
+
name: Accuracy
|
99 |
verified: true
|
100 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjhkNzNlNjliYTk2NzhjNjkxNDcxNjFjZGY1N2VjNzMyN2JlNDgwNDU4YTQ3NGQ5NWQ3ZTJiMjU2MWRiYzI4MiIsInZlcnNpb24iOjF9.Ln1Bi_uUuFTmq-qBfrd7qcD_29fXC_5FTH5aenCuqmZ8TK_akoUbTxIj39FTxfFUmJtxnFgiyCcolTIOB9vgCA
|
101 |
+
- type: loss
|
102 |
value: 0.9301078982717057
|
103 |
+
name: Loss
|
104 |
verified: true
|
105 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMGUzNWRkN2RlMjA0NzNhMDhmZWMyOWVmYzk5YTZkZTYyMmRjYWRlNzMyOWYyMGYxYmI0MWJmYWNlYWFhOTliNyIsInZlcnNpb24iOjF9.m-Vjvm6yNYiShP08VEdT-XSVDUpC0Ko96F30YNtg047LE_Mx7UJ3bCSo1MnnGqQ6FIS1j4B2H1guJIvLyRMSAg
|
106 |
- task:
|
107 |
type: zero-shot-classification
|
108 |
name: Zero-Shot Text Classification
|
|
|
112 |
config: mathemakitten--winobias_antistereotype_test_v5
|
113 |
split: test
|
114 |
metrics:
|
115 |
+
- type: accuracy
|
|
|
116 |
value: 0.4368932038834951
|
117 |
+
name: Accuracy
|
118 |
verified: true
|
119 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYjIyNmE2YjJhOThmNzFlZjE4ZjlkOTY4NzhjMTMyYmFiM2ExNDIwYzRjMGM4NDRiNzk4ZWIzMGNiMzIwYzA0NyIsInZlcnNpb24iOjF9.4iGtnHIrNkvivgWcihLTftRGZiHfBc2-UefBbX8st55HPXemb7A6IYKic96VN8bTBumEcb0PrSMYoSUsP6UFCQ
|
120 |
+
- type: loss
|
121 |
value: 0.9175132444057151
|
122 |
+
name: Loss
|
123 |
verified: true
|
124 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTAzODg3MjM4YWUxODJjOTU5Y2I4MGIwMGEwMTAwMTdjMWZhZTk0NDllNDQ4NWRlODI0NjBiZGI2ZjNjNmUzNyIsInZlcnNpb24iOjF9.u8PyUlKCZw5QqYWeE5WFM2t8IWacQhyHU_jyMPZoK1PvhUVItH80CxKrkimSQNMaTwOPNd53szUesfRkP_yXDA
|
125 |
---
|
126 |
|
127 |
|