Commit
•
e4d17e2
1
Parent(s):
3d93353
Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator
Browse files

Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋! We've added a new `verifyToken` field to your evaluation results to verify that they are produced by the model evaluator. Accept this PR to ensure that your results remain listed as **verified** on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
language: en
|
3 |
-
|
4 |
tags:
|
5 |
- text-generation
|
6 |
- opt
|
7 |
-
|
8 |
commercial: false
|
9 |
model-index:
|
10 |
- name: inverse-scaling/opt-125m_eval
|
@@ -18,14 +18,16 @@ model-index:
|
|
18 |
config: inverse-scaling--NeQA
|
19 |
split: train
|
20 |
metrics:
|
21 |
-
-
|
22 |
-
type: accuracy
|
23 |
value: 0.4666666666666667
|
|
|
24 |
verified: true
|
25 |
-
|
26 |
-
|
27 |
value: 0.9069941281403104
|
|
|
28 |
verified: true
|
|
|
29 |
- task:
|
30 |
type: zero-shot-classification
|
31 |
name: Zero-Shot Text Classification
|
@@ -35,14 +37,16 @@ model-index:
|
|
35 |
config: inverse-scaling--quote-repetition
|
36 |
split: train
|
37 |
metrics:
|
38 |
-
-
|
39 |
-
type: accuracy
|
40 |
value: 0.96
|
|
|
41 |
verified: true
|
42 |
-
|
43 |
-
|
44 |
value: 0.04267331124324727
|
|
|
45 |
verified: true
|
|
|
46 |
- task:
|
47 |
type: zero-shot-classification
|
48 |
name: Zero-Shot Text Classification
|
@@ -52,14 +56,16 @@ model-index:
|
|
52 |
config: inverse-scaling--redefine-math
|
53 |
split: train
|
54 |
metrics:
|
55 |
-
-
|
56 |
-
type: accuracy
|
57 |
value: 0.7566666666666667
|
|
|
58 |
verified: true
|
59 |
-
|
60 |
-
|
61 |
value: 0.5209774699724383
|
|
|
62 |
verified: true
|
|
|
63 |
- task:
|
64 |
type: zero-shot-classification
|
65 |
name: Zero-Shot Text Classification
|
@@ -69,14 +75,16 @@ model-index:
|
|
69 |
config: inverse-scaling--hindsight-neglect-10shot
|
70 |
split: train
|
71 |
metrics:
|
72 |
-
-
|
73 |
-
type: accuracy
|
74 |
value: 0.5047619047619047
|
|
|
75 |
verified: true
|
76 |
-
|
77 |
-
|
78 |
value: 0.8965487285916295
|
|
|
79 |
verified: true
|
|
|
80 |
- task:
|
81 |
type: zero-shot-classification
|
82 |
name: Zero-Shot Text Classification
|
@@ -86,14 +94,16 @@ model-index:
|
|
86 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
87 |
split: test
|
88 |
metrics:
|
89 |
-
-
|
90 |
-
type: accuracy
|
91 |
value: 0.47815533980582525
|
|
|
92 |
verified: true
|
93 |
-
|
94 |
-
|
95 |
value: 0.8500587756725001
|
|
|
96 |
verified: true
|
|
|
97 |
- task:
|
98 |
type: zero-shot-classification
|
99 |
name: Zero-Shot Text Classification
|
@@ -103,14 +113,16 @@ model-index:
|
|
103 |
config: mathemakitten--winobias_antistereotype_test_v5
|
104 |
split: test
|
105 |
metrics:
|
106 |
-
-
|
107 |
-
type: accuracy
|
108 |
value: 0.5024271844660194
|
|
|
109 |
verified: true
|
110 |
-
|
111 |
-
|
112 |
value: 0.8860152396463484
|
|
|
113 |
verified: true
|
|
|
114 |
---
|
115 |
|
116 |
# OPT : Open Pre-trained Transformer Language Models
|
|
|
1 |
---
|
2 |
language: en
|
3 |
+
license: other
|
4 |
tags:
|
5 |
- text-generation
|
6 |
- opt
|
7 |
+
inference: false
|
8 |
commercial: false
|
9 |
model-index:
|
10 |
- name: inverse-scaling/opt-125m_eval
|
|
|
18 |
config: inverse-scaling--NeQA
|
19 |
split: train
|
20 |
metrics:
|
21 |
+
- type: accuracy
|
|
|
22 |
value: 0.4666666666666667
|
23 |
+
name: Accuracy
|
24 |
verified: true
|
25 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMjBkYzg3OGQ2NGEwMzE3MmRlNDNjOTQ5YjI2ZmY5ZmExYmMwZGMzOGU4MDM5NmUxMmM0MzlmNmU3OGMxOWNlNyIsInZlcnNpb24iOjF9.6hSSu8iq_f8MCiI3vaVEE2x-Z_7SfVSXu2vEIGggKG1Z1oC1E3-Y7VbZM7cMJKzRvcskLBFaRHYoaU2uZi5gCA
|
26 |
+
- type: loss
|
27 |
value: 0.9069941281403104
|
28 |
+
name: Loss
|
29 |
verified: true
|
30 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTNhMDE3NGEyY2UwN2M4ZTNlYjA0YjM1OWZiNWI4MWRjYmRkOGFjMDA2YjZkZWM0YjczMjRhZDIxMmQxMmQ3MCIsInZlcnNpb24iOjF9.ngIQdf8pOt8WcuIo6_vR5nsLCuazdU2605JI-cvjuG6uyBfAE7xWV-ZLqqVZ85cfpGGso1e3FDcnjNgCuS19CQ
|
31 |
- task:
|
32 |
type: zero-shot-classification
|
33 |
name: Zero-Shot Text Classification
|
|
|
37 |
config: inverse-scaling--quote-repetition
|
38 |
split: train
|
39 |
metrics:
|
40 |
+
- type: accuracy
|
|
|
41 |
value: 0.96
|
42 |
+
name: Accuracy
|
43 |
verified: true
|
44 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzk1NTY4YmYzMzE3OGQ2OGM4NjljNmM0NTc0MWMxZTI3MGI3OTBkMzE3OTJkMjRiYzU2OGUwMjdhMTY1Y2M0MyIsInZlcnNpb24iOjF9.1uGnbKuVoPXeK2zF3nIqAPUeiWodBA78BhDgHk-8Kq9Vh6WtvcL0qwOvQVLjjPmL_7G56Y0d6cuXWycACwuhAQ
|
45 |
+
- type: loss
|
46 |
value: 0.04267331124324727
|
47 |
+
name: Loss
|
48 |
verified: true
|
49 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNGI3MTBiODBlNjNlZGExNzBhMjgxNjNhNDQ5OGQ5YTBjMjQzNTMwNWQ3MDY3NWY2NzJjOGYzNmFjZTE2ODYzNyIsInZlcnNpb24iOjF9.OoXOKgtCjrB3iku_GtinmPFeFdMJWExa2N-VbKKoymMX9pQJ3Wh9cVbKWI2nTHsoTQI_lu_3s9ZjVVk7_v9zAA
|
50 |
- task:
|
51 |
type: zero-shot-classification
|
52 |
name: Zero-Shot Text Classification
|
|
|
56 |
config: inverse-scaling--redefine-math
|
57 |
split: train
|
58 |
metrics:
|
59 |
+
- type: accuracy
|
|
|
60 |
value: 0.7566666666666667
|
61 |
+
name: Accuracy
|
62 |
verified: true
|
63 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTRkMzAyYzcwOGZmNDVhMTMwOGQxOWVhZDE2NzVkMGRkNDJjNzFlMjZkNDFlZDMyZTA0YjYwNTBjNTBlODg2NCIsInZlcnNpb24iOjF9.Mxc3griLDkTEYTJyF0EamDwHEtzN2IkiXKYY9HmIl6HbHvLoJn9Qz1Ot6EE_T0VJbL11Ih7XOgELgiZ35XU3Cw
|
64 |
+
- type: loss
|
65 |
value: 0.5209774699724383
|
66 |
+
name: Loss
|
67 |
verified: true
|
68 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjZiZjIzZGUyOGFjODU2ZDk4N2ZmMjc5MmZkY2NmODAyNDhjODQ1MDZiMDc0NDdlM2VmZDc2ZWRhMmFjM2ZhMyIsInZlcnNpb24iOjF9.rWg9_9Z5YtqgO7H61K8w1cp_7GTGsyRpMhACpqioXSnQ6z0sL-rtkwb1QKjD0yQH3MEHr2Grwsh7iUmY0nWjDQ
|
69 |
- task:
|
70 |
type: zero-shot-classification
|
71 |
name: Zero-Shot Text Classification
|
|
|
75 |
config: inverse-scaling--hindsight-neglect-10shot
|
76 |
split: train
|
77 |
metrics:
|
78 |
+
- type: accuracy
|
|
|
79 |
value: 0.5047619047619047
|
80 |
+
name: Accuracy
|
81 |
verified: true
|
82 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTAxMTI4OWNkNzQ0NTZjOGZhNWJmYjBlZGMyMjg2YjJjZWJjNzU1MmIzNWM5MTg5MzhjYmQ0YzI5NzM5NTVjZiIsInZlcnNpb24iOjF9.dzv4FTu8IIWWu8V497AzCWSjytzv_PnxriQ9aWOUd6AkQCOZQeCLrLYLifoK_BJ2SBcuBum6TS-Ukx9MalklAA
|
83 |
+
- type: loss
|
84 |
value: 0.8965487285916295
|
85 |
+
name: Loss
|
86 |
verified: true
|
87 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2ExZjE2ZWIxODBjZTA0OTI1NzI0NTRlMTIxNDI1YjA4OTM5YzVkMzc4N2MzZTc4ZTA4OTFiYTlkMjcyYjY0MiIsInZlcnNpb24iOjF9.FjnpzThx7mRfh1U_R12KCUJ2wDxjaEKQC3iSSVAvzP1xXLESxA4c014Xzucw1Ugaq_P8s5ySzlPgGUp7qqTtBA
|
88 |
- task:
|
89 |
type: zero-shot-classification
|
90 |
name: Zero-Shot Text Classification
|
|
|
94 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
95 |
split: test
|
96 |
metrics:
|
97 |
+
- type: accuracy
|
|
|
98 |
value: 0.47815533980582525
|
99 |
+
name: Accuracy
|
100 |
verified: true
|
101 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDdkNmEwOTQwZTI4MzE4YjlmNjIwZTIxMWM3YWM1YzYyMWM1OTY1YmZkNjhjYmUyZjJjZjZkMTljNjZlMzUwYyIsInZlcnNpb24iOjF9.PLaVz67JgdncUXDz3BXmZC41HKVl3_D1Iz5cgygbn2y4OsfVyvsyvU3GFqKgPb-gvXT4xGMxkV0FvA28gjTGDw
|
102 |
+
- type: loss
|
103 |
value: 0.8500587756725001
|
104 |
+
name: Loss
|
105 |
verified: true
|
106 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiODFjNjUwMWI2Y2UwNzQ0NDE4NTU1NGI3YzQyMDNhOWU3YjU0MGRhMjEyZjNkMzczYWU2MDY0NGIyZmM5MWY5OCIsInZlcnNpb24iOjF9.9VQeAZ_lvyKC2RNQ2GmqSrxXCz2W8NZz14JhF3j4boBHXRm1V07wml6uNW_GfDt6Qwiu5IZCqMdvCavacDUoDw
|
107 |
- task:
|
108 |
type: zero-shot-classification
|
109 |
name: Zero-Shot Text Classification
|
|
|
113 |
config: mathemakitten--winobias_antistereotype_test_v5
|
114 |
split: test
|
115 |
metrics:
|
116 |
+
- type: accuracy
|
|
|
117 |
value: 0.5024271844660194
|
118 |
+
name: Accuracy
|
119 |
verified: true
|
120 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDRjYzkzZDI1MDRjY2JiNDUyNGJmNmVlZTMxYmJjODIzNDc2NGI3MzBjN2RkNGRjZjg5ZjJiYjM1ODQyMjQyMyIsInZlcnNpb24iOjF9.uLQjZb34N0QHPgeMnJkPk3xG3VI4Z_djPpCvah29a9D0fOHMuqdqynnySODmwfdbKecEV5za8wUf6_ny4qktDQ
|
121 |
+
- type: loss
|
122 |
value: 0.8860152396463484
|
123 |
+
name: Loss
|
124 |
verified: true
|
125 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWIzODA0ZjExNzJiMDBjNzlkYzFjMzk2NGMxNzM0ODQyNmFhMDczM2EwMWU1N2VjMjcxNGEzMTdjN2IyNDJhNSIsInZlcnNpb24iOjF9.ipVZVlS7Rey-vsqEhAmOjcz4pkl85Brn8i1aTc4eSXQ2KgG5ScuAgeIVcxe3EbCSJsRkJowRqRqqWKBodiyAAQ
|
126 |
---
|
127 |
|
128 |
# OPT : Open Pre-trained Transformer Language Models
|