Commit
•
5d5e911
1
Parent(s):
f36b058
Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator (#17)
Browse files
- Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator (fdc3b93aa4a00337ef4b49616fbf8885be6a13ee)
Co-authored-by: Evaluation Bot <[email protected]>
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
language: en
|
3 |
-
|
4 |
tags:
|
5 |
- opt
|
6 |
- text-generation
|
7 |
-
|
8 |
commercial: false
|
9 |
model-index:
|
10 |
- name: inverse-scaling/opt-13b_eval
|
@@ -18,14 +18,16 @@ model-index:
|
|
18 |
config: inverse-scaling--NeQA
|
19 |
split: train
|
20 |
metrics:
|
21 |
-
-
|
22 |
-
type: accuracy
|
23 |
value: 0.49666666666666665
|
|
|
24 |
verified: true
|
25 |
-
|
26 |
-
|
27 |
value: 0.7090707456072172
|
|
|
28 |
verified: true
|
|
|
29 |
- task:
|
30 |
type: zero-shot-classification
|
31 |
name: Zero-Shot Text Classification
|
@@ -35,14 +37,16 @@ model-index:
|
|
35 |
config: inverse-scaling--quote-repetition
|
36 |
split: train
|
37 |
metrics:
|
38 |
-
-
|
39 |
-
type: accuracy
|
40 |
value: 0.8
|
|
|
41 |
verified: true
|
42 |
-
|
43 |
-
|
44 |
value: 0.4678814027383723
|
|
|
45 |
verified: true
|
|
|
46 |
- task:
|
47 |
type: zero-shot-classification
|
48 |
name: Zero-Shot Text Classification
|
@@ -52,14 +56,16 @@ model-index:
|
|
52 |
config: inverse-scaling--redefine-math
|
53 |
split: train
|
54 |
metrics:
|
55 |
-
-
|
56 |
-
type: accuracy
|
57 |
value: 0.5933333333333334
|
|
|
58 |
verified: true
|
59 |
-
|
60 |
-
|
61 |
value: 0.7308767640383708
|
|
|
62 |
verified: true
|
|
|
63 |
- task:
|
64 |
type: zero-shot-classification
|
65 |
name: Zero-Shot Text Classification
|
@@ -69,14 +75,16 @@ model-index:
|
|
69 |
config: inverse-scaling--hindsight-neglect-10shot
|
70 |
split: train
|
71 |
metrics:
|
72 |
-
-
|
73 |
-
type: accuracy
|
74 |
value: 0.2698412698412698
|
|
|
75 |
verified: true
|
76 |
-
|
77 |
-
|
78 |
value: 0.7708483344978756
|
|
|
79 |
verified: true
|
|
|
80 |
- task:
|
81 |
type: zero-shot-classification
|
82 |
name: Zero-Shot Text Classification
|
@@ -86,14 +94,16 @@ model-index:
|
|
86 |
config: mathemakitten--winobias_antistereotype_test_cot_v1
|
87 |
split: test
|
88 |
metrics:
|
89 |
-
-
|
90 |
-
type: accuracy
|
91 |
value: 0.3422330097087379
|
|
|
92 |
verified: true
|
93 |
-
|
94 |
-
|
95 |
value: 1.4404955777914985
|
|
|
96 |
verified: true
|
|
|
97 |
- task:
|
98 |
type: zero-shot-classification
|
99 |
name: Zero-Shot Text Classification
|
@@ -103,14 +113,16 @@ model-index:
|
|
103 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
104 |
split: test
|
105 |
metrics:
|
106 |
-
-
|
107 |
-
type: accuracy
|
108 |
value: 0.30339805825242716
|
|
|
109 |
verified: true
|
110 |
-
|
111 |
-
|
112 |
value: 1.539870785999474
|
|
|
113 |
verified: true
|
|
|
114 |
- task:
|
115 |
type: zero-shot-classification
|
116 |
name: Zero-Shot Text Classification
|
@@ -120,14 +132,16 @@ model-index:
|
|
120 |
config: mathemakitten--winobias_antistereotype_test_v5
|
121 |
split: test
|
122 |
metrics:
|
123 |
-
-
|
124 |
-
type: accuracy
|
125 |
value: 0.3640776699029126
|
|
|
126 |
verified: true
|
127 |
-
|
128 |
-
|
129 |
value: 1.4798047741848304
|
|
|
130 |
verified: true
|
|
|
131 |
---
|
132 |
|
133 |
# OPT : Open Pre-trained Transformer Language Models
|
|
|
1 |
---
|
2 |
language: en
|
3 |
+
license: other
|
4 |
tags:
|
5 |
- opt
|
6 |
- text-generation
|
7 |
+
inference: false
|
8 |
commercial: false
|
9 |
model-index:
|
10 |
- name: inverse-scaling/opt-13b_eval
|
|
|
18 |
config: inverse-scaling--NeQA
|
19 |
split: train
|
20 |
metrics:
|
21 |
+
- type: accuracy
|
|
|
22 |
value: 0.49666666666666665
|
23 |
+
name: Accuracy
|
24 |
verified: true
|
25 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWQwNzZlNTM4ZWVjODNkOTIzNjg1NTNkNjE0MGJlMjU4ZWI3NTQzYjg4YTY3MDU2MGViYTYyYjZlZDc0NzQzNCIsInZlcnNpb24iOjF9.qNBGm2Mc3OKjadswivJnO1Lul0NeAjGJe-2FfO57phNPMdgp-rDkTl0YMqC1Rljp8BjT4egJ8IdEQgynUE_hDg
|
26 |
+
- type: loss
|
27 |
value: 0.7090707456072172
|
28 |
+
name: Loss
|
29 |
verified: true
|
30 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMmY2NTAxOTQ3MmUwZjgxZGM0NDU1YmQzNmRmMTk3MTZhM2IxM2EwYmYxNzJjODM4MWMxNWQwOTczZWRiMGU1NyIsInZlcnNpb24iOjF9.rni9n_PdKnee5J_sMwlS0W7QWfhqlAXX6S4dUAakGQFW10zLDBb2pPfkKdSYz956yyTMrKBX0ZYT2uQGWxurAg
|
31 |
- task:
|
32 |
type: zero-shot-classification
|
33 |
name: Zero-Shot Text Classification
|
|
|
37 |
config: inverse-scaling--quote-repetition
|
38 |
split: train
|
39 |
metrics:
|
40 |
+
- type: accuracy
|
|
|
41 |
value: 0.8
|
42 |
+
name: Accuracy
|
43 |
verified: true
|
44 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjFjZjM5NWFjN2Y5ODFiYzRjOGE3MDQ1YmFmYjlkYWRlNTdlMjlhMTY2ZmZmNGQwOWQyNmEzZDk2ZTkwZjQyMCIsInZlcnNpb24iOjF9.Fn-zemt_ghgMvekGYouH-ldScOskoGtbBJ6Mpz8vE27Eca_bOYV6DdQq4Mhd3q9eVqAVg_ybsUFAx215Pjs1Cg
|
45 |
+
- type: loss
|
46 |
value: 0.4678814027383723
|
47 |
+
name: Loss
|
48 |
verified: true
|
49 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWVlZjVlZjE5OGIwYTg0ZjJkZjI0NTA2MzUyNDgyY2EyODIzYzk5Zjg1OTMwMTcyODNlZjM2MWE3YWI0MDlhMCIsInZlcnNpb24iOjF9.kFNX4JZsFTeIaxw8kuuc7l5e4J6KWygm6U4RsKwEr8qZumKuJ0IDVPlNzIh0lh2z7OjbGCHsq1bRbPeJQb_bAg
|
50 |
- task:
|
51 |
type: zero-shot-classification
|
52 |
name: Zero-Shot Text Classification
|
|
|
56 |
config: inverse-scaling--redefine-math
|
57 |
split: train
|
58 |
metrics:
|
59 |
+
- type: accuracy
|
|
|
60 |
value: 0.5933333333333334
|
61 |
+
name: Accuracy
|
62 |
verified: true
|
63 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOWVhN2NiMmM4ODAzYzliMTgwZGI0MTdjOGZiM2QwOWVkNGFiMTUwZTA1OGE5MjQwODBjNzFlNjYyMGViNjU0YSIsInZlcnNpb24iOjF9.nQ_UAPkYBSJNpyCP3Pc9ZG3Ns905vy-41HDVdxZrvrs3s5yhiDIH1Gu6bvAzTeiupPVLCu_Rpfp63e4h1sBDBg
|
64 |
+
- type: loss
|
65 |
value: 0.7308767640383708
|
66 |
+
name: Loss
|
67 |
verified: true
|
68 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzVlYTBjMmI4OTBlN2I2M2IxYmM4NGNhYzkxMzA1MWExOWYxZWFkMzlhZDRlYzk3MzkzOTBiOGU4YTJhNGExMyIsInZlcnNpb24iOjF9.xNkna8ygLtmV3ezRbOeYfushHT-p2Kbja3kKkGhUcfAPjKgUVe-mu9dyxez6G-fUWZHHaXuCZuZMvWqP27MGDA
|
69 |
- task:
|
70 |
type: zero-shot-classification
|
71 |
name: Zero-Shot Text Classification
|
|
|
75 |
config: inverse-scaling--hindsight-neglect-10shot
|
76 |
split: train
|
77 |
metrics:
|
78 |
+
- type: accuracy
|
|
|
79 |
value: 0.2698412698412698
|
80 |
+
name: Accuracy
|
81 |
verified: true
|
82 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDc1MzczMmE5ZjRhNWY5YWQzYzc5NTA1OGQ2OTAyYTQzMjFhMWJjYTU2NDYxYThmNzgzMzVmMDNhZmY4ODMxYyIsInZlcnNpb24iOjF9.KtTrigpdC3RydTC0L6ueo-D8lBhsYFTt5ncvlFoDksMDKEo-OiqZj2vkPuErII9Rzr-3H-MqDVyO2UN-VDH7AA
|
83 |
+
- type: loss
|
84 |
value: 0.7708483344978756
|
85 |
+
name: Loss
|
86 |
verified: true
|
87 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNDM2NGMzZTBiMjBkNTAxMGI0MWU5YjQ4NmI4OTU5ZmNiMGE4ZTc1MTczOGRmZTVhMmI5MWNkOGZkMWVhZjQxYSIsInZlcnNpb24iOjF9.CKR5kHqjy07_Rkv2VngLM5cl3KRWQ7rHayctMbzmUzDJq39fJq-jkERNW_JZGIZnMQ4GSINGpnrgP_PE73QzBw
|
88 |
- task:
|
89 |
type: zero-shot-classification
|
90 |
name: Zero-Shot Text Classification
|
|
|
94 |
config: mathemakitten--winobias_antistereotype_test_cot_v1
|
95 |
split: test
|
96 |
metrics:
|
97 |
+
- type: accuracy
|
|
|
98 |
value: 0.3422330097087379
|
99 |
+
name: Accuracy
|
100 |
verified: true
|
101 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjAwOGI4YTRlNmUwMTNlNTEyNjQ1YWNjOTcxOGM1N2M4YjY3ZDczMzBhYTM1Y2ZhMWNhM2U3NjQwNDc5Zjk2MiIsInZlcnNpb24iOjF9.ig0ColofjUx0XbMxwbc1n0D5ZX_Pd5csQKXt0GtcrMsgGUU1pz26ArpxcNFThaQT33-PwTLSjf7_W_wMnwDsCw
|
102 |
+
- type: loss
|
103 |
value: 1.4404955777914985
|
104 |
+
name: Loss
|
105 |
verified: true
|
106 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2JhNGZkOTFiMjFlNGU0MGIxOGI0NTFmN2Q2ODE0ZDEwZjY2NzhmOGU4ZDY3ZDM4Y2ExNGY2MDY4ZDk5ZmFlZSIsInZlcnNpb24iOjF9.9jjeZD1rWaxyIUQO2uyJv2Yf3pNCC6fLnKWJGKSYf2nyWgThKS2JgR0jI4oFG7GtsON03tjeGvmkTdC_Fv7kCQ
|
107 |
- task:
|
108 |
type: zero-shot-classification
|
109 |
name: Zero-Shot Text Classification
|
|
|
113 |
config: mathemakitten--winobias_antistereotype_test_cot_v3
|
114 |
split: test
|
115 |
metrics:
|
116 |
+
- type: accuracy
|
|
|
117 |
value: 0.30339805825242716
|
118 |
+
name: Accuracy
|
119 |
verified: true
|
120 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTE5ZWFlMzQ5ZWRjNDlkZGFiOThiMzRkMTQ3ZDRkYjkzYzY0OTc3NWI1MzBmZDUwZmMzYTBjZDZlOTc0ODdjNyIsInZlcnNpb24iOjF9.hvwwChF87sW6hJ-Jg_pVPagKNACcVTx8-S-_FFbWW97PHZbhtwLgef_tTCGMF2t4HdPssTr1EEgQ3DOh0RfYDg
|
121 |
+
- type: loss
|
122 |
value: 1.539870785999474
|
123 |
+
name: Loss
|
124 |
verified: true
|
125 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMWYwOTgxYmVhMjY3Mzg0NzA5NWY4MmQ4ZjhlYjA0M2YyZDE5MTczZDRhN2FjMjc2MGMwMjU0MDk1YTQ5MzRkZCIsInZlcnNpb24iOjF9.fmdxhv2Ern7ZnCWW19cDTAB3-NaXmYF8xkEw40W2ssxGq50WymezMuqo2ssYGmFZJiiZNPx15OjRQza6V-DDAA
|
126 |
- task:
|
127 |
type: zero-shot-classification
|
128 |
name: Zero-Shot Text Classification
|
|
|
132 |
config: mathemakitten--winobias_antistereotype_test_v5
|
133 |
split: test
|
134 |
metrics:
|
135 |
+
- type: accuracy
|
|
|
136 |
value: 0.3640776699029126
|
137 |
+
name: Accuracy
|
138 |
verified: true
|
139 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDA2NTZjYmM4MmYyNmM2YjA0YTA4NTVlODFlYjBhYTZlOWFmMGU5YzhlM2RkNWFhZTg1NGM4YjI4YzBmY2IxOSIsInZlcnNpb24iOjF9.6yqaB2Owq36GDA3kHfbkWyuxhmj8LhO8kEGYm7vZ6g3qfM6OkkkXFhX-D4bse-W3WILLRb4TE3xAad2EIkSLAA
|
140 |
+
- type: loss
|
141 |
value: 1.4798047741848304
|
142 |
+
name: Loss
|
143 |
verified: true
|
144 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTMxMzVmYThkNmU4ODhmNDgwZWM5ZjM2ZjFkODBjYTY1OGFiNDIwZTM4NDlmMTA4N2Q5ZTk4MThhMzVhN2RjNCIsInZlcnNpb24iOjF9.4i_6ZOjSLyMoPl3BlNMQJ3a1uRYcVpdyaEucECvzJ9786tUQ-RZ-6guKy2-hiZI3DKa1gsks9nPFfeRhLJyiBA
|
145 |
---
|
146 |
|
147 |
# OPT : Open Pre-trained Transformer Language Models
|