autoevaluator HF staff committed on
Commit
fb8c7a9
1 Parent(s): 2179ab8

Add evaluation results on the default config and test split of gigaword

Browse files

Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋!\
Your model has been evaluated on the default config and test split of the [gigaword](https://huggingface.co/datasets/gigaword) dataset by @pkumark, using the predictions stored [here](https://huggingface.co/datasets/autoevaluate/autoeval-eval-gigaword-default-eb1b4a-58978145360).\
Accept this pull request to see the results displayed on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards?dataset=gigaword).\
Evaluate your model on more datasets [here](https://huggingface.co/spaces/autoevaluate/model-evaluator?dataset=gigaword).

Files changed (1) hide show
  1. README.md +89 -50
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
- tags:
3
- - summarization
4
  language:
5
  - en
6
  license: mit
 
 
7
  model-index:
8
  - name: facebook/bart-large-xsum
9
  results:
@@ -16,29 +16,29 @@ model-index:
16
  config: 3.0.0
17
  split: test
18
  metrics:
19
- - name: ROUGE-1
20
- type: rouge
21
  value: 25.2697
 
22
  verified: true
23
- - name: ROUGE-2
24
- type: rouge
25
  value: 7.6638
 
26
  verified: true
27
- - name: ROUGE-L
28
- type: rouge
29
  value: 17.1808
 
30
  verified: true
31
- - name: ROUGE-LSUM
32
- type: rouge
33
  value: 21.7933
 
34
  verified: true
35
- - name: loss
36
- type: loss
37
  value: 3.5042972564697266
 
38
  verified: true
39
- - name: gen_len
40
- type: gen_len
41
  value: 27.4462
 
42
  verified: true
43
  - task:
44
  type: summarization
@@ -49,29 +49,29 @@ model-index:
49
  config: default
50
  split: test
51
  metrics:
52
- - name: ROUGE-1
53
- type: rouge
54
  value: 45.4525
 
55
  verified: true
56
- - name: ROUGE-2
57
- type: rouge
58
  value: 22.3455
 
59
  verified: true
60
- - name: ROUGE-L
61
- type: rouge
62
  value: 37.2302
 
63
  verified: true
64
- - name: ROUGE-LSUM
65
- type: rouge
66
  value: 37.2323
 
67
  verified: true
68
- - name: loss
69
- type: loss
70
  value: 2.3128726482391357
 
71
  verified: true
72
- - name: gen_len
73
- type: gen_len
74
  value: 25.5435
 
75
  verified: true
76
  - task:
77
  type: summarization
@@ -82,29 +82,29 @@ model-index:
82
  config: samsum
83
  split: train
84
  metrics:
85
- - name: ROUGE-1
86
- type: rouge
87
  value: 24.7852
 
88
  verified: true
89
- - name: ROUGE-2
90
- type: rouge
91
  value: 5.2533
 
92
  verified: true
93
- - name: ROUGE-L
94
- type: rouge
95
  value: 18.6792
 
96
  verified: true
97
- - name: ROUGE-LSUM
98
- type: rouge
99
  value: 20.629
 
100
  verified: true
101
- - name: loss
102
- type: loss
103
  value: 3.746837854385376
 
104
  verified: true
105
- - name: gen_len
106
- type: gen_len
107
  value: 23.1206
 
108
  verified: true
109
  - task:
110
  type: summarization
@@ -115,30 +115,69 @@ model-index:
115
  config: samsum
116
  split: test
117
  metrics:
118
- - name: ROUGE-1
119
- type: rouge
120
  value: 24.9158
 
121
  verified: true
122
- - name: ROUGE-2
123
- type: rouge
124
  value: 5.5837
 
125
  verified: true
126
- - name: ROUGE-L
127
- type: rouge
128
  value: 18.8935
 
129
  verified: true
130
- - name: ROUGE-LSUM
131
- type: rouge
132
  value: 20.76
 
133
  verified: true
134
- - name: loss
135
- type: loss
136
  value: 3.775235891342163
 
137
  verified: true
138
- - name: gen_len
139
- type: gen_len
140
  value: 23.0928
 
141
  verified: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  ---
143
  ### Bart model finetuned on xsum
144
 
 
1
  ---
 
 
2
  language:
3
  - en
4
  license: mit
5
+ tags:
6
+ - summarization
7
  model-index:
8
  - name: facebook/bart-large-xsum
9
  results:
 
16
  config: 3.0.0
17
  split: test
18
  metrics:
19
+ - type: rouge
 
20
  value: 25.2697
21
+ name: ROUGE-1
22
  verified: true
23
+ - type: rouge
 
24
  value: 7.6638
25
+ name: ROUGE-2
26
  verified: true
27
+ - type: rouge
 
28
  value: 17.1808
29
+ name: ROUGE-L
30
  verified: true
31
+ - type: rouge
 
32
  value: 21.7933
33
+ name: ROUGE-LSUM
34
  verified: true
35
+ - type: loss
 
36
  value: 3.5042972564697266
37
+ name: loss
38
  verified: true
39
+ - type: gen_len
 
40
  value: 27.4462
41
+ name: gen_len
42
  verified: true
43
  - task:
44
  type: summarization
 
49
  config: default
50
  split: test
51
  metrics:
52
+ - type: rouge
 
53
  value: 45.4525
54
+ name: ROUGE-1
55
  verified: true
56
+ - type: rouge
 
57
  value: 22.3455
58
+ name: ROUGE-2
59
  verified: true
60
+ - type: rouge
 
61
  value: 37.2302
62
+ name: ROUGE-L
63
  verified: true
64
+ - type: rouge
 
65
  value: 37.2323
66
+ name: ROUGE-LSUM
67
  verified: true
68
+ - type: loss
 
69
  value: 2.3128726482391357
70
+ name: loss
71
  verified: true
72
+ - type: gen_len
 
73
  value: 25.5435
74
+ name: gen_len
75
  verified: true
76
  - task:
77
  type: summarization
 
82
  config: samsum
83
  split: train
84
  metrics:
85
+ - type: rouge
 
86
  value: 24.7852
87
+ name: ROUGE-1
88
  verified: true
89
+ - type: rouge
 
90
  value: 5.2533
91
+ name: ROUGE-2
92
  verified: true
93
+ - type: rouge
 
94
  value: 18.6792
95
+ name: ROUGE-L
96
  verified: true
97
+ - type: rouge
 
98
  value: 20.629
99
+ name: ROUGE-LSUM
100
  verified: true
101
+ - type: loss
 
102
  value: 3.746837854385376
103
+ name: loss
104
  verified: true
105
+ - type: gen_len
 
106
  value: 23.1206
107
+ name: gen_len
108
  verified: true
109
  - task:
110
  type: summarization
 
115
  config: samsum
116
  split: test
117
  metrics:
118
+ - type: rouge
 
119
  value: 24.9158
120
+ name: ROUGE-1
121
  verified: true
122
+ - type: rouge
 
123
  value: 5.5837
124
+ name: ROUGE-2
125
  verified: true
126
+ - type: rouge
 
127
  value: 18.8935
128
+ name: ROUGE-L
129
  verified: true
130
+ - type: rouge
 
131
  value: 20.76
132
+ name: ROUGE-LSUM
133
  verified: true
134
+ - type: loss
 
135
  value: 3.775235891342163
136
+ name: loss
137
  verified: true
138
+ - type: gen_len
 
139
  value: 23.0928
140
+ name: gen_len
141
  verified: true
142
+ - task:
143
+ type: summarization
144
+ name: Summarization
145
+ dataset:
146
+ name: gigaword
147
+ type: gigaword
148
+ config: default
149
+ split: test
150
+ metrics:
151
+ - type: rouge
152
+ value: 16.579
153
+ name: ROUGE-1
154
+ verified: true
155
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMDFkOTg5ZTM4MDk3NGY4NTcxYzgyY2IxMzlkYmQ0OGUxNzM2YzBhMTI3YjZhMjAzMzZjM2FlZTJmOGM0NmNhMCIsInZlcnNpb24iOjF9.v29vzy3WA75dWDALKoo_pnByEd1kJUQb1TICtczTMvpFmmZBUj1JMbEAX9DMwQa8gq30hbIAvnXa7B0w33oKDQ
156
+ - type: rouge
157
+ value: 3.7036
158
+ name: ROUGE-2
159
+ verified: true
160
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYjMxYjM2MGUwZTI4OGJiZTMzMzY2OWNhNGIyM2NkOTk5ODI3OGRhZGRmMjIyZGNkMzg2MTI3OTg0NjgzMWRkNCIsInZlcnNpb24iOjF9.QWCiactfiS4V_rdnjpt3MJUciVpT7VlMRzGmFJ2PxioadewS2kApbYqu6CCilqC9hfVfsLg-VxrH-Os16Xp2Cg
161
+ - type: rouge
162
+ value: 14.3642
163
+ name: ROUGE-L
164
+ verified: true
165
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjJjNDgxOTYwOGVmMWYxN2U5ZmM3MDg4NmYwMDllMjgzNjI2OTUxZDQyMzQ5NmY5ZWIwMjlkODdhZDcyNjIzMCIsInZlcnNpb24iOjF9.HbWvD3wc7PRAyT87I-5do9GU0cCXQuEIBDAVSDdGEKXfY-LQ_HBI87vl46z7okCPB7xqAQ-jBsLFmjekw-DSAA
166
+ - type: rouge
167
+ value: 14.3846
168
+ name: ROUGE-LSUM
169
+ verified: true
170
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzBmNjNhZmMyNDY0N2Y4OTJmYTk0NWEwYjJjZDJiODMzMmUwNzhmMjVlYzkxMTg1YWYyZThlODY4MjE4NTAwYSIsInZlcnNpb24iOjF9.EZkXXT058YsPzxr-_k-asvYgBaf5IjJyuUzn_IrMEvqW6khWxq8MnnWeGkVPqhVq7hunVvrEYGo2N5ZJbCYOCQ
171
+ - type: loss
172
+ value: 6.708169460296631
173
+ name: loss
174
+ verified: true
175
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZGM3NmJjNmE4MWM3MGFjM2YzMDRjNGY1MzhjYWI0ZWVhNDQ2ZDA3ODVhZTVhZjRjZjFmMDMzNjZjMDRmMmQxNyIsInZlcnNpb24iOjF9.0IiFBEZWGIkz6FUNbxFwDsPivj0Vdzd0txS_5u0zraLxiML-rnitSWOip-WYbNRO9Gmllu3nRzeKBFFjkOj3DQ
176
+ - type: gen_len
177
+ value: 23.694
178
+ name: gen_len
179
+ verified: true
180
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTc2MzlmNjE5ZmVkYTVmZGM5NzFhNDNkODcwOGUyNTFkOTkyOTNmMDNlNjU0YTlmYWMyNzljMDFhZTQzZmZiNSIsInZlcnNpb24iOjF9.MbmvTN8XJuMVhd4JJQdKpnijOKaS2OA2_cjO53777newl0EOHJtTF0CMS3sOjIVt7lrJUEr2JcfY7n8BPxDaAg
181
  ---
182
  ### Bart model finetuned on xsum
183