Xmm
/

led-large-16384-cnn_dailymail

@@ -21,7 +21,7 @@ model-index:
     metrics:
     - name: Rouge1
       type: rouge
-      value: 0.38124598557029016
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,11 +31,11 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [allenai/led-base-16384](https://huggingface.co/allenai/led-base-16384) on the cnn_dailymail dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.6312
-- Rouge1: 0.3812
-- Rouge2: 0.1691
-- Rougel: 0.2544
-- Rougelsum: 0.3599
 ## Model description
@@ -66,26 +66,30 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
-|:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|
-| 1.9531        | 0.4   | 500  | 1.8639          | 0.3485 | 0.1441 | 0.2275 | 0.3288    |
-| 1.9563        | 0.8   | 1000 | 1.8260          | 0.3538 | 0.1482 | 0.2315 | 0.3343    |
-| 1.7176        | 1.2   | 1500 | 1.8208          | 0.3628 | 0.1527 | 0.2383 | 0.3433    |
-| 1.7197        | 1.6   | 2000 | 1.8162          | 0.3696 | 0.1602 | 0.2434 | 0.3486    |
-| 1.8086        | 2.0   | 2500 | 1.7924          | 0.3558 | 0.1533 | 0.2334 | 0.3361    |
-| 1.2448        | 2.4   | 3000 | 1.8510          | 0.3703 | 0.1591 | 0.2447 | 0.3483    |
-| 1.3574        | 2.8   | 3500 | 1.8277          | 0.3741 | 0.1593 | 0.2422 | 0.3540    |
-| 1.0966        | 3.2   | 4000 | 1.8924          | 0.3682 | 0.1576 | 0.2424 | 0.3479    |
-| 0.9938        | 3.6   | 4500 | 1.8957          | 0.3723 | 0.1599 | 0.2451 | 0.3511    |
-| 1.0735        | 4.0   | 5000 | 1.8772          | 0.3653 | 0.1557 | 0.2399 | 0.3454    |
-| 0.9106        | 4.4   | 5500 | 1.9401          | 0.3720 | 0.1585 | 0.2436 | 0.3504    |
-| 1.015         | 4.8   | 6000 | 1.9320          | 0.3725 | 0.1570 | 0.2429 | 0.3515    |
-| 1.7854        | 0.36  | 6500 | 1.7800          | 0.3624 | 0.1544 | 0.2390 | 0.3422    |
-| 1.9079        | 0.39  | 7000 | 1.7629          | 0.3573 | 0.1553 | 0.2352 | 0.3370    |
-| 1.7606        | 3.34  | 7500 | 1.6902          | 0.3783 | 0.1673 | 0.2521 | 0.3570    |
-| 1.7571        | 3.57  | 8000 | 1.6563          | 0.3802 | 0.1691 | 0.2538 | 0.3587    |
-| 1.6602        | 3.79  | 8500 | 1.6439          | 0.3814 | 0.1693 | 0.2548 | 0.3600    |
-| 1.6614        | 4.01  | 9000 | 1.6312          | 0.3812 | 0.1691 | 0.2544 | 0.3599    |
 ### Framework versions

     metrics:
     - name: Rouge1
       type: rouge
+      value: 0.38275620598885174
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [allenai/led-base-16384](https://huggingface.co/allenai/led-base-16384) on the cnn_dailymail dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.6093
+- Rouge1: 0.3828
+- Rouge2: 0.1701
+- Rougel: 0.2561
+- Rougelsum: 0.3613
 ## Model description
 ### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
+|:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|:------:|:---------:|
+| 1.9531        | 0.4   | 500   | 1.8639          | 0.3485 | 0.1441 | 0.2275 | 0.3288    |
+| 1.9563        | 0.8   | 1000  | 1.8260          | 0.3538 | 0.1482 | 0.2315 | 0.3343    |
+| 1.7176        | 1.2   | 1500  | 1.8208          | 0.3628 | 0.1527 | 0.2383 | 0.3433    |
+| 1.7197        | 1.6   | 2000  | 1.8162          | 0.3696 | 0.1602 | 0.2434 | 0.3486    |
+| 1.8086        | 2.0   | 2500  | 1.7924          | 0.3558 | 0.1533 | 0.2334 | 0.3361    |
+| 1.2448        | 2.4   | 3000  | 1.8510          | 0.3703 | 0.1591 | 0.2447 | 0.3483    |
+| 1.3574        | 2.8   | 3500  | 1.8277          | 0.3741 | 0.1593 | 0.2422 | 0.3540    |
+| 1.0966        | 3.2   | 4000  | 1.8924          | 0.3682 | 0.1576 | 0.2424 | 0.3479    |
+| 0.9938        | 3.6   | 4500  | 1.8957          | 0.3723 | 0.1599 | 0.2451 | 0.3511    |
+| 1.0735        | 4.0   | 5000  | 1.8772          | 0.3653 | 0.1557 | 0.2399 | 0.3454    |
+| 0.9106        | 4.4   | 5500  | 1.9401          | 0.3720 | 0.1585 | 0.2436 | 0.3504    |
+| 1.015         | 4.8   | 6000  | 1.9320          | 0.3725 | 0.1570 | 0.2429 | 0.3515    |
+| 1.7854        | 0.36  | 6500  | 1.7800          | 0.3624 | 0.1544 | 0.2390 | 0.3422    |
+| 1.9079        | 0.39  | 7000  | 1.7629          | 0.3573 | 0.1553 | 0.2352 | 0.3370    |
+| 1.7606        | 3.34  | 7500  | 1.6902          | 0.3783 | 0.1673 | 0.2521 | 0.3570    |
+| 1.7571        | 3.57  | 8000  | 1.6563          | 0.3802 | 0.1691 | 0.2538 | 0.3587    |
+| 1.6602        | 3.79  | 8500  | 1.6439          | 0.3814 | 0.1693 | 0.2548 | 0.3600    |
+| 1.6614        | 4.01  | 9000  | 1.6312          | 0.3812 | 0.1691 | 0.2544 | 0.3599    |
+| 1.668         | 4.24  | 9500  | 1.6189          | 0.3815 | 0.1689 | 0.2550 | 0.3603    |
+| 1.6491        | 4.46  | 10000 | 1.6172          | 0.3799 | 0.1681 | 0.2540 | 0.3586    |
+| 1.5994        | 4.68  | 10500 | 1.6132          | 0.3825 | 0.1702 | 0.2560 | 0.3610    |
+| 1.6493        | 4.9   | 11000 | 1.6093          | 0.3828 | 0.1701 | 0.2561 | 0.3613    |
 ### Framework versions

generation_config.json CHANGED Viewed

@@ -1,8 +1,12 @@
 {
-  "_from_model_config": true,
   "bos_token_id": 0,
   "decoder_start_token_id": 2,
   "eos_token_id": 2,
   "pad_token_id": 1,
   "transformers_version": "4.30.2"
 }

 {
   "bos_token_id": 0,
   "decoder_start_token_id": 2,
+  "early_stopping": true,
   "eos_token_id": 2,
+  "length_penalty": 2.0,
+  "max_length": 1024,
+  "min_length": 100,
+  "no_repeat_ngram_size": 3,
   "pad_token_id": 1,
   "transformers_version": "4.30.2"
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2022b319ccb1aed2f70426cbc3b2adddf3f7bf76cdcf1b288ef27784bcfbe309
 size 647680813

 version https://git-lfs.github.com/spec/v1
+oid sha256:85be1f4b82f4f336a011fd3bad332bb5e44eec36d11403a6d8fe93a3891ce76d
 size 647680813