LA1512 committed
Commit 7154fab
1 parent: bd91b5b

LA1512/result

README.md CHANGED
@@ -1,5 +1,6 @@
 ---
-base_model: LA1512/PubMed-fine-tune
+license: bsd-3-clause
+base_model: pszemraj/led-base-book-summary
 tags:
 - generated_from_trainer
 datasets:
@@ -21,7 +22,7 @@ model-index:
   metrics:
   - name: Rouge1
     type: rouge
-    value: 40.7402
+    value: 42.8608
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -29,14 +30,14 @@ should probably proofread and complete it, then remove this comment. -->
 
 # results
 
-This model is a fine-tuned version of [LA1512/PubMed-fine-tune](https://huggingface.co/LA1512/PubMed-fine-tune) on the pubmed-summarization dataset.
+This model is a fine-tuned version of [pszemraj/led-base-book-summary](https://huggingface.co/pszemraj/led-base-book-summary) on the pubmed-summarization dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.6196
-- Rouge1: 40.7402
-- Rouge2: 16.1978
-- Rougel: 24.4278
-- Rougelsum: 36.5282
-- Gen Len: 179.6185
+- Loss: 3.2597
+- Rouge1: 42.8608
+- Rouge2: 16.655
+- Rougel: 23.8425
+- Rougelsum: 38.0076
+- Gen Len: 273.807
 
 ## Model description
 
@@ -56,22 +57,17 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
-- train_batch_size: 8
-- eval_batch_size: 8
+- train_batch_size: 2
+- eval_batch_size: 2
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 500
-- num_epochs: 3
+- num_epochs: 1
 - label_smoothing_factor: 0.1
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss | Rouge1  | Rouge2  | Rougel  | Rougelsum | Gen Len  |
-|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:--------:|
-| 3.6132        | 1.0   | 2500 | 3.6766          | 40.5092 | 15.7678 | 24.1228 | 36.3318   | 183.7205 |
-| 3.5939        | 2.0   | 5000 | 3.6276          | 40.7583 | 16.1779 | 24.4375 | 36.5537   | 181.4365 |
-| 3.5419        | 3.0   | 7500 | 3.6196          | 40.7402 | 16.1978 | 24.4278 | 36.5282   | 179.6185 |
 
 
 ### Framework versions
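For reference, a minimal usage sketch of the updated checkpoint via the `transformers` summarization pipeline. The repo id `LA1512/result` is taken from the commit header above, and the input text is a placeholder; verify both before use.

```python
# Minimal sketch, assuming the repo id "LA1512/result" from this commit.
from transformers import pipeline

summarizer = pipeline("summarization", model="LA1512/result")

article = "Long PubMed article text goes here..."  # placeholder input
print(summarizer(article, max_length=256, min_length=8)[0]["summary_text"])
```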
config.json CHANGED
@@ -1,37 +1,35 @@
 {
-  "_name_or_path": "LA1512/PubMed-fine-tune",
-  "_num_labels": 3,
-  "activation_dropout": 0,
+  "_name_or_path": "pszemraj/led-base-book-summary",
+  "activation_dropout": 0.0,
   "activation_function": "gelu",
-  "add_bias_logits": false,
-  "add_final_layer_norm": false,
   "architectures": [
-    "BartForConditionalGeneration"
+    "LEDForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "attention_window": [
+    1024,
+    1024,
+    1024,
+    1024,
+    1024,
+    1024
   ],
-  "attention_dropout": 0.1,
   "bos_token_id": 0,
-  "classif_dropout": 0,
-  "classifier_dropout": 0,
-  "d_model": 1024,
-  "decoder_attention_heads": 16,
-  "decoder_ffn_dim": 4096,
-  "decoder_layerdrop": 0,
-  "decoder_layers": 3,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 768,
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
   "decoder_start_token_id": 2,
   "dropout": 0.1,
   "early_stopping": true,
-  "encoder_attention_heads": 16,
-  "encoder_ffn_dim": 4096,
-  "encoder_layerdrop": 0,
-  "encoder_layers": 12,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
   "eos_token_id": 2,
-  "eos_token_ids": [
-    2
-  ],
-  "extra_pos_embeddings": 2,
-  "force_bos_token_to_be_generated": false,
-  "forced_eos_token_id": 2,
-  "gradient_checkpointing": false,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
@@ -44,24 +42,19 @@
     "LABEL_1": 1,
     "LABEL_2": 2
   },
-  "max_length": 62,
-  "max_position_embeddings": 1024,
-  "min_length": 11,
-  "model_type": "bart",
+  "length_penalty": 0.8,
+  "max_decoder_position_embeddings": 1024,
+  "max_encoder_position_embeddings": 16384,
+  "max_length": 1024,
+  "min_length": 8,
+  "model_type": "led",
   "no_repeat_ngram_size": 3,
-  "normalize_before": false,
-  "normalize_embedding": true,
-  "num_beams": 6,
-  "num_hidden_layers": 12,
-  "output_past": true,
+  "num_beams": 4,
+  "num_hidden_layers": 6,
   "pad_token_id": 1,
-  "prefix": " ",
-  "save_step": 58,
-  "scale_embedding": false,
-  "static_position_embeddings": false,
-  "task_specific_params": {},
+  "repetition_penalty": 3.5,
   "torch_dtype": "float32",
   "transformers_version": "4.39.3",
   "use_cache": true,
-  "vocab_size": 50264
+  "vocab_size": 50265
 }
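This config swap replaces the BART architecture with LED: fewer and narrower layers, but a far longer encoder context with windowed local attention. A small sketch to confirm the new values after download, again assuming the `LA1512/result` repo id:

```python
# Sketch: inspect the uploaded config; repo id "LA1512/result" is assumed.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("LA1512/result")
print(config.model_type)                       # "led" (previously "bart")
print(config.encoder_layers, config.d_model)   # 6 layers, d_model 768 (was 12, 1024)
print(config.attention_window)                 # [1024, ...] local-attention window per layer
print(config.max_encoder_position_embeddings)  # 16384-token encoder input (was 1024)
```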
generation_config.json CHANGED
@@ -3,11 +3,12 @@
   "decoder_start_token_id": 2,
   "early_stopping": true,
   "eos_token_id": 2,
-  "forced_eos_token_id": 2,
-  "max_length": 62,
-  "min_length": 11,
+  "length_penalty": 0.8,
+  "max_length": 1024,
+  "min_length": 8,
   "no_repeat_ngram_size": 3,
-  "num_beams": 6,
+  "num_beams": 4,
   "pad_token_id": 1,
+  "repetition_penalty": 3.5,
   "transformers_version": "4.39.3"
 }
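These decoding defaults (4-beam search, length penalty 0.8, repetition penalty 3.5) are picked up automatically by `generate()`. The sketch below passes them explicitly for clarity and adds the customary LED `global_attention_mask` on the first token; the repo id and input text are assumptions.

```python
# Sketch: apply the new generation defaults explicitly; values mirror
# generation_config.json. Repo id "LA1512/result" is assumed.
import torch
from transformers import LEDForConditionalGeneration, LEDTokenizerFast

model = LEDForConditionalGeneration.from_pretrained("LA1512/result")
tokenizer = LEDTokenizerFast.from_pretrained("LA1512/result")

inputs = tokenizer("Long PubMed article text goes here...",
                   return_tensors="pt", truncation=True, max_length=1024)
global_attention_mask = torch.zeros_like(inputs["input_ids"])
global_attention_mask[:, 0] = 1  # global attention on the <s> token, per LED convention

summary_ids = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    global_attention_mask=global_attention_mask,
    num_beams=4,
    length_penalty=0.8,
    repetition_penalty=3.5,
    no_repeat_ngram_size=3,
    min_length=8,
    max_length=1024,
    early_stopping=True,
)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```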
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f34a4a5a93856e5d6532db0bb0e56e053b236b0106766ea844b9d61906ffd50
-size 1020714768
+oid sha256:cdb69fe3cb92cba61864654b5ea695a5e8362996f88f1e41beb60fcdf1a6ea33
+size 647614116
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 256,
+    "max_length": 1024,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
-      "Fixed": 256
+      "Fixed": 1024
     },
     "direction": "Right",
     "pad_to_multiple_of": null,
tokenizer_config.json CHANGED
@@ -48,10 +48,17 @@
   "eos_token": "</s>",
   "errors": "replace",
   "mask_token": "<mask>",
-  "model_max_length": 1024,
+  "max_length": 1024,
+  "model_max_length": 16384,
+  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "</s>",
-  "tokenizer_class": "BartTokenizer",
+  "stride": 0,
+  "tokenizer_class": "LEDTokenizer",
   "trim_offsets": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }
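The tokenizer is likewise retargeted from `BartTokenizer` to `LEDTokenizer`, with `model_max_length` raised to LED's 16384 while `tokenizer.json` pins truncation and fixed padding at 1024. A quick check, under the same repo-id assumption:

```python
# Sketch: verify the retargeted tokenizer; repo id "LA1512/result" is assumed.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("LA1512/result")
print(type(tokenizer).__name__)    # LEDTokenizer or LEDTokenizerFast
print(tokenizer.model_max_length)  # 16384

enc = tokenizer("Long PubMed article text goes here...",
                truncation=True, padding="max_length", max_length=1024)
print(len(enc["input_ids"]))       # 1024
```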
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8996fa5924b2adc468689d11dc75794fbe5d22d47de3739d11c93031ebbedd55
+oid sha256:301b9e6e7201f138110067138a881ad800c0a1bf46b87ae55423081b9037e7bf
 size 5048